/**
 * MaintenanceHoldOutEvaluator.java
 * jCOLIBRI2 framework.
 * @author Juan A. Recio-García.
 * @author Lisa Cummins.
 * GAIA - Group for Artificial Intelligence Applications
 * http://gaia.fdi.ucm.es
 * 23/07/2007
 */
package jcolibri.extensions.maintenance_evaluation.evaluators;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.List;

import jcolibri.cbrcore.CBRCase;
import jcolibri.cbrcore.CBRCaseBase;
import jcolibri.extensions.maintenance_evaluation.MaintenanceEvaluator;

import org.apache.commons.logging.LogFactory;

/**
 * This evaluation splits the case base in two sets: a training set and a test set.
 * The training set is used as a case base. It is maintained and then the cases in the
 * test set are used as queries to evaluate the training set.
 * This process is performed several times.
 *
 * @author Lisa Cummins.
 * @author Juan A. Recio García - GAIA http://gaia.fdi.ucm.es
 */
public class MaintenanceHoldOutEvaluator extends MaintenanceEvaluator
{
    /**
     * Performs the Hold-Out evaluation.
     * <p>
     * The inherited {@code app} is asked to run its precycle, one cycle per query of
     * every repetition, and its postcycle. The elapsed time and the number of executed
     * cycles are stored in the inherited {@code report}.
     * <p>
     * This method does not propagate exceptions: any {@link Exception} raised during the
     * evaluation (including an invalid {@code testPercent}) is logged at error level and
     * the method returns. Note that when a failure happens after the case base has been
     * split, the statements that restore the original case base and run the postcycle
     * are skipped.
     *
     * @param testPercent percentage of the case base used as queries, in the range
     *            [0, 100]. The case base is split randomly in each repetition.
     * @param repetitions number of repetitions.
     */
    public void HoldOut(int testPercent, int repetitions) {
        final org.apache.commons.logging.Log log = LogFactory.getLog(this.getClass());
        try {
            // Fail before loading anything if the percentage cannot be honoured
            checkTestPercent(testPercent);

            // Obtain the time
            long t = (new Date()).getTime();
            int numberOfCycles = 0;

            // Run the precycle to load the case base
            log.info("Running precycle()");
            CBRCaseBase caseBase = app.preCycle();

            if (!(caseBase instanceof jcolibri.casebase.CachedLinealCaseBase)) {
                log.warn("Evaluation should be executed using a cached case base");
            }

            // Snapshot of the complete case base; used to split and, at the end, to restore it
            ArrayList<CBRCase> originalCases = new ArrayList<CBRCase>(caseBase.getCases());

            // One progress step per query and repetition
            int totalSteps = querySetSize(originalCases.size(), testPercent) * repetitions;
            jcolibri.util.ProgressController.init(getClass(), "Hold Out Evaluation", totalSteps);

            // For each repetition
            for (int rep = 0; rep < repetitions; rep++) {
                ArrayList<CBRCase> querySet = new ArrayList<CBRCase>();
                prepareCases(originalCases, querySet, testPercent, caseBase);

                // Run cycle for each case in querySet
                for (CBRCase c : querySet) {
                    log.info("Running cycle() " + numberOfCycles);

                    app.cycle(c);

                    jcolibri.util.ProgressController.step(getClass());
                    numberOfCycles++;
                }
            }

            jcolibri.util.ProgressController.finish(getClass());

            // Revert case base to original state
            caseBase.forgetCases(originalCases);
            caseBase.learnCases(originalCases);

            // Run the postcycle to finish the application
            log.info("Running postcycle()");
            app.postCycle();

            t = (new Date()).getTime() - t;

            // Obtain and complete the evaluation result
            report.setTotalTime(t);
            report.setNumberOfCycles(numberOfCycles);

        } catch (Exception e) {
            // Keep the "never throws" contract, but record the failure with its stack trace
            log.error("Hold Out evaluation failed", e);
        }
    }

    /**
     * Prepares the cases for evaluation by setting up test and training sets.
     * <p>
     * The original cases are split randomly, the case base is emptied of the original
     * cases and then filled with the training part only. When both the inherited
     * {@code simConfig} and {@code editMethod} are set, {@code editCaseBase} is invoked
     * on the resulting case base.
     *
     * @param originalCases Complete original set of cases
     * @param querySet Where queries are to be stored (previous content is discarded)
     * @param testPercent Percentage of cases used as queries, in the range [0, 100]
     * @param caseBase The case base
     * @throws IllegalArgumentException if {@code testPercent} is outside [0, 100]
     */
    protected void prepareCases(Collection<CBRCase> originalCases, List<CBRCase> querySet,
            int testPercent, CBRCaseBase caseBase)
    {
        ArrayList<CBRCase> caseBaseSet = new ArrayList<CBRCase>();

        // Split the case base
        splitCaseBase(originalCases, querySet, caseBaseSet, testPercent);

        // Clear the caseBase
        caseBase.forgetCases(originalCases);

        // Set the cases that act as case base in this repetition
        caseBase.learnCases(caseBaseSet);

        if (this.simConfig != null && this.editMethod != null) {
            // Perform maintenance on this case base
            editCaseBase(caseBase);
        }
    }

    /**
     * Splits the case base in two sets: queries and case base.
     * <p>
     * Both output lists are cleared first. {@code (size * testPercent) / 100} cases
     * (integer division) are picked at random, without repetition, as queries; the
     * remaining cases are left in {@code casebaseSet}.
     *
     * @param wholeCaseBase
     *            Complete original case base
     * @param querySet
     *            Output param where queries are stored
     * @param casebaseSet
     *            Output param where case base is stored
     * @param testPercent
     *            Percentage of cases used as queries, in the range [0, 100]
     * @throws IllegalArgumentException
     *             if {@code testPercent} is outside [0, 100]; the output lists are left
     *             untouched in that case
     */
    public static void splitCaseBase(Collection<CBRCase> wholeCaseBase,
            List<CBRCase> querySet, List<CBRCase> casebaseSet, int testPercent)
    {
        // Validate before mutating the output lists so a bad argument has no side effects
        checkTestPercent(testPercent);

        querySet.clear();
        casebaseSet.clear();

        int querySetSize = querySetSize(wholeCaseBase.size(), testPercent);
        casebaseSet.addAll(wholeCaseBase);

        // Move randomly chosen cases from the case base set to the query set
        for (int i = 0; i < querySetSize; i++) {
            int random = (int) (Math.random() * casebaseSet.size());
            querySet.add(casebaseSet.remove(random));
        }
    }

    /**
     * Rejects percentages that cannot describe a portion of the case base.
     *
     * @param testPercent value to check
     * @throws IllegalArgumentException if {@code testPercent} is outside [0, 100]
     */
    private static void checkTestPercent(int testPercent)
    {
        if (testPercent < 0 || testPercent > 100) {
            throw new IllegalArgumentException(
                    "testPercent must be between 0 and 100 but was " + testPercent);
        }
    }

    /**
     * Computes how many cases are used as queries.
     *
     * @param totalCases number of cases in the complete case base
     * @param testPercent percentage of cases used as queries
     * @return {@code (totalCases * testPercent) / 100} using integer division
     */
    private static int querySetSize(int totalCases, int testPercent)
    {
        // long arithmetic: totalCases * testPercent may not fit in an int
        return (int) (((long) totalCases * testPercent) / 100);
    }
}