001 /** 002 * SameSplitEvaluator.java 003 * jCOLIBRI2 framework. 004 * @author Juan A. Recio-García. 005 * GAIA - Group for Artificial Intelligence Applications 006 * http://gaia.fdi.ucm.es 007 * 07/05/2007 008 */ 009 package jcolibri.evaluation.evaluators; 010 011 import java.io.BufferedReader; 012 import java.io.BufferedWriter; 013 import java.io.FileReader; 014 import java.io.FileWriter; 015 import java.util.ArrayList; 016 import java.util.Collection; 017 import java.util.Date; 018 import java.util.Iterator; 019 import java.util.List; 020 021 import jcolibri.cbraplications.StandardCBRApplication; 022 import jcolibri.cbrcore.CBRCase; 023 import jcolibri.cbrcore.CBRCaseBase; 024 import jcolibri.evaluation.EvaluationReport; 025 import jcolibri.evaluation.Evaluator; 026 import jcolibri.exception.ExecutionException; 027 import jcolibri.util.FileIO; 028 029 import org.apache.commons.logging.LogFactory; 030 031 /** 032 * This method splits the case base in two sets: one used for testing where each 033 * case is used as query, and another that acts as normal case base. 034 * <br> 035 * This method is different of the other evaluators beacuse the split is stored in a file that can be used in following evaluations. 036 * This way, the same set is used as queries for each evaluation. 037 * <br> 038 * The generateSplit() method does the initial random split and saves the query set in a file. 039 * Later, the HoldOutfromFile() method uses that file to load the queries set and perform the evaluation. 040 * 041 * @author Juan A. Recio García & Lisa Cummins 042 * @version 2.0 043 */ 044 045 public class SameSplitEvaluator extends Evaluator { 046 047 protected StandardCBRApplication app; 048 049 public void init(StandardCBRApplication cbrApp) { 050 051 report = new EvaluationReport(); 052 app = cbrApp; 053 try { 054 app.configure(); 055 } catch (ExecutionException e) { 056 LogFactory.getLog(this.getClass()).error(e); 057 } 058 } 059 060 public void generateSplit(int testPercent, String filename) 061 { 062 //Run the precycle to load the case base 063 LogFactory.getLog(this.getClass()).info("Running precycle()"); 064 CBRCaseBase caseBase = null; 065 try { 066 caseBase = app.preCycle(); 067 } catch (ExecutionException e) { 068 org.apache.commons.logging.LogFactory.getLog(this.getClass()).error(e); 069 } 070 071 if (!(caseBase instanceof jcolibri.casebase.CachedLinealCaseBase)) 072 LogFactory 073 .getLog(this.getClass()) 074 .warn( 075 "Evaluation should be executed using a cached case base"); 076 077 ArrayList<CBRCase> originalCases = new ArrayList<CBRCase>( 078 caseBase.getCases()); 079 080 ArrayList<CBRCase> querySet = new ArrayList<CBRCase>(); 081 ArrayList<CBRCase> caseBaseSet = new ArrayList<CBRCase>(); 082 083 // Split the case base 084 splitCaseBase(originalCases, querySet, caseBaseSet, testPercent); 085 086 save(querySet, filename); 087 } 088 089 protected void save(Collection<CBRCase> queries, String filename) 090 { 091 try { 092 BufferedWriter br = null; 093 br = new BufferedWriter(new FileWriter(filename)); 094 if (br == null) 095 throw new Exception("Error opening file for writing: "+ filename); 096 097 for (CBRCase _case : queries) { 098 br.write(_case.getID().toString()); 099 br.newLine(); 100 } 101 br.close(); 102 } catch (Exception e) { 103 org.apache.commons.logging.LogFactory.getLog(this.getClass()).error(e); 104 } 105 106 107 } 108 109 public void HoldOutfromFile(String file) { 110 try { 111 // Obtain the time 112 long t = (new Date()).getTime(); 113 int numberOfCycles = 0; 114 115 // Run the precycle to load the case base 116 LogFactory.getLog(this.getClass()).info("Running precycle()"); 117 CBRCaseBase caseBase = app.preCycle(); 118 119 if (!(caseBase instanceof jcolibri.casebase.CachedLinealCaseBase)) 120 LogFactory 121 .getLog(this.getClass()) 122 .warn( 123 "Evaluation should be executed using a cached case base"); 124 125 ArrayList<CBRCase> originalCases = new ArrayList<CBRCase>(caseBase.getCases()); 126 ArrayList<CBRCase> querySet = new ArrayList<CBRCase>(); 127 ArrayList<CBRCase> caseBaseSet = new ArrayList<CBRCase>(); 128 129 // Split the case base 130 splitCaseBaseFromFile(originalCases, querySet, caseBaseSet, file); 131 132 int totalSteps = querySet.size(); 133 jcolibri.util.ProgressController.init(getClass(), 134 "Same Split - Hold Out Evaluation", totalSteps); 135 136 137 // Clear the caseBase 138 caseBase.forgetCases(originalCases); 139 140 // Set the cases that acts as case base in this repetition 141 caseBase.learnCases(caseBaseSet); 142 143 // Run cycle for each case in querySet 144 for (CBRCase c : querySet) { 145 // Run the cycle 146 LogFactory.getLog(this.getClass()).info( 147 "Running cycle() " + numberOfCycles); 148 app.cycle(c); 149 150 jcolibri.util.ProgressController.step(getClass()); 151 numberOfCycles++; 152 } 153 154 jcolibri.util.ProgressController.finish(getClass()); 155 156 // Revert case base to original state 157 caseBase.forgetCases(originalCases); 158 caseBase.learnCases(originalCases); 159 160 // Run the poscycle to finish the application 161 LogFactory.getLog(this.getClass()).info("Running postcycle()"); 162 app.postCycle(); 163 164 t = (new Date()).getTime() - t; 165 166 // Obtain and complete the evaluation result 167 report.setTotalTime(t); 168 report.setNumberOfCycles(numberOfCycles); 169 170 } catch (Exception e) { 171 LogFactory.getLog(this.getClass()).error(e); 172 } 173 174 } 175 176 /** 177 * Splits the case base in two sets: queries and case base 178 * 179 * @param wholeCaseBase 180 * Complete original case base 181 * @param querySet 182 * Output param where queries are stored 183 * @param casebaseSet 184 * Output param where case base is stored 185 * @param testPercent 186 * Percentage of cases used as queries 187 */ 188 protected void splitCaseBase(Collection<CBRCase> wholeCaseBase, 189 List<CBRCase> querySet, List<CBRCase> casebaseSet, int testPercent) { 190 querySet.clear(); 191 casebaseSet.clear(); 192 193 int querySetSize = (wholeCaseBase.size() * testPercent) / 100; 194 casebaseSet.addAll(wholeCaseBase); 195 196 for (int i = 0; i < querySetSize; i++) { 197 int random = (int) (Math.random() * casebaseSet.size()); 198 CBRCase _case = casebaseSet.get(random); 199 casebaseSet.remove(random); 200 querySet.add(_case); 201 } 202 } 203 204 protected void splitCaseBaseFromFile(Collection<CBRCase> wholeCaseBase, 205 List<CBRCase> querySet, List<CBRCase> casebaseSet, String filename) 206 { 207 querySet.clear(); 208 casebaseSet.clear(); 209 210 casebaseSet.addAll(wholeCaseBase); 211 212 try { 213 BufferedReader br = null; 214 br = new BufferedReader(new FileReader(FileIO.findFile(filename).getFile())); 215 if (br == null) 216 throw new Exception("Error opening file: " + filename); 217 218 String line = ""; 219 while ((line = br.readLine()) != null) { 220 CBRCase c = null; 221 int pos=0; 222 boolean found = false; 223 for(Iterator<CBRCase> iter = casebaseSet.iterator(); iter.hasNext() && (!found); ) 224 { 225 c = iter.next(); 226 if(c.getID().toString().equals(line)) 227 found = true; 228 else 229 pos++; 230 } 231 if(c==null) 232 { 233 org.apache.commons.logging.LogFactory.getLog(this.getClass()).error("Case "+line+" not found into case base"); 234 continue; 235 } 236 237 casebaseSet.remove(pos); 238 querySet.add(c); 239 } 240 br.close(); 241 } catch (Exception e) { 242 org.apache.commons.logging.LogFactory.getLog(this.getClass()).error(e); 243 } 244 } 245 246 }