/**
 * SameSplitEvaluator.java
 * jCOLIBRI2 framework.
 * @author Juan A. Recio-García.
 * @author Lisa Cummins.
 * GAIA - Group for Artificial Intelligence Applications
 * http://gaia.fdi.ucm.es
 * 07/05/2007
 */
package jcolibri.extensions.evaluation.evaluators;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import jcolibri.cbrcore.CBRCase;
import jcolibri.cbrcore.CBRCaseBase;
import jcolibri.extensions.evaluation.MaintenanceEvaluator;
import jcolibri.util.FileIO;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * This evaluator splits the case base in two sets: one used for testing, where
 * each case is used as a query, and another that acts as the normal case base.
 * <br>
 * It differs from the other evaluators because the split is stored in a file
 * that can be reused in following evaluations. This way, the same set is used
 * as queries for each evaluation.
 * <br>
 * A generateSplit() method (not declared in this class) does the initial random
 * split and saves the query set in a file. Later, the HoldOutfromFile() method
 * uses that file to load the query set and perform the evaluation.
 *
 * @author Juan A. Recio García & Lisa Cummins
 * @version 2.0
 */
public class MaintenanceSameSplitEvaluator extends MaintenanceEvaluator {

    /**
     * Runs a hold-out evaluation whose query set is read from a file.
     * <p>
     * The application pre-cycle is executed to obtain the case base, the case
     * base is split according to the file (see
     * {@link #prepareCases(Collection, List, String, CBRCaseBase)}), one cycle
     * is executed per query, the case base is then reset to the original cases
     * and the post-cycle is executed. The elapsed time and the number of cycles
     * are stored in the report.
     * <p>
     * Any exception raised along the way is logged (with its stack trace) and
     * not propagated to the caller.
     *
     * @param file
     *            File which contains the IDs of the cases used as queries
     */
    public void HoldOutfromFile(String file) {
        final Log log = LogFactory.getLog(this.getClass());
        try {
            // Obtain the time
            long startTime = System.currentTimeMillis();
            int numberOfCycles = 0;

            // Run the precycle to load the case base
            log.info("Running precycle()");
            CBRCaseBase caseBase = app.preCycle();

            if (!(caseBase instanceof jcolibri.casebase.CachedLinealCaseBase)) {
                log.warn("Evaluation should be executed using a cached case base");
            }

            // Snapshot of the full case base, used to restore it afterwards
            ArrayList<CBRCase> originalCases = new ArrayList<CBRCase>(caseBase.getCases());
            ArrayList<CBRCase> querySet = new ArrayList<CBRCase>();

            prepareCases(originalCases, querySet, file, caseBase);

            int totalSteps = querySet.size();
            jcolibri.util.ProgressController.init(getClass(),
                    "Same Split - Hold Out Evaluation", totalSteps);

            // Run cycle for each case in querySet
            for (CBRCase query : querySet) {
                log.info("Running cycle() " + numberOfCycles);

                app.cycle(query);

                jcolibri.util.ProgressController.step(getClass());
                numberOfCycles++;
            }

            jcolibri.util.ProgressController.finish(getClass());

            // Revert case base to original state
            caseBase.forgetCases(originalCases);
            caseBase.learnCases(originalCases);

            // Run the postcycle to finish the application
            log.info("Running postcycle()");
            app.postCycle();

            // Obtain and complete the evaluation result
            report.setTotalTime(System.currentTimeMillis() - startTime);
            report.setNumberOfCycles(numberOfCycles);
        } catch (Exception e) {
            // Pass the throwable separately so the stack trace is not lost
            log.error("Error running hold-out evaluation from file: " + file, e);
        }
    }

    /**
     * Prepares the cases for evaluation by setting up test and training sets.
     * <p>
     * The original cases are removed from the case base and replaced by the
     * cases that are not listed as queries in the file. If both
     * {@code simConfig} and {@code editMethod} are set, the resulting case base
     * is then edited through {@code editCaseBase}.
     *
     * @param originalCases
     *            Complete original set of cases
     * @param querySet
     *            Where queries are to be stored
     * @param file
     *            File which contains the IDs of the cases used as queries
     * @param caseBase
     *            The case base
     */
    protected void prepareCases(Collection<CBRCase> originalCases, List<CBRCase> querySet,
            String file, CBRCaseBase caseBase) {
        ArrayList<CBRCase> caseBaseSet = new ArrayList<CBRCase>();

        // Split the case base
        splitCaseBaseFromFile(originalCases, querySet, caseBaseSet, file);

        // Clear the caseBase
        caseBase.forgetCases(originalCases);

        // Set the cases that act as case base in this repetition
        caseBase.learnCases(caseBaseSet);

        if (this.simConfig != null && this.editMethod != null) {
            // Perform maintenance on this case base
            editCaseBase(caseBase);
        }
    }

    /**
     * Splits the case base in two sets: queries and case base, with the
     * queries contained in the given file.
     * <p>
     * Each line of the file is compared against the string form of every
     * remaining case ID; the first matching case is moved from the case base
     * set to the query set. Lines that match no case are reported on standard
     * output and skipped. Errors while locating or reading the file are
     * printed to standard output and not propagated; the reader is always
     * closed.
     *
     * @param wholeCaseBase
     *            Complete original case base
     * @param querySet
     *            Output param where queries are stored
     * @param casebaseSet
     *            Output param where case base is stored
     * @param filename
     *            File which contains the queries
     */
    public static void splitCaseBaseFromFile(Collection<CBRCase> wholeCaseBase,
            List<CBRCase> querySet, List<CBRCase> casebaseSet, String filename) {
        querySet.clear();
        casebaseSet.clear();

        casebaseSet.addAll(wholeCaseBase);

        BufferedReader br = null;
        try {
            br = new BufferedReader(new FileReader(FileIO.findFile(filename).getFile()));

            String line;
            while ((line = br.readLine()) != null) {
                // Look for the first remaining case whose ID matches this line
                CBRCase match = null;
                for (Iterator<CBRCase> iter = casebaseSet.iterator(); iter.hasNext() && match == null;) {
                    CBRCase candidate = iter.next();
                    if (candidate.getID().toString().equals(line)) {
                        // Remove through the iterator: no index bookkeeping needed
                        iter.remove();
                        match = candidate;
                    }
                }

                if (match == null) {
                    // Unknown ID: report it and keep processing the rest of the file
                    System.out.println("Case "+line+" not found into case base");
                    continue;
                }

                querySet.add(match);
            }
        } catch (Exception e) {
            System.out.println(e);
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException closeError) {
                    System.out.println(closeError);
                }
            }
        }
    }
}