001 package jcolibri.extensions.maintenance_evaluation.evaluators; 002 003 import java.io.BufferedReader; 004 import java.io.FileReader; 005 import java.util.ArrayList; 006 import java.util.Collection; 007 import java.util.Date; 008 import java.util.Iterator; 009 import java.util.List; 010 011 import jcolibri.cbrcore.CBRCase; 012 import jcolibri.cbrcore.CBRCaseBase; 013 import jcolibri.extensions.maintenance_evaluation.MaintenanceEvaluator; 014 import jcolibri.util.FileIO; 015 016 import org.apache.commons.logging.LogFactory; 017 018 /** 019 * This evaluation splits the case base in two sets: one used for testing where each 020 * case is used as query, and another that acts as normal case base. 021 * It uses queries from a file so that the evaluation can be repeated with the 022 * same test/training set split. 023 * The generateSplit() method does the initial random split and saves the query set in a file. 024 * Later, the HoldOutfromFile() method uses that file to load the query set and 025 * perform the evaluation. 026 * 027 * @author Juan A. Recio García & Lisa Cummins 028 */ 029 public class MaintenanceSameSplitEvaluator extends MaintenanceEvaluator { 030 031 /** 032 * Perform HoldOut evaluation using the queries contained in the specified file. 033 * @param file the file containing the queries. 034 */ 035 public void HoldOutfromFile(String file) 036 { try 037 { // Obtain the time 038 long t = (new Date()).getTime(); 039 int numberOfCycles = 0; 040 041 // Run the precycle to load the case base 042 LogFactory.getLog(this.getClass()).info("Running precycle()"); 043 CBRCaseBase caseBase = app.preCycle(); 044 045 if (!(caseBase instanceof jcolibri.casebase.CachedLinealCaseBase)) 046 LogFactory 047 .getLog(this.getClass()) 048 .warn( 049 "Evaluation should be executed using a cached case base"); 050 051 ArrayList<CBRCase> originalCases = new ArrayList<CBRCase>(caseBase.getCases()); 052 ArrayList<CBRCase> querySet = new ArrayList<CBRCase>(); 053 054 prepareCases(originalCases, querySet, file, caseBase); 055 056 int totalSteps = querySet.size(); 057 jcolibri.util.ProgressController.init(getClass(), 058 "Same Split - Hold Out Evaluation", totalSteps); 059 060 // Run cycle for each case in querySet 061 for (CBRCase c : querySet) { 062 // Run the cycle 063 LogFactory.getLog(this.getClass()).info( 064 "Running cycle() " + numberOfCycles); 065 066 // report.storeQueryNum(); 067 068 app.cycle(c); 069 070 jcolibri.util.ProgressController.step(getClass()); 071 numberOfCycles++; 072 } 073 074 jcolibri.util.ProgressController.finish(getClass()); 075 076 // Revert case base to original state 077 caseBase.forgetCases(originalCases); 078 caseBase.learnCases(originalCases); 079 080 // Run the poscycle to finish the application 081 LogFactory.getLog(this.getClass()).info("Running postcycle()"); 082 app.postCycle(); 083 084 t = (new Date()).getTime() - t; 085 086 // Obtain and complete the evaluation result 087 report.setTotalTime(t); 088 report.setNumberOfCycles(numberOfCycles); 089 090 } catch (Exception e) 091 { LogFactory.getLog(this.getClass()).error(e); 092 } 093 } 094 095 096 /** 097 * Prepares the cases for evaluation by setting up test and training sets 098 * @param originalCases Complete original set of cases 099 * @param querySet Where queries are to be stored 100 * @param caseBase The case base 101 */ 102 protected void prepareCases(Collection<CBRCase> originalCases, List<CBRCase> querySet, 103 String file, CBRCaseBase caseBase) 104 { 105 ArrayList<CBRCase> caseBaseSet = new ArrayList<CBRCase>(); 106 107 // Split the case base 108 splitCaseBaseFromFile(originalCases, querySet, caseBaseSet, file); 109 110 // Clear the caseBase 111 caseBase.forgetCases(originalCases); 112 113 // Set the cases that acts as case base in this repetition 114 caseBase.learnCases(caseBaseSet); 115 116 if(this.simConfig != null && this.editMethod != null) 117 { // Perform maintenance on this case base 118 editCaseBase(caseBase); 119 } 120 } 121 122 /** 123 * Splits the case base in two sets: queries and case base, with the 124 * queries contained in the given file 125 * 126 * @param wholeCaseBase 127 * Complete original case base 128 * @param querySet 129 * Output param where queries are stored 130 * @param casebaseSet 131 * Output param where case base is stored 132 * @param filename 133 * File which contains the queries 134 */ 135 public static void splitCaseBaseFromFile(Collection<CBRCase> wholeCaseBase, 136 List<CBRCase> querySet, List<CBRCase> casebaseSet, String filename) 137 { querySet.clear(); 138 casebaseSet.clear(); 139 140 casebaseSet.addAll(wholeCaseBase); 141 142 try 143 { BufferedReader br = null; 144 br = new BufferedReader(new FileReader(FileIO.findFile(filename).getFile())); 145 if (br == null) 146 throw new Exception("Error opening file: " + filename); 147 148 String line = ""; 149 while ((line = br.readLine()) != null) 150 { CBRCase c = null; 151 int pos=0; 152 boolean found = false; 153 for(Iterator<CBRCase> iter = casebaseSet.iterator(); iter.hasNext() && (!found); ) 154 { c = iter.next(); 155 if(c.getID().toString().equals(line)) 156 found = true; 157 else 158 pos++; 159 } 160 if(c==null) 161 { System.out.println("Case "+line+" not found into case base"); 162 continue; 163 } 164 165 casebaseSet.remove(pos); 166 querySet.add(c); 167 } 168 br.close(); 169 } catch (Exception e) 170 { System.out.println(e); 171 } 172 } 173 }