001 package jcolibri.extensions.maintenance_evaluation.evaluators; 002 003 import java.util.ArrayList; 004 import java.util.Collection; 005 import java.util.Date; 006 import java.util.List; 007 008 import jcolibri.cbrcore.CBRCase; 009 import jcolibri.cbrcore.CBRCaseBase; 010 import jcolibri.extensions.maintenance_evaluation.MaintenanceEvaluator; 011 012 import org.apache.commons.logging.LogFactory; 013 014 /** 015 * This evaluation divides the case base into several random folds 016 * (indicated by the user). 017 * For each fold, their cases are used as queries and the remaining folds are 018 * used together as case base. Maintenance is performed on the case-base before 019 * running the queries. 020 * This process is performed several times. 021 * 022 * @author Lisa Cummins. 023 * @author Juan A. Recio García - GAIA http://gaia.fdi.ucm.es 024 */ 025 public class MaintenanceNFoldEvaluator extends MaintenanceEvaluator 026 { 027 /** 028 * Executes the N-Fold evaluation. 029 * @param numFolds the number of randomly generated folds. 030 * @param repetitions the number of repetitions 031 */ 032 public void NFoldEvaluation(int numFolds, int repetitions) 033 { try 034 { //Get the time 035 long t = (new Date()).getTime(); 036 int numberOfCycles = 0; 037 038 // Run the precycle to load the case base 039 LogFactory.getLog(this.getClass()).info("Running precycle()"); 040 CBRCaseBase caseBase = app.preCycle(); 041 042 if (!(caseBase instanceof jcolibri.casebase.CachedLinealCaseBase)) 043 LogFactory.getLog(this.getClass()).warn( 044 "Evaluation should be executed using a cached case base"); 045 046 Collection<CBRCase> cases = new ArrayList<CBRCase>(caseBase.getCases()); 047 048 //For each repetition 049 for(int r=0; r<repetitions; r++) 050 { //Create the folds 051 ArrayList<ArrayList<CBRCase>> folds = createFolds(cases, numFolds); 052 053 //For each fold 054 for(int f=0; f<numFolds; f++) 055 { ArrayList<CBRCase> querySet = new ArrayList<CBRCase>(); 056 prepareCases(cases, querySet, f, caseBase, folds); 057 058 //Run cycle for each case in querySet (current fold) 059 for(CBRCase c: querySet) 060 { LogFactory.getLog(this.getClass()).info( 061 "Running cycle() " + numberOfCycles); 062 app.cycle(c); 063 numberOfCycles++; 064 } 065 } 066 } 067 068 //Revert case base to original state 069 caseBase.forgetCases(cases); 070 caseBase.learnCases(cases); 071 072 //Run the poscycle to finish the application 073 LogFactory.getLog(this.getClass()).info("Running postcycle()"); 074 app.postCycle(); 075 076 //Complete the evaluation result 077 report.setTotalTime(t); 078 report.setNumberOfCycles(numberOfCycles); 079 080 } catch (Exception e) 081 { LogFactory.getLog(this.getClass()).error(e); 082 } 083 084 } 085 086 /** 087 * Prepares the cases for evaluation by setting up test and training sets 088 * @param originalCases Complete original set of cases 089 * @param querySet Where queries are to be stored 090 * @param fold The fold number 091 * @param caseBase The case base 092 */ 093 protected void prepareCases(Collection<CBRCase> originalCases, List<CBRCase> querySet, 094 int fold, CBRCaseBase caseBase, ArrayList<ArrayList<CBRCase>> folds) 095 { ArrayList<CBRCase> caseBaseSet = new ArrayList<CBRCase>(); 096 097 //Obtain the query and casebase sets 098 getFolds(fold, querySet, caseBaseSet, folds); 099 100 //Clear the caseBase 101 caseBase.forgetCases(originalCases); 102 103 //Set the cases that acts as casebase in this cycle 104 caseBase.learnCases(caseBaseSet); 105 106 if(this.simConfig != null && this.editMethod != null) 107 { // Perform maintenance on this case base 108 editCaseBase(caseBase); 109 } 110 } 111 112 /** 113 * Divides the given cases into the given number of folds. 114 * @param cases the original cases. 115 * @param numFolds the number of folds. 116 */ 117 protected ArrayList<ArrayList<CBRCase>> createFolds(Collection<CBRCase> cases, int numFolds) 118 { ArrayList<ArrayList<CBRCase>> folds = new ArrayList<ArrayList<CBRCase>>(); 119 int foldsize = cases.size() / numFolds; 120 ArrayList<CBRCase> copy = new ArrayList<CBRCase>(cases); 121 122 for(int f=0; f<numFolds; f++) 123 { ArrayList<CBRCase> fold = new ArrayList<CBRCase>(); 124 for(int i=0; (i<foldsize)&&(copy.size()>0); i++) 125 { int random = (int) (Math.random() * copy.size()); 126 CBRCase _case = copy.get( random ); 127 copy.remove(random); 128 fold.add(_case); 129 } 130 folds.add(fold); 131 } 132 return folds; 133 } 134 135 /** 136 * Clears the current query and case base sets and populates the query set with fold 137 * f and the case base set with the cases not contained in fold f. 138 * @param f the fold to use. 139 * @param querySet the set of queries. 140 * @param caseBaseSet the set of cases. 141 */ 142 public static void getFolds(int f, List<CBRCase> querySet, List<CBRCase> caseBaseSet, ArrayList<ArrayList<CBRCase>> folds) 143 { querySet.clear(); 144 caseBaseSet.clear(); 145 146 querySet.addAll(folds.get(f)); 147 148 for(int i=0; i<folds.size(); i++) 149 if(i!=f) 150 caseBaseSet.addAll(folds.get(i)); 151 } 152 }