/**
 * MaintenanceHoldOutEvaluator.java
 * jCOLIBRI2 framework.
 * @author Juan A. Recio-García.
 * @author Lisa Cummins.
 * GAIA - Group for Artificial Intelligence Applications
 * http://gaia.fdi.ucm.es
 * 23/07/2007
 */
package jcolibri.extensions.evaluation.evaluators;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.List;

import jcolibri.cbrcore.CBRCase;
import jcolibri.cbrcore.CBRCaseBase;
import jcolibri.extensions.evaluation.MaintenanceEvaluator;

import org.apache.commons.logging.LogFactory;

/**
 * Hold-Out evaluator with an optional maintenance step.
 * <p>
 * The case base is split in two sets: one used for testing, where each case
 * is used as a query, and another that acts as the normal case base. This
 * process is performed several times, with a new random split in each
 * repetition.
 *
 * @author Juan A. Recio García - GAIA http://gaia.fdi.ucm.es
 * @author Lisa Cummins.
 * @version 2.0
 */
public class MaintenanceHoldOutEvaluator extends MaintenanceEvaluator
{
    /**
     * Performs the Hold-Out evaluation.
     * <p>
     * Runs the application's precycle, then for each repetition splits the
     * case base and runs one cycle per query case. Afterwards the case base
     * is restored to the cases it held after the precycle, the postcycle is
     * run, and the total time and number of cycles are stored in the report.
     * <p>
     * Any exception raised during the evaluation is logged and not
     * propagated; in that case the postcycle is not run and the report is
     * not updated.
     *
     * @param testPercent Percent of the case base used as queries. The case
     *            base is split randomly in each repetition. Values outside
     *            0..100 are treated as 0 or 100 respectively.
     * @param repetitions Number of repetitions.
     */
    public void HoldOut(int testPercent, int repetitions)
    {
        try
        {
            // Obtain the start time
            long start = System.currentTimeMillis();
            int numberOfCycles = 0;

            // Run the precycle to load the case base
            LogFactory.getLog(this.getClass()).info("Running precycle()");
            CBRCaseBase caseBase = app.preCycle();

            if (!(caseBase instanceof jcolibri.casebase.CachedLinealCaseBase))
            {
                LogFactory.getLog(this.getClass()).warn(
                        "Evaluation should be executed using a cached case base");
            }

            // Private snapshot of the cases, used to rebuild the case base
            ArrayList<CBRCase> originalCases = new ArrayList<CBRCase>(
                    caseBase.getCases());

            // Same per-repetition count that splitCaseBase() uses, so the
            // progress total matches the number of cycles actually run
            int totalSteps = querySetSize(originalCases.size(), testPercent)
                    * repetitions;
            jcolibri.util.ProgressController.init(getClass(),
                    "Hold Out Evaluation", totalSteps);

            try
            {
                // For each repetition
                for (int rep = 0; rep < repetitions; rep++)
                {
                    ArrayList<CBRCase> querySet = new ArrayList<CBRCase>();
                    prepareCases(originalCases, querySet, testPercent, caseBase);

                    // Run cycle for each case in querySet
                    for (CBRCase c : querySet)
                    {
                        LogFactory.getLog(this.getClass()).info(
                                "Running cycle() " + numberOfCycles);

                        app.cycle(c);

                        jcolibri.util.ProgressController.step(getClass());
                        numberOfCycles++;
                    }
                }
            }
            finally
            {
                // Executed even when a cycle fails, so that the progress
                // tracking is closed and the case base is not left in its
                // split (and possibly edited) state
                jcolibri.util.ProgressController.finish(getClass());

                // Revert case base to original state
                caseBase.forgetCases(originalCases);
                caseBase.learnCases(originalCases);
            }

            // Run the postcycle to finish the application
            LogFactory.getLog(this.getClass()).info("Running postcycle()");
            app.postCycle();

            long elapsed = System.currentTimeMillis() - start;

            // Obtain and complete the evaluation result
            report.setTotalTime(elapsed);
            report.setNumberOfCycles(numberOfCycles);
        }
        catch (Exception e)
        {
            // Log with the cause attached instead of printing only
            // e.toString() to standard output
            LogFactory.getLog(this.getClass()).error(
                    "Hold-Out evaluation failed", e);
        }
    }

    /**
     * Prepares the cases for evaluation by setting up test and training sets.
     * <p>
     * The original cases are split randomly, the case base is emptied of the
     * original cases and then loaded with the training part only. When both
     * a similarity configuration and an edit method are set, the case base
     * is additionally passed to {@code editCaseBase}.
     *
     * @param originalCases Complete original set of cases
     * @param querySet Where queries are to be stored (previous content is
     *            discarded)
     * @param testPercent Percentage of cases used as queries
     * @param caseBase The case base
     */
    protected void prepareCases(Collection<CBRCase> originalCases,
            List<CBRCase> querySet, int testPercent, CBRCaseBase caseBase)
    {
        ArrayList<CBRCase> caseBaseSet = new ArrayList<CBRCase>();

        // Split the case base
        splitCaseBase(originalCases, querySet, caseBaseSet, testPercent);

        // Clear the caseBase
        caseBase.forgetCases(originalCases);

        // Set the cases that act as case base in this repetition
        caseBase.learnCases(caseBaseSet);

        // Maintenance is optional: it is only applied when fully configured
        if (this.simConfig != null && this.editMethod != null)
        {
            // Perform maintenance on this case base
            editCaseBase(caseBase);
        }
    }

    /**
     * Splits the case base in two sets: queries and case base.
     * <p>
     * Both output lists are cleared first. Cases are then drawn at random,
     * without replacement, into the query set; the remaining cases form the
     * case base set. The input collection itself is not modified.
     *
     * @param wholeCaseBase
     *            Complete original case base
     * @param querySet
     *            Output param where queries are stored
     * @param casebaseSet
     *            Output param where case base is stored
     * @param testPercent
     *            Percentage of cases used as queries. Values below 0 select
     *            no query and values above 100 select every case.
     */
    public static void splitCaseBase(Collection<CBRCase> wholeCaseBase,
            List<CBRCase> querySet, List<CBRCase> casebaseSet, int testPercent)
    {
        querySet.clear();
        casebaseSet.clear();

        int queryCount = querySetSize(wholeCaseBase.size(), testPercent);
        casebaseSet.addAll(wholeCaseBase);

        for (int i = 0; i < queryCount; i++)
        {
            // queryCount never exceeds the number of cases, so casebaseSet
            // is not empty here and the index is always valid
            int random = (int) (Math.random() * casebaseSet.size());
            querySet.add(casebaseSet.remove(random));
        }
    }

    /**
     * Computes how many cases are used as queries in one repetition.
     *
     * @param totalCases Number of cases available
     * @param testPercent Percentage of cases used as queries
     * @return {@code totalCases * testPercent / 100} (integer division),
     *         limited to the range {@code 0..totalCases}
     */
    private static int querySetSize(int totalCases, int testPercent)
    {
        // long arithmetic: the product may not fit in an int
        long requested = ((long) totalCases * testPercent) / 100;
        return (int) Math.max(0L, Math.min(requested, (long) totalCases));
    }
}