/**
 * HoldOutEvaluator.java
 * jCOLIBRI2 framework.
 * @author Juan A. Recio-García.
 * GAIA - Group for Artificial Intelligence Applications
 * http://gaia.fdi.ucm.es
 * 07/05/2007
 */
package jcolibri.evaluation.evaluators;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.List;

import jcolibri.cbraplications.StandardCBRApplication;
import jcolibri.cbrcore.CBRCase;
import jcolibri.cbrcore.CBRCaseBase;
import jcolibri.evaluation.EvaluationReport;
import jcolibri.evaluation.Evaluator;
import jcolibri.exception.ExecutionException;

import org.apache.commons.logging.LogFactory;

/**
 * This evaluator splits the case base in two sets: one used for testing, where
 * each case is used as a query, and another that acts as the normal case base.
 * This process is performed several times.
 *
 * @author Juan A. Recio García - GAIA http://gaia.fdi.ucm.es
 * @version 2.0
 */
public class HoldOutEvaluator extends Evaluator {

    /** Application under evaluation; assigned by {@link #init(StandardCBRApplication)}. */
    protected StandardCBRApplication app;

    /**
     * Creates a fresh evaluation report, stores the application to evaluate and
     * configures it. A configuration failure is logged (with its stack trace)
     * and not rethrown.
     *
     * @param cbrApp
     *            Application to evaluate
     */
    public void init(StandardCBRApplication cbrApp) {
        report = new EvaluationReport();
        app = cbrApp;
        try {
            app.configure();
        } catch (ExecutionException e) {
            // Pass the throwable separately so the stack trace is not lost.
            LogFactory.getLog(this.getClass()).error(
                    "Error configuring the application", e);
        }
    }

    /**
     * Performs the Hold-Out evaluation.
     * <p>
     * Any exception raised during the evaluation is logged and not rethrown.
     * Once the repetitions have started, the progress controller is finished
     * and the case base is restored to its original content even if a cycle
     * fails.
     *
     * @param testPercent
     *            Percent of the case base used as queries. The case base is
     *            split randomly in each repetition. Values outside [0, 100]
     *            are clamped to that range.
     * @param repetitions
     *            Number of repetitions.
     */
    public void HoldOut(int testPercent, int repetitions) {
        try {
            // Obtain the time
            long t = (new Date()).getTime();
            int numberOfCycles = 0;

            if (testPercent < 0 || testPercent > 100) {
                LogFactory.getLog(this.getClass()).warn(
                        "testPercent " + testPercent
                                + " is outside [0, 100] and will be clamped");
            }

            // Run the precycle to load the case base
            LogFactory.getLog(this.getClass()).info("Running precycle()");
            CBRCaseBase caseBase = app.preCycle();

            if (!(caseBase instanceof jcolibri.casebase.CachedLinealCaseBase)) {
                LogFactory.getLog(this.getClass()).warn(
                        "Evaluation should be executed using a cached case base");
            }

            // Private snapshot of the cases, used to split and to restore.
            List<CBRCase> originalCases = new ArrayList<CBRCase>(
                    caseBase.getCases());

            int queriesPerRepetition = computeQuerySetSize(
                    originalCases.size(), testPercent);
            int totalSteps = queriesPerRepetition * Math.max(repetitions, 0);
            jcolibri.util.ProgressController.init(getClass(),
                    "Hold Out Evaluation", totalSteps);

            try {
                // For each repetition
                for (int rep = 0; rep < repetitions; rep++) {
                    List<CBRCase> querySet = new ArrayList<CBRCase>();
                    List<CBRCase> caseBaseSet = new ArrayList<CBRCase>();
                    // Split the case base
                    splitCaseBase(originalCases, querySet, caseBaseSet,
                            testPercent);

                    // Clear the caseBase
                    caseBase.forgetCases(originalCases);

                    // Set the cases that act as case base in this repetition
                    caseBase.learnCases(caseBaseSet);

                    // Run cycle for each case in querySet
                    for (CBRCase c : querySet) {
                        LogFactory.getLog(this.getClass()).info(
                                "Running cycle() " + numberOfCycles);
                        app.cycle(c);

                        jcolibri.util.ProgressController.step(getClass());
                        numberOfCycles++;
                    }
                }
            } finally {
                // Runs on failure too, so a failing cycle does not leave the
                // progress open or the case base truncated.
                jcolibri.util.ProgressController.finish(getClass());

                // Revert case base to original state
                caseBase.forgetCases(originalCases);
                caseBase.learnCases(originalCases);
            }

            // Run the postcycle to finish the application
            LogFactory.getLog(this.getClass()).info("Running postcycle()");
            app.postCycle();

            t = (new Date()).getTime() - t;

            // Obtain and complete the evaluation result
            report.setTotalTime(t);
            report.setNumberOfCycles(numberOfCycles);

        } catch (Exception e) {
            // Pass the throwable separately so the stack trace is not lost.
            LogFactory.getLog(this.getClass()).error(
                    "Hold Out evaluation failed", e);
        }
    }

    /**
     * Splits the case base in two sets: queries and case base.
     * <p>
     * Both output lists are cleared first. Cases are moved at random from the
     * case-base set to the query set until the query set holds
     * {@code testPercent} percent of the cases (rounded down). The percentage
     * is clamped to [0, 100], so the method never draws from an empty list.
     *
     * @param holeCaseBase
     *            Complete original case base
     * @param querySet
     *            Output param where queries are stored
     * @param casebaseSet
     *            Output param where case base is stored
     * @param testPercent
     *            Percentage of cases used as queries
     */
    protected void splitCaseBase(Collection<CBRCase> holeCaseBase,
            List<CBRCase> querySet, List<CBRCase> casebaseSet, int testPercent) {
        querySet.clear();
        casebaseSet.clear();

        int querySetSize = computeQuerySetSize(holeCaseBase.size(), testPercent);
        casebaseSet.addAll(holeCaseBase);

        for (int i = 0; i < querySetSize; i++) {
            int random = (int) (Math.random() * casebaseSet.size());
            // List.remove(int) returns the removed element.
            querySet.add(casebaseSet.remove(random));
        }
    }

    /**
     * Computes how many cases form the query set: {@code testPercent} percent
     * of {@code totalCases}, rounded down and clamped to [0, totalCases].
     *
     * @param totalCases
     *            Number of cases in the complete case base
     * @param testPercent
     *            Requested percentage of cases used as queries
     * @return Number of cases to use as queries
     */
    private static int computeQuerySetSize(int totalCases, int testPercent) {
        // long arithmetic avoids int overflow for very large case bases.
        long requested = ((long) totalCases * testPercent) / 100;
        if (requested < 0) {
            return 0;
        }
        if (requested > totalCases) {
            return totalCases;
        }
        return (int) requested;
    }
}