001    /**
002     * HoldOutEvaluator.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 07/05/2007
008     */
009    package jcolibri.evaluation.evaluators;
010    
011    import java.util.ArrayList;
012    import java.util.Collection;
013    import java.util.Date;
014    import java.util.List;
015    
016    import jcolibri.cbraplications.StandardCBRApplication;
017    import jcolibri.cbrcore.CBRCase;
018    import jcolibri.cbrcore.CBRCaseBase;
019    import jcolibri.evaluation.EvaluationReport;
020    import jcolibri.evaluation.Evaluator;
021    import jcolibri.exception.ExecutionException;
022    
023    import org.apache.commons.logging.LogFactory;
024    
025    /**
026     * This method splits the case base in two sets: one used for testing where each
027     * case is used as query, and another that acts as normal case base. This
028     * process is performed serveral times.
029     * 
030     * @author Juan A. Recio García - GAIA http://gaia.fdi.ucm.es
031     * @version 2.0
032     */
033    public class HoldOutEvaluator extends Evaluator {
034    
035            protected StandardCBRApplication app;
036    
037            public void init(StandardCBRApplication cbrApp) {
038                    
039                    report = new EvaluationReport();
040                    app = cbrApp;
041                    try {
042                            app.configure();
043                    } catch (ExecutionException e) {
044                            LogFactory.getLog(this.getClass()).error(e);
045                    }
046            }
047    
048            /**
049             * Performs the Hold-Out evaluation. 
050             * @param testPercent Percent of the case base used as queries. The case base is splited randomly in each repetition.
051             * @param repetitions Number of repetitions. 
052             */
053            public void HoldOut(int testPercent, int repetitions) {
054                    try {
055                            // Obtain the time
056                            long t = (new Date()).getTime();
057                            int numberOfCycles = 0;
058                            // Run the precycle to load the case base
059                            LogFactory.getLog(this.getClass()).info("Running precycle()");
060                            CBRCaseBase caseBase = app.preCycle();
061    
062                            if (!(caseBase instanceof jcolibri.casebase.CachedLinealCaseBase))
063                                    LogFactory
064                                                    .getLog(this.getClass())
065                                                    .warn(
066                                                                    "Evaluation should be executed using a cached case base");
067    
068                            ArrayList<CBRCase> originalCases = new ArrayList<CBRCase>(
069                                            caseBase.getCases());
070                            
071                            int totalSteps = ((originalCases.size() * testPercent) / 100);
072                            totalSteps = totalSteps*repetitions;
073                            jcolibri.util.ProgressController.init(getClass(),"Hold Out Evaluation", totalSteps);
074                            
075                            // For each repetition
076                            for (int rep = 0; rep < repetitions; rep++) {
077                                    ArrayList<CBRCase> querySet = new ArrayList<CBRCase>();
078                                    ArrayList<CBRCase> caseBaseSet = new ArrayList<CBRCase>();
079                                    // Split the case base
080                                    splitCaseBase(originalCases, querySet, caseBaseSet, testPercent);
081    
082                                    // Clear the caseBase
083                                    caseBase.forgetCases(originalCases);
084    
085                                    // Set the cases that acts as case base in this repetition
086                                    caseBase.learnCases(caseBaseSet);
087    
088                                    // Run cycle for each case in querySet
089                                    for (CBRCase c : querySet) {
090                                            // Run the cycle
091                                            LogFactory.getLog(this.getClass()).info(
092                                                            "Running cycle() " + numberOfCycles);
093                                            app.cycle(c);
094    
095                                            jcolibri.util.ProgressController.step(getClass());
096                                            numberOfCycles++;
097                                    }
098                            }
099    
100                            jcolibri.util.ProgressController.finish(getClass());
101                            
102                            //Revert case base to original state
103                            caseBase.forgetCases(originalCases);
104                            caseBase.learnCases(originalCases);
105                            
106                            // Run the poscycle to finish the application
107                            LogFactory.getLog(this.getClass()).info("Running postcycle()");
108                            app.postCycle();
109    
110                            t = (new Date()).getTime() - t;
111    
112                            // Obtain and complete the evaluation result
113                            report.setTotalTime(t);
114                            report.setNumberOfCycles(numberOfCycles);
115    
116                    } catch (Exception e) {
117                            LogFactory.getLog(this.getClass()).error(e);
118                    }
119    
120            }
121    
122            /**
123             * Splits the case base in two sets: queries and case base
124             * 
125             * @param holeCaseBase
126             *            Complete original case base
127             * @param querySet
128             *            Output param where queries are stored
129             * @param casebaseSet
130             *            Output param where case base is stored
131             * @param testPercent
132             *            Percentage of cases used as queries
133             */
134            protected void splitCaseBase(Collection<CBRCase> holeCaseBase,
135                            List<CBRCase> querySet, List<CBRCase> casebaseSet, int testPercent) {
136                    querySet.clear();
137                    casebaseSet.clear();
138    
139                    int querySetSize = (holeCaseBase.size() * testPercent) / 100;
140                    casebaseSet.addAll(holeCaseBase);
141    
142                    for (int i = 0; i < querySetSize; i++) {
143                            int random = (int) (Math.random() * casebaseSet.size());
144                            CBRCase _case = casebaseSet.get(random);
145                            casebaseSet.remove(random);
146                            querySet.add(_case);
147                    }
148            }
149    }