001    /**
002     * MaintenanceHoldOutEvaluator.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * @author Lisa Cummins.
006     * GAIA - Group for Artificial Intelligence Applications
007     * http://gaia.fdi.ucm.es
008     * 23/07/2007
009     */
010    package jcolibri.extensions.maintenance_evaluation.evaluators;
011    
012    import java.util.ArrayList;
013    import java.util.Collection;
014    import java.util.Date;
015    import java.util.List;
016    
017    import jcolibri.cbrcore.CBRCase;
018    import jcolibri.cbrcore.CBRCaseBase;
019    import jcolibri.extensions.maintenance_evaluation.MaintenanceEvaluator;
020    
021    import org.apache.commons.logging.LogFactory;
022    
023    /**
024     * This evaluation splits the case base in two sets: a training set and a test set. 
025     * The training set is used as a case-base. It is maintained and then the cases in the
026     * test set are used as queries to evaluate the training set. 
027     * This process is performed serveral times.
028     * 
029     * @author Lisa Cummins.
030     * @author Juan A. Recio García - GAIA http://gaia.fdi.ucm.es
031     */
032    public class MaintenanceHoldOutEvaluator extends MaintenanceEvaluator 
033    {
034            /**
035             * Performs the Hold-Out evaluation. 
036             * @param testPercent percentage of the case base used as queries. 
037             * The case base is split randomly in each repetition.
038             * @param repetitions number of repetitions. 
039             */
040            public void HoldOut(int testPercent, int repetitions) {
041                    try {
042                            // Obtain the time
043                            long t = (new Date()).getTime();
044                            int numberOfCycles = 0;
045    
046                            // Run the precycle to load the case base
047                            LogFactory.getLog(this.getClass()).info("Running precycle()");
048                            CBRCaseBase caseBase = app.preCycle();
049    
050                            if (!(caseBase instanceof jcolibri.casebase.CachedLinealCaseBase))
051                                    LogFactory
052                                                    .getLog(this.getClass())
053                                                    .warn(
054                                                                    "Evaluation should be executed using a cached case base");
055    
056                            ArrayList<CBRCase> originalCases = new ArrayList<CBRCase>(
057                                            caseBase.getCases());
058                            
059                            int totalSteps = ((originalCases.size() * testPercent) / 100);
060                            totalSteps = totalSteps*repetitions;
061                            jcolibri.util.ProgressController.init(getClass(),"Hold Out Evaluation", totalSteps);
062    
063                            // For each repetition
064                            for (int rep = 0; rep < repetitions; rep++) 
065                            {       ArrayList<CBRCase> querySet = new ArrayList<CBRCase>();
066                                    prepareCases(originalCases, querySet, testPercent, caseBase);
067                                    
068                                    // Run cycle for each case in querySet
069                                    for (CBRCase c : querySet) 
070                                    {       // Run the cycle
071                                            LogFactory.getLog(this.getClass()).info(
072                                                            "Running cycle() " + numberOfCycles);
073                            
074                                            app.cycle(c);
075                                            
076                                            jcolibri.util.ProgressController.step(getClass());
077                                            numberOfCycles++;
078                                    }
079                            }
080    
081                            jcolibri.util.ProgressController.finish(getClass());
082                            
083                            //Revert case base to original state
084                            caseBase.forgetCases(originalCases);
085                            caseBase.learnCases(originalCases);
086                            
087                            // Run the poscycle to finish the application
088                            LogFactory.getLog(this.getClass()).info("Running postcycle()");
089                            app.postCycle();
090    
091                            t = (new Date()).getTime() - t;
092    
093                            // Obtain and complete the evaluation result
094                            report.setTotalTime(t);
095                            report.setNumberOfCycles(numberOfCycles);
096    
097                    } catch (Exception e) {
098                            System.out.println(e);
099                    }
100            }
101            
102            /**
103             * Prepares the cases for evaluation by setting up test and training sets
104             * @param originalCases Complete original set of cases
105             * @param querySet Where queries are to be stored
106             * @param testPercent Percentage of cases used as queries
107             * @param caseBase The case base
108             */
109            protected void prepareCases(Collection<CBRCase> originalCases, List<CBRCase> querySet, 
110                    int testPercent, CBRCaseBase caseBase)
111            {       
112                    ArrayList<CBRCase> caseBaseSet = new ArrayList<CBRCase>();
113    
114                    // Split the case base
115                    splitCaseBase(originalCases, querySet, caseBaseSet, testPercent);
116    
117                    // Clear the caseBase
118                    caseBase.forgetCases(originalCases);
119    
120                    // Set the cases that acts as case base in this repetition
121                    caseBase.learnCases(caseBaseSet);
122                    
123                    if(this.simConfig != null && this.editMethod != null)
124                    {       // Perform maintenance on this case base
125                            editCaseBase(caseBase);
126                    }
127            }
128    
129            /**
130             * Splits the case base in two sets: queries and case base
131             * 
132             * @param wholeCaseBase
133             *            Complete original case base
134             * @param querySet
135             *            Output param where queries are stored
136             * @param casebaseSet
137             *            Output param where case base is stored
138             * @param testPercent
139             *            Percentage of cases used as queries
140             */
141            public static void splitCaseBase(Collection<CBRCase> wholeCaseBase,
142                    List<CBRCase> querySet, List<CBRCase> casebaseSet, int testPercent) 
143            {       querySet.clear();
144                    casebaseSet.clear();
145    
146                    int querySetSize = (wholeCaseBase.size() * testPercent) / 100;
147                    casebaseSet.addAll(wholeCaseBase);
148    
149                    for (int i = 0; i < querySetSize; i++) 
150                    {       int random = (int) (Math.random() * casebaseSet.size());
151                            CBRCase _case = casebaseSet.get(random);
152                            casebaseSet.remove(random);
153                            querySet.add(_case);
154                    }
155            }
156    }