001    package jcolibri.extensions.maintenance_evaluation.evaluators;
002    
003    import java.util.ArrayList;
004    import java.util.Collection;
005    import java.util.Date;
006    import java.util.List;
007    
008    import jcolibri.cbrcore.CBRCase;
009    import jcolibri.cbrcore.CBRCaseBase;
010    import jcolibri.extensions.maintenance_evaluation.MaintenanceEvaluator;
011    
012    import org.apache.commons.logging.LogFactory;
013    
014    /**
015     * This evaluation divides the case base into several random folds 
016     * (indicated by the user). 
017     * For each fold, their cases are used as queries and the remaining folds are 
018     * used together as case base. Maintenance is performed on the case-base before
019     * running the queries.
020     * This process is performed several times.
021     * 
022     * @author Lisa Cummins.
023     * @author Juan A. Recio García - GAIA http://gaia.fdi.ucm.es
024     */
025    public class MaintenanceNFoldEvaluator extends MaintenanceEvaluator
026    {
027        /**
028         * Executes the N-Fold evaluation.
029         * @param numFolds the number of randomly generated folds.
030         * @param repetitions the number of repetitions
031         */
032        public void NFoldEvaluation(int numFolds, int repetitions)
033        {   try
034            {   //Get the time
035                long t = (new Date()).getTime();
036                int numberOfCycles = 0;
037    
038                // Run the precycle to load the case base
039                LogFactory.getLog(this.getClass()).info("Running precycle()");
040                CBRCaseBase caseBase = app.preCycle();
041    
042                if (!(caseBase instanceof jcolibri.casebase.CachedLinealCaseBase))
043                    LogFactory.getLog(this.getClass()).warn(
044                            "Evaluation should be executed using a cached case base");
045                
046                Collection<CBRCase> cases = new ArrayList<CBRCase>(caseBase.getCases());
047                
048                //For each repetition
049                for(int r=0; r<repetitions; r++)
050                {   //Create the folds
051                    ArrayList<ArrayList<CBRCase>> folds = createFolds(cases, numFolds);
052                    
053                    //For each fold
054                    for(int f=0; f<numFolds; f++)
055                    {   ArrayList<CBRCase> querySet = new ArrayList<CBRCase>();
056                        prepareCases(cases, querySet, f, caseBase, folds);
057                    
058                        //Run cycle for each case in querySet (current fold)
059                        for(CBRCase c: querySet)
060                        {   LogFactory.getLog(this.getClass()).info(
061                                "Running cycle() " + numberOfCycles);
062                            app.cycle(c);
063                            numberOfCycles++;
064                        }          
065                    } 
066                }
067    
068                //Revert case base to original state
069                caseBase.forgetCases(cases);
070                caseBase.learnCases(cases);
071                
072                //Run the poscycle to finish the application
073                LogFactory.getLog(this.getClass()).info("Running postcycle()");
074                app.postCycle();
075    
076                //Complete the evaluation result
077                report.setTotalTime(t);
078                report.setNumberOfCycles(numberOfCycles);
079                
080            } catch (Exception e) 
081            {       LogFactory.getLog(this.getClass()).error(e);
082            }
083    
084        }
085    
086        /**
087         * Prepares the cases for evaluation by setting up test and training sets    
088         * @param originalCases Complete original set of cases
089         * @param querySet Where queries are to be stored
090         * @param fold The fold number
091         * @param caseBase The case base
092         */
093        protected void prepareCases(Collection<CBRCase> originalCases, List<CBRCase> querySet, 
094            int fold, CBRCaseBase caseBase, ArrayList<ArrayList<CBRCase>> folds)
095        {   ArrayList<CBRCase> caseBaseSet = new ArrayList<CBRCase>();
096                    
097            //Obtain the query and casebase sets
098            getFolds(fold, querySet, caseBaseSet, folds);
099                
100            //Clear the caseBase
101            caseBase.forgetCases(originalCases);
102                
103            //Set the cases that acts as casebase in this cycle
104            caseBase.learnCases(caseBaseSet);
105            
106            if(this.simConfig != null && this.editMethod != null)
107            {       // Perform maintenance on this case base
108                    editCaseBase(caseBase);
109            }
110        }
111    
112        /**
113         * Divides the given cases into the given number of folds.
114         * @param cases the original cases.
115         * @param numFolds the number of folds.
116         */
117        protected ArrayList<ArrayList<CBRCase>> createFolds(Collection<CBRCase> cases, int numFolds)
118        {   ArrayList<ArrayList<CBRCase>> folds = new ArrayList<ArrayList<CBRCase>>();
119            int foldsize = cases.size() / numFolds;
120            ArrayList<CBRCase> copy = new ArrayList<CBRCase>(cases);
121            
122            for(int f=0; f<numFolds; f++)
123            {   ArrayList<CBRCase> fold = new ArrayList<CBRCase>();
124                for(int i=0; (i<foldsize)&&(copy.size()>0); i++)
125                {   int random = (int) (Math.random() * copy.size());
126                    CBRCase _case = copy.get( random );
127                    copy.remove(random);
128                    fold.add(_case);
129                }
130                folds.add(fold);
131            }
132            return folds;
133        }
134    
135        /**
136         * Clears the current query and case base sets and populates the query set with fold
137         * f and the case base set with the cases not contained in fold f.
138         * @param f the fold to use.
139         * @param querySet the set of queries.
140         * @param caseBaseSet the set of cases.
141         */
142        public static void getFolds(int f, List<CBRCase> querySet, List<CBRCase> caseBaseSet, ArrayList<ArrayList<CBRCase>> folds)
143        {   querySet.clear();
144            caseBaseSet.clear();
145            
146            querySet.addAll(folds.get(f));
147            
148            for(int i=0; i<folds.size(); i++)
149                if(i!=f)
150                    caseBaseSet.addAll(folds.get(i));
151        }
152    }