001    package jcolibri.extensions.maintenance_evaluation.evaluators;
002    
003    import java.io.BufferedReader;
004    import java.io.FileReader;
005    import java.util.ArrayList;
006    import java.util.Collection;
007    import java.util.Date;
008    import java.util.Iterator;
009    import java.util.List;
010    
011    import jcolibri.cbrcore.CBRCase;
012    import jcolibri.cbrcore.CBRCaseBase;
013    import jcolibri.extensions.maintenance_evaluation.MaintenanceEvaluator;
014    import jcolibri.util.FileIO;
015    
016    import org.apache.commons.logging.LogFactory;
017    
018    /**
019     * This evaluation splits the case base in two sets: one used for testing where each
020     * case is used as query, and another that acts as normal case base.
021     * It uses queries from a file so that the evaluation can be repeated with the 
022     * same test/training set split.
023     * The generateSplit() method does the initial random split and saves the query set in a file. 
024     * Later, the  HoldOutfromFile() method uses that file to load the query set and 
025     * perform the evaluation.
026     * 
027     * @author Juan A. Recio García & Lisa Cummins
028     */
029    public class MaintenanceSameSplitEvaluator extends MaintenanceEvaluator {
030    
031            /**
032             * Perform HoldOut evaluation using the queries contained in the specified file.
033             * @param file the file containing the queries.
034             */
035            public void HoldOutfromFile(String file) 
036            {       try 
037                    {       // Obtain the time
038                            long t = (new Date()).getTime();
039                            int numberOfCycles = 0;
040    
041                            // Run the precycle to load the case base
042                            LogFactory.getLog(this.getClass()).info("Running precycle()");
043                            CBRCaseBase caseBase = app.preCycle();
044    
045                            if (!(caseBase instanceof jcolibri.casebase.CachedLinealCaseBase))
046                                    LogFactory
047                                                    .getLog(this.getClass())
048                                                    .warn(
049                                                                    "Evaluation should be executed using a cached case base");
050    
051                            ArrayList<CBRCase> originalCases = new ArrayList<CBRCase>(caseBase.getCases());
052                            ArrayList<CBRCase> querySet = new ArrayList<CBRCase>();
053    
054                            prepareCases(originalCases, querySet, file, caseBase);
055                            
056                            int totalSteps = querySet.size();
057                            jcolibri.util.ProgressController.init(getClass(),
058                                            "Same Split - Hold Out Evaluation", totalSteps);
059                            
060                            // Run cycle for each case in querySet
061                            for (CBRCase c : querySet) {
062                                    // Run the cycle
063                                    LogFactory.getLog(this.getClass()).info(
064                                                    "Running cycle() " + numberOfCycles);
065    
066                            //      report.storeQueryNum();
067    
068                                    app.cycle(c);
069    
070                                    jcolibri.util.ProgressController.step(getClass());
071                                    numberOfCycles++;
072                            }
073    
074                            jcolibri.util.ProgressController.finish(getClass());
075    
076                            // Revert case base to original state
077                            caseBase.forgetCases(originalCases);
078                            caseBase.learnCases(originalCases);
079    
080                            // Run the poscycle to finish the application
081                            LogFactory.getLog(this.getClass()).info("Running postcycle()");
082                            app.postCycle();
083    
084                            t = (new Date()).getTime() - t;
085    
086                            // Obtain and complete the evaluation result
087                            report.setTotalTime(t);
088                            report.setNumberOfCycles(numberOfCycles);
089    
090                    } catch (Exception e) 
091                    {       LogFactory.getLog(this.getClass()).error(e);
092                    }
093            }
094            
095            
096            /**
097             * Prepares the cases for evaluation by setting up test and training sets
098             * @param originalCases Complete original set of cases
099             * @param querySet Where queries are to be stored
100             * @param caseBase The case base
101             */
102            protected void prepareCases(Collection<CBRCase> originalCases, List<CBRCase> querySet, 
103                    String file, CBRCaseBase caseBase)
104            {       
105                    ArrayList<CBRCase> caseBaseSet = new ArrayList<CBRCase>();
106    
107                // Split the case base
108                    splitCaseBaseFromFile(originalCases, querySet, caseBaseSet, file);
109    
110                    // Clear the caseBase
111                    caseBase.forgetCases(originalCases);
112    
113                    // Set the cases that acts as case base in this repetition
114                    caseBase.learnCases(caseBaseSet);
115                    
116                    if(this.simConfig != null && this.editMethod != null)
117                    {       // Perform maintenance on this case base
118                            editCaseBase(caseBase);
119                    }
120            }
121            
122            /**
123             * Splits the case base in two sets: queries and case base, with the
124             * queries contained in the given file
125             * 
126             * @param wholeCaseBase
127             *            Complete original case base
128             * @param querySet
129             *            Output param where queries are stored
130             * @param casebaseSet
131             *            Output param where case base is stored
132             * @param filename
133             *            File which contains the queries
134             */
135            public static void splitCaseBaseFromFile(Collection<CBRCase> wholeCaseBase,
136                    List<CBRCase> querySet, List<CBRCase> casebaseSet, String filename)
137            {       querySet.clear();
138                    casebaseSet.clear();
139                    
140                    casebaseSet.addAll(wholeCaseBase);
141                    
142                    try 
143                    {       BufferedReader br = null;
144                            br = new BufferedReader(new FileReader(FileIO.findFile(filename).getFile()));
145                            if (br == null)
146                                    throw new Exception("Error opening file: " + filename);
147            
148                            String line = "";
149                            while ((line = br.readLine()) != null) 
150                            {       CBRCase c = null;
151                                    int pos=0;
152                                    boolean found = false;
153                                    for(Iterator<CBRCase> iter = casebaseSet.iterator(); iter.hasNext() && (!found); )
154                                    {       c = iter.next();
155                                            if(c.getID().toString().equals(line))
156                                                    found = true;
157                                            else
158                                                    pos++;
159                                    }
160                                    if(c==null)
161                                    {       System.out.println("Case "+line+" not found into case base");
162                                            continue;
163                                    }
164                                    
165                                    casebaseSet.remove(pos);
166                                    querySet.add(c);
167                            }
168                            br.close();
169                    } catch (Exception e)
170                    {       System.out.println(e);
171                    }
172            }
173    }