001    /**
002     * SameSplitEvaluator.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 07/05/2007
008     */
009    package jcolibri.evaluation.evaluators;
010    
011    import java.io.BufferedReader;
012    import java.io.BufferedWriter;
013    import java.io.FileReader;
014    import java.io.FileWriter;
015    import java.util.ArrayList;
016    import java.util.Collection;
017    import java.util.Date;
018    import java.util.Iterator;
019    import java.util.List;
020    
021    import jcolibri.cbraplications.StandardCBRApplication;
022    import jcolibri.cbrcore.CBRCase;
023    import jcolibri.cbrcore.CBRCaseBase;
024    import jcolibri.evaluation.EvaluationReport;
025    import jcolibri.evaluation.Evaluator;
026    import jcolibri.exception.ExecutionException;
027    import jcolibri.util.FileIO;
028    
029    import org.apache.commons.logging.LogFactory;
030    
031    /**
032     * This method splits the case base in two sets: one used for testing where each
033     * case is used as query, and another that acts as normal case base.
034     * <br>
035     * This method is different of the other evaluators beacuse the split is stored in a file that can be used in following evaluations.
036     * This way, the same set is used as queries for each evaluation.
037     * <br>
038     * The generateSplit() method does the initial random split and saves the query set in a file. 
039     * Later, the  HoldOutfromFile() method uses that file to load the queries set and perform the evaluation.
040     * 
041     * @author Juan A. Recio García & Lisa Cummins
042     * @version 2.0
043     */
044    
045    public class SameSplitEvaluator extends Evaluator {
046    
047            protected StandardCBRApplication app;
048    
049            public void init(StandardCBRApplication cbrApp) {
050    
051                    report = new EvaluationReport();
052                    app = cbrApp;
053                    try {
054                            app.configure();
055                    } catch (ExecutionException e) {
056                            LogFactory.getLog(this.getClass()).error(e);
057                    }
058            }
059    
060            public void generateSplit(int testPercent,  String filename)
061            {
062                    //Run the precycle to load the case base
063                    LogFactory.getLog(this.getClass()).info("Running precycle()");
064                    CBRCaseBase caseBase = null;
065                    try {
066                            caseBase = app.preCycle();
067                    } catch (ExecutionException e) {
068                            org.apache.commons.logging.LogFactory.getLog(this.getClass()).error(e);
069                    }
070    
071                    if (!(caseBase instanceof jcolibri.casebase.CachedLinealCaseBase))
072                            LogFactory
073                                            .getLog(this.getClass())
074                                            .warn(
075                                                            "Evaluation should be executed using a cached case base");
076    
077                    ArrayList<CBRCase> originalCases = new ArrayList<CBRCase>(
078                                    caseBase.getCases());
079                                    
080                    ArrayList<CBRCase> querySet = new ArrayList<CBRCase>();
081                    ArrayList<CBRCase> caseBaseSet = new ArrayList<CBRCase>();
082                            
083                    // Split the case base
084                    splitCaseBase(originalCases, querySet, caseBaseSet, testPercent);
085                    
086                    save(querySet, filename);
087            }
088            
089            protected void save(Collection<CBRCase> queries, String filename)
090            {
091                    try {
092                            BufferedWriter br = null;
093                            br = new BufferedWriter(new FileWriter(filename));
094                            if (br == null)
095                                    throw new Exception("Error opening file for writing: "+ filename);
096                            
097                            for (CBRCase _case : queries) {
098                                    br.write(_case.getID().toString());
099                                    br.newLine();
100                            }
101                            br.close();
102                    } catch (Exception e) {
103                            org.apache.commons.logging.LogFactory.getLog(this.getClass()).error(e);
104                    }
105                            
106    
107            }
108            
109            public void HoldOutfromFile(String file) {
110                    try {
111                            // Obtain the time
112                            long t = (new Date()).getTime();
113                            int numberOfCycles = 0;
114    
115                            // Run the precycle to load the case base
116                            LogFactory.getLog(this.getClass()).info("Running precycle()");
117                            CBRCaseBase caseBase = app.preCycle();
118    
119                            if (!(caseBase instanceof jcolibri.casebase.CachedLinealCaseBase))
120                                    LogFactory
121                                                    .getLog(this.getClass())
122                                                    .warn(
123                                                                    "Evaluation should be executed using a cached case base");
124    
125                            ArrayList<CBRCase> originalCases = new ArrayList<CBRCase>(caseBase.getCases());
126                            ArrayList<CBRCase> querySet = new ArrayList<CBRCase>();
127                            ArrayList<CBRCase> caseBaseSet = new ArrayList<CBRCase>();
128                            
129                            // Split the case base
130                            splitCaseBaseFromFile(originalCases, querySet, caseBaseSet, file);
131                            
132                            int totalSteps = querySet.size();
133                            jcolibri.util.ProgressController.init(getClass(),
134                                            "Same Split - Hold Out Evaluation", totalSteps);
135    
136    
137                            // Clear the caseBase
138                            caseBase.forgetCases(originalCases);
139    
140                            // Set the cases that acts as case base in this repetition
141                            caseBase.learnCases(caseBaseSet);
142    
143                            // Run cycle for each case in querySet
144                            for (CBRCase c : querySet) {
145                                    // Run the cycle
146                                    LogFactory.getLog(this.getClass()).info(
147                                                    "Running cycle() " + numberOfCycles);
148                                    app.cycle(c);
149    
150                                    jcolibri.util.ProgressController.step(getClass());
151                                    numberOfCycles++;
152                            }
153    
154                            jcolibri.util.ProgressController.finish(getClass());
155    
156                            // Revert case base to original state
157                            caseBase.forgetCases(originalCases);
158                            caseBase.learnCases(originalCases);
159    
160                            // Run the poscycle to finish the application
161                            LogFactory.getLog(this.getClass()).info("Running postcycle()");
162                            app.postCycle();
163    
164                            t = (new Date()).getTime() - t;
165    
166                            // Obtain and complete the evaluation result
167                            report.setTotalTime(t);
168                            report.setNumberOfCycles(numberOfCycles);
169    
170                    } catch (Exception e) {
171                            LogFactory.getLog(this.getClass()).error(e);
172                    }
173    
174            }
175    
176            /**
177             * Splits the case base in two sets: queries and case base
178             * 
179             * @param wholeCaseBase
180             *            Complete original case base
181             * @param querySet
182             *            Output param where queries are stored
183             * @param casebaseSet
184             *            Output param where case base is stored
185             * @param testPercent
186             *            Percentage of cases used as queries
187             */
188            protected void splitCaseBase(Collection<CBRCase> wholeCaseBase,
189                            List<CBRCase> querySet, List<CBRCase> casebaseSet, int testPercent) {
190                    querySet.clear();
191                    casebaseSet.clear();
192    
193                    int querySetSize = (wholeCaseBase.size() * testPercent) / 100;
194                    casebaseSet.addAll(wholeCaseBase);
195    
196                    for (int i = 0; i < querySetSize; i++) {
197                            int random = (int) (Math.random() * casebaseSet.size());
198                            CBRCase _case = casebaseSet.get(random);
199                            casebaseSet.remove(random);
200                            querySet.add(_case);
201                    }
202            }
203            
204            protected void splitCaseBaseFromFile(Collection<CBRCase> wholeCaseBase,
205                            List<CBRCase> querySet, List<CBRCase> casebaseSet, String filename)
206            {
207                    querySet.clear();
208                    casebaseSet.clear();
209                    
210                    casebaseSet.addAll(wholeCaseBase);
211                    
212                    try {
213                            BufferedReader br = null;
214                            br = new BufferedReader(new FileReader(FileIO.findFile(filename).getFile()));
215                            if (br == null)
216                                    throw new Exception("Error opening file: " + filename);
217    
218                            String line = "";
219                            while ((line = br.readLine()) != null) {
220                                    CBRCase c = null;
221                                    int pos=0;
222                                    boolean found = false;
223                                    for(Iterator<CBRCase> iter = casebaseSet.iterator(); iter.hasNext() && (!found); )
224                                    {
225                                            c = iter.next();
226                                            if(c.getID().toString().equals(line))
227                                                    found = true;
228                                            else
229                                                    pos++;
230                                    }
231                                    if(c==null)
232                                    {
233                                            org.apache.commons.logging.LogFactory.getLog(this.getClass()).error("Case "+line+" not found into case base");
234                                            continue;
235                                    }
236                                    
237                                    casebaseSet.remove(pos);
238                                    querySet.add(c);
239                            }
240                            br.close();
241                    } catch (Exception e) {
242                            org.apache.commons.logging.LogFactory.getLog(this.getClass()).error(e);
243                    }
244            }
245    
246    }