/**
 * Restaurant7.java
 * jCOLIBRI2 framework.
 * @author Juan A. Recio-García.
 * GAIA - Group for Artificial Intelligence Applications
 * http://gaia.fdi.ucm.es
 * 02/11/2007
 */
package jcolibri.test.recommenders.rec7;

import java.util.Collection;

import jcolibri.casebase.LinealCaseBase;
import jcolibri.cbraplications.StandardCBRApplication;
import jcolibri.cbrcore.Attribute;
import jcolibri.cbrcore.CBRCase;
import jcolibri.cbrcore.CBRCaseBase;
import jcolibri.cbrcore.CBRQuery;
import jcolibri.cbrcore.Connector;
import jcolibri.exception.ExecutionException;
import jcolibri.extensions.recommendation.ContentBasedProfile.ObtainQueryFromProfile;
import jcolibri.extensions.textual.IE.common.BasicInformationExtractor;
import jcolibri.extensions.textual.IE.common.DomainTopicClassifier;
import jcolibri.extensions.textual.IE.common.FeaturesExtractor;
import jcolibri.extensions.textual.IE.common.GlossaryLinker;
import jcolibri.extensions.textual.IE.common.PhrasesExtractor;
import jcolibri.extensions.textual.IE.common.StopWordsDetector;
import jcolibri.extensions.textual.IE.common.TextStemmer;
import jcolibri.extensions.textual.IE.common.ThesaurusLinker;
import jcolibri.extensions.textual.IE.opennlp.OpennlpMainNamesExtractor;
import jcolibri.extensions.textual.IE.opennlp.OpennlpPOStagger;
import jcolibri.extensions.textual.IE.opennlp.OpennlpSplitter;
import jcolibri.method.retrieve.RetrievalResult;
import jcolibri.method.retrieve.NNretrieval.NNConfig;
import jcolibri.method.retrieve.NNretrieval.NNScoringMethod;
import jcolibri.method.retrieve.NNretrieval.similarity.global.Average;
import jcolibri.method.retrieve.NNretrieval.similarity.local.Equal;
import jcolibri.method.retrieve.NNretrieval.similarity.local.textual.OverlapCoefficient;
import jcolibri.method.retrieve.selection.SelectCases;
import jcolibri.test.main.SwingProgressBar;
import jcolibri.test.test13.gui.ResultFrame;
import jcolibri.test.test13.similarity.AverageMultipleTextValues;
import jcolibri.test.test13.similarity.TokensContained;

/**
 * Single-Shot restaurants recommender using profiles, Nearest Neighbour retrieval and top k selection.
 * <br>
 * This is the typical recommender that obtains the user preferences from a profile,
 * then computes Nearest Neighbour retrieval + top K selection, displays the retrieved
 * items and finishes.
 * <br>Summary:
 * <ul>
 * <li>Type: Single-Shot
 * <li>Case base: restaurants
 * <li>One off Preference Elicitation: Profile
 * <li>Retrieval: Nearest Neighbour + selectTopK
 * <li>Display: Custom window.
 * </ul>
 * This recommender implements the following template:<br>
 * <center><img src="../Template7_Cycle.jpg"/></center>
 *
 * <br>Read the documentation of the recommenders extension for details about templates
 * and recommender strategies: {@link jcolibri.extensions.recommendation}
 *
 * @see jcolibri.extensions.recommendation.ContentBasedProfile.ObtainQueryFromProfile
 * @see jcolibri.method.retrieve.NNretrieval.NNScoringMethod
 * @see jcolibri.method.retrieve.selection.SelectCases
 *
 * @author Juan A. Recio-Garcia
 * @author Developed at University College Cork (Ireland) in collaboration with Derek Bridge.
 * @version 1.0
 *
 */
public class Restaurant7 implements StandardCBRApplication
{
    /** Number of best-scoring retrieval results kept by the top-k selection. */
    private static final int TOP_K = 5;

    /** Connector that supplies the restaurant cases; created in {@link #configure()}. */
    Connector _connector;

    /** Case base filled from the connector in {@link #preCycle()}. */
    CBRCaseBase _caseBase;

    /**
     * Creates the restaurants connector and an empty linear case base, and registers
     * a Swing progress bar so the progress of the following steps is displayed.
     *
     * @throws ExecutionException wrapping any exception raised while configuring
     */
    public void configure() throws ExecutionException
    {
        try
        {
            //Use a custom connector
            _connector = new RestaurantsConnector("jcolibri/test/test13/restaurants-large-v2.txt");
            _caseBase = new LinealCaseBase();

            //To show the progress
            jcolibri.util.ProgressController.clear();
            SwingProgressBar pb = new SwingProgressBar();
            jcolibri.util.ProgressController.register(pb);
        }
        catch (Exception e)
        {
            throw new ExecutionException(e);
        }
    }

    /**
     * Loads the linguistic resources (WordNet, glossary, phrase/feature/topic rules),
     * reads the cases through the connector and pre-computes the information
     * extraction steps over the whole case base.
     *
     * @return the initialised case base
     * @throws ExecutionException declared by the application contract
     * @see jcolibri.cbraplications.StandardCBRApplication#preCycle()
     */
    public CBRCaseBase preCycle() throws ExecutionException
    {
        //In the precycle we pre-compute the information extraction in the case base

        //Initialize Wordnet
        ThesaurusLinker.loadWordNet();
        //Load user-specific glossary
        GlossaryLinker.loadGlossary("jcolibri/test/test13/glossary.txt");
        //Load phrases rules
        PhrasesExtractor.loadRules("jcolibri/test/test13/phrasesRules.txt");
        //Load features rules
        FeaturesExtractor.loadRules("jcolibri/test/test13/featuresRules.txt");
        //Load topic rules
        DomainTopicClassifier.loadRules("jcolibri/test/test13/domainRules.txt");

        //Obtain cases
        _caseBase.init(_connector);
        Collection<CBRCase> cases = _caseBase.getCases();

        //Perform IE methods in the cases

        //Organize cases into paragraphs, sentences and tokens
        OpennlpSplitter.split(cases);
        //Detect stopwords
        StopWordsDetector.detectStopWords(cases);
        //Stem text
        TextStemmer.stem(cases);
        //Perform POS tagging
        OpennlpPOStagger.tag(cases);
        //Extract main names
        OpennlpMainNamesExtractor.extractMainNames(cases);
        //Extract phrases
        PhrasesExtractor.extractPhrases(cases);
        //Extract features
        FeaturesExtractor.extractFeatures(cases);
        //Classify with a topic
        DomainTopicClassifier.classifyWithTopic(cases);
        //Perform IE copying extracted features or phrases into other attributes of the case
        BasicInformationExtractor.extractInformation(cases);

        return _caseBase;
    }

    /**
     * Runs one recommendation: obtains the query from the stored profile, applies the
     * information extraction steps to it, links case tokens with query tokens, scores
     * every case with Nearest Neighbour retrieval, keeps the top {@value #TOP_K} results,
     * prints them and shows the best one in a {@link ResultFrame}.
     * <br>
     * If the retrieval yields no results, a warning is logged and no window is shown.
     *
     * @param query ignored; the query is always rebuilt from the profile file
     * @throws ExecutionException declared by the application contract
     * @see jcolibri.cbraplications.StandardCBRApplication#cycle(jcolibri.cbrcore.CBRQuery)
     */
    public void cycle(CBRQuery query) throws ExecutionException
    {
        //The user preferences come from the profile, so the received argument is replaced
        query = ObtainQueryFromProfile.obtainQueryFromProfile("src/jcolibri/test/recommenders/rec7/profile.xml");

        Collection<CBRCase> cases = _caseBase.getCases();

        //Perform IE methods in the query

        //Organize the query into paragraphs, sentences and tokens
        OpennlpSplitter.split(query);
        //Detect stopwords
        StopWordsDetector.detectStopWords(query);
        //Stem query
        TextStemmer.stem(query);
        //Perform POS tagging in the query
        OpennlpPOStagger.tag(query);
        //Extract main names
        OpennlpMainNamesExtractor.extractMainNames(query);

        //Now that we have the query we relate cases tokens with the query tokens
        //Using the user-defined glossary
        GlossaryLinker.LinkWithGlossary(cases, query);
        //Using wordnet
        ThesaurusLinker.linkWithWordNet(cases, query);

        //Extract phrases
        PhrasesExtractor.extractPhrases(query);
        //Extract features
        FeaturesExtractor.extractFeatures(query);
        //Classify with a topic
        DomainTopicClassifier.classifyWithTopic(query);
        //Perform IE copying extracted features or phrases into other attributes of the query
        BasicInformationExtractor.extractInformation(query);

        //Now we configure the KNN method with some user-defined similarity measures
        NNConfig knnConfig = buildSimilarityConfig();

        System.out.println("RESULT:");

        Collection<RetrievalResult> res = NNScoringMethod.evaluateSimilarity(cases, query, knnConfig);
        res = SelectCases.selectTopKRR(res, TOP_K);

        for (RetrievalResult rr : res)
        {
            System.out.println(rr);
        }

        //Without this guard an empty result would make iterator().next() throw NoSuchElementException
        if (res.isEmpty())
        {
            org.apache.commons.logging.LogFactory.getLog(Restaurant7.class)
                    .warn("No cases were retrieved for the profile query; nothing to display.");
            return;
        }

        //Show the result
        RestaurantDescription qrd = (RestaurantDescription) query.getDescription();
        CBRCase mostSimilar = res.iterator().next().get_case();
        RestaurantDescription rrd = (RestaurantDescription) mostSimilar.getDescription();
        new ResultFrame(qrd.getDescription().toString(), rrd.getName(), rrd.getAddress(), rrd.getDescription().toString());
    }

    /**
     * Builds the Nearest Neighbour configuration: {@link Average} as the global
     * similarity function plus one local similarity function for each compared
     * attribute of {@link RestaurantDescription}.
     *
     * @return the configuration used to score cases against the query
     */
    private NNConfig buildSimilarityConfig()
    {
        NNConfig knnConfig = new NNConfig();
        knnConfig.setDescriptionSimFunction(new Average());

        knnConfig.addMapping(new Attribute("location", RestaurantDescription.class), new Equal());

        //To compare text we use the OverlapCoefficient
        knnConfig.addMapping(new Attribute("description", RestaurantDescription.class), new OverlapCoefficient());
        //This function takes a string with several numerical values and computes the average
        knnConfig.addMapping(new Attribute("price", RestaurantDescription.class), new AverageMultipleTextValues(1000));
        //This function takes a string with several words separated by whitespaces, converts it to a set of tokens and
        //computes the size of the intersection of the query set and the case set normalized with the case set
        knnConfig.addMapping(new Attribute("foodType", RestaurantDescription.class), new TokensContained());
        knnConfig.addMapping(new Attribute("food", RestaurantDescription.class), new TokensContained());
        knnConfig.addMapping(new Attribute("alcohol", RestaurantDescription.class), new Equal());
        knnConfig.addMapping(new Attribute("takeout", RestaurantDescription.class), new Equal());
        knnConfig.addMapping(new Attribute("delivery", RestaurantDescription.class), new Equal());
        knnConfig.addMapping(new Attribute("parking", RestaurantDescription.class), new Equal());
        knnConfig.addMapping(new Attribute("catering", RestaurantDescription.class), new Equal());

        return knnConfig;
    }

    /**
     * De-initialises the WordNet bridge and closes the connector.
     *
     * @throws ExecutionException declared by the application contract
     * @see jcolibri.cbraplications.StandardCBRApplication#postCycle()
     */
    public void postCycle() throws ExecutionException
    {
        jcolibri.extensions.textual.wordnet.WordNetBridge.deInit();
        _connector.close();
    }

    /**
     * Runs the recommender once: configure, pre-cycle (printing the loaded case base),
     * cycle and post-cycle. Any {@link ExecutionException} is logged.
     *
     * @param args not used
     */
    public static void main(String[] args)
    {
        Restaurant7 test = new Restaurant7();
        try
        {
            test.configure();

            CBRCaseBase caseBase = test.preCycle();

            System.out.println("CASE BASE: ");
            for (CBRCase c : caseBase.getCases())
            {
                System.out.println(c);
            }
            System.out.println("Total: " + caseBase.getCases().size() + " cases");

            test.cycle(null);

            test.postCycle();
        }
        catch (ExecutionException e)
        {
            //Same message as before, but passing the throwable too keeps the stack trace and cause
            org.apache.commons.logging.LogFactory.getLog(Restaurant7.class).error(e, e);
        }
    }

}