001    /**
002     * Restaurant7.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 02/11/2007
008     */
009    package jcolibri.test.recommenders.rec7;
010    
011    import java.util.Collection;
012    
013    import jcolibri.casebase.LinealCaseBase;
014    import jcolibri.cbraplications.StandardCBRApplication;
015    import jcolibri.cbrcore.Attribute;
016    import jcolibri.cbrcore.CBRCase;
017    import jcolibri.cbrcore.CBRCaseBase;
018    import jcolibri.cbrcore.CBRQuery;
019    import jcolibri.cbrcore.Connector;
020    import jcolibri.exception.ExecutionException;
021    import jcolibri.extensions.recommendation.ContentBasedProfile.ObtainQueryFromProfile;
022    import jcolibri.extensions.textual.IE.common.BasicInformationExtractor;
023    import jcolibri.extensions.textual.IE.common.DomainTopicClassifier;
024    import jcolibri.extensions.textual.IE.common.FeaturesExtractor;
025    import jcolibri.extensions.textual.IE.common.GlossaryLinker;
026    import jcolibri.extensions.textual.IE.common.PhrasesExtractor;
027    import jcolibri.extensions.textual.IE.common.StopWordsDetector;
028    import jcolibri.extensions.textual.IE.common.TextStemmer;
029    import jcolibri.extensions.textual.IE.common.ThesaurusLinker;
030    import jcolibri.extensions.textual.IE.opennlp.OpennlpMainNamesExtractor;
031    import jcolibri.extensions.textual.IE.opennlp.OpennlpPOStagger;
032    import jcolibri.extensions.textual.IE.opennlp.OpennlpSplitter;
033    import jcolibri.method.retrieve.RetrievalResult;
034    import jcolibri.method.retrieve.NNretrieval.NNConfig;
035    import jcolibri.method.retrieve.NNretrieval.NNScoringMethod;
036    import jcolibri.method.retrieve.NNretrieval.similarity.global.Average;
037    import jcolibri.method.retrieve.NNretrieval.similarity.local.Equal;
038    import jcolibri.method.retrieve.NNretrieval.similarity.local.textual.OverlapCoefficient;
039    import jcolibri.method.retrieve.selection.SelectCases;
040    import jcolibri.test.main.SwingProgressBar;
041    import jcolibri.test.test13.gui.ResultFrame;
042    import jcolibri.test.test13.similarity.AverageMultipleTextValues;
043    import jcolibri.test.test13.similarity.TokensContained;
044    
045    /**
046     * Single-Shot restaurants recommender using profiles, Nearest Neighbour retrieval and top k selection.
047     * <br>
048     * This is the typical recommender that obtains the user preferences from a profile, 
049     * then computes Nearest Neighbour retrieval + top K selection, displays the retrieved
050     * items and finishes.
051     * <br>Summary:
052     * <ul>
053     * <li>Type: Single-Shot
054     * <li>Case base: restaurants
055     * <li>One off Preference Elicitation: Profile
056     * <li>Retrieval: Nearest Neighbour + selectTopK
057     * <li>Display: Custom window.
058     * </ul>
059     * This recommender implements the following template:<br>
060     * <center><img src="../Template7_Cycle.jpg"/></center>
061     * 
062     * <br>Read the documentation of the recommenders extension for details about templates
063     * and recommender strategies: {@link jcolibri.extensions.recommendation}
064     * 
065     * @see jcolibri.extensions.recommendation.ContentBasedProfile.ObtainQueryFromProfile
066     * @see jcolibri.method.retrieve.NNretrieval.NNScoringMethod
067     * @see jcolibri.method.retrieve.selection.SelectCases
068     * 
069     * @author Juan A. Recio-Garcia
070     * @author Developed at University College Cork (Ireland) in collaboration with Derek Bridge.
071     * @version 1.0
072     *
073     */
074    public class Restaurant7 implements StandardCBRApplication
075    {
076        Connector _connector;
077        CBRCaseBase _caseBase;
078    
079        
080        /*
081         * (non-Javadoc)
082         * 
083         * @see jcolibri.cbraplications.BasicCBRApplication#configure()
084         */
085        public void configure() throws ExecutionException
086        {
087            try
088            {
089                //Use a custom connector
090                _connector = new RestaurantsConnector("jcolibri/test/test13/restaurants-large-v2.txt");
091                _caseBase = new LinealCaseBase();
092                
093                //To show the progress
094                jcolibri.util.ProgressController.clear();
095                SwingProgressBar pb = new SwingProgressBar();
096                jcolibri.util.ProgressController.register(pb);   
097            } catch (Exception e)
098            {
099                throw new ExecutionException(e);
100            }
101        }
102    
103        /*
104         * (non-Javadoc)
105         * 
106         * @see jcolibri.cbraplications.StandardCBRApplication#preCycle()
107         */
108        public CBRCaseBase preCycle() throws ExecutionException
109        {
110            //In the precycle we pre-compute the information extraction in the case base
111            
112            //Initialize Wordnet
113            ThesaurusLinker.loadWordNet();
114            //Load user-specific glossary
115            GlossaryLinker.loadGlossary("jcolibri/test/test13/glossary.txt");
116            //Load phrases rules
117            PhrasesExtractor.loadRules("jcolibri/test/test13/phrasesRules.txt");
118            //Load features rules
119            FeaturesExtractor.loadRules("jcolibri/test/test13/featuresRules.txt");
120            //Load topic rules
121            DomainTopicClassifier.loadRules("jcolibri/test/test13/domainRules.txt");
122            
123            //Obtain cases
124            _caseBase.init(_connector);
125            Collection<CBRCase> cases = _caseBase.getCases();
126    
127            //Perform IE methods in the cases
128            
129            //Organize cases into paragraphs, sentences and tokens
130            OpennlpSplitter.split(cases);
131            //Detect stopwords
132            StopWordsDetector.detectStopWords(cases);
133            //Stem text
134            TextStemmer.stem(cases);
135            //Perform POS tagging
136            OpennlpPOStagger.tag(cases);
137            //Extract main names
138            OpennlpMainNamesExtractor.extractMainNames(cases);
139            //Extract phrases
140            PhrasesExtractor.extractPhrases(cases);
141            //Extract features
142            FeaturesExtractor.extractFeatures(cases);
143            //Classify with a topic
144            DomainTopicClassifier.classifyWithTopic(cases);
145            //Perform IE copying extracted features or phrases into other attributes of the case
146            BasicInformationExtractor.extractInformation(cases);
147            
148            return _caseBase;
149        }
150    
151        /*
152         * (non-Javadoc)
153         * 
154         * @see jcolibri.cbraplications.StandardCBRApplication#cycle(jcolibri.cbrcore.CBRQuery)
155         */
156        public void cycle(CBRQuery query) throws ExecutionException
157        {
158            query = ObtainQueryFromProfile.obtainQueryFromProfile( "src/jcolibri/test/recommenders/rec7/profile.xml");
159            
160            Collection<CBRCase> cases = _caseBase.getCases();
161            
162            //Perform IE methods in the cases
163            
164            //Organize the query into paragraphs, sentences and tokens
165            OpennlpSplitter.split(query);
166            //Detect stopwords
167            StopWordsDetector.detectStopWords(query);
168            //Stem query
169            TextStemmer.stem(query);
170            //Perform POS tagging in the query
171            OpennlpPOStagger.tag(query);
172            //Extract main names
173            OpennlpMainNamesExtractor.extractMainNames(query);
174            
175            //Now that we have the query we relate cases tokens with the query tokens
176            //Using the user-defined glossary
177            GlossaryLinker.LinkWithGlossary(cases, query);
178            //Using wordnet
179            ThesaurusLinker.linkWithWordNet(cases, query);
180            
181            //Extract phrases
182            PhrasesExtractor.extractPhrases(query);
183            //Extract features
184            FeaturesExtractor.extractFeatures(query);
185            //Classify with a topic
186            DomainTopicClassifier.classifyWithTopic(query);
187            //Perform IE copying extracted features or phrases into other attributes of the query
188            BasicInformationExtractor.extractInformation(query);
189            
190            //Now we configure the KNN method with some user-defined similarity measures
191            NNConfig knnConfig = new NNConfig();
192            knnConfig.setDescriptionSimFunction(new Average());
193            
194            knnConfig.addMapping(new Attribute("location", RestaurantDescription.class), new Equal());
195            
196            //To compare text we use the OverlapCofficient
197            knnConfig.addMapping(new Attribute("description", RestaurantDescription.class), new OverlapCoefficient());
198            //This function takes a string with several numerical values and computes the average
199            knnConfig.addMapping(new Attribute("price", RestaurantDescription.class), new AverageMultipleTextValues(1000));
200            //This function takes a string with several words separated by whitespaces, converts it to a set of tokens and
201            //computes the size of the intersecction of the query set and the case set normalized with the case set
202            knnConfig.addMapping(new Attribute("foodType", RestaurantDescription.class), new TokensContained());
203            knnConfig.addMapping(new Attribute("food", RestaurantDescription.class), new TokensContained());
204            knnConfig.addMapping(new Attribute("alcohol", RestaurantDescription.class), new Equal());
205            knnConfig.addMapping(new Attribute("takeout", RestaurantDescription.class), new Equal());
206            knnConfig.addMapping(new Attribute("delivery", RestaurantDescription.class), new Equal());
207            knnConfig.addMapping(new Attribute("parking", RestaurantDescription.class), new Equal());
208            knnConfig.addMapping(new Attribute("catering", RestaurantDescription.class), new Equal());
209            
210            System.out.println("RESULT:");
211            
212            Collection<RetrievalResult> res = NNScoringMethod.evaluateSimilarity(cases, query, knnConfig);
213            res = SelectCases.selectTopKRR(res, 5);
214            
215            for(RetrievalResult rr: res)
216                System.out.println(rr);
217            
218            //Show the result
219            RestaurantDescription qrd = (RestaurantDescription)query.getDescription();
220            CBRCase mostSimilar = res.iterator().next().get_case();
221            RestaurantDescription rrd = (RestaurantDescription)mostSimilar.getDescription();
222            new ResultFrame(qrd.getDescription().toString(), rrd.getName(), rrd.getAddress(), rrd.getDescription().toString());
223            
224            
225        }
226    
227        /*
228         * (non-Javadoc)
229         * 
230         * @see jcolibri.cbraplications.StandardCBRApplication#postCycle()
231         */
232        public void postCycle() throws ExecutionException
233        {
234            jcolibri.extensions.textual.wordnet.WordNetBridge.deInit();
235            _connector.close();
236    
237        }
238        
239        public static void main(String[] args)
240        {
241            Restaurant7 test = new Restaurant7();
242            try
243            {
244                test.configure();
245                
246                CBRCaseBase caseBase = test.preCycle();
247               
248                System.out.println("CASE BASE: ");
249                for(CBRCase c: caseBase.getCases())
250                    System.out.println(c);
251                System.out.println("Total: "+caseBase.getCases().size()+" cases");
252    
253                
254                test.cycle(null);
255    
256                test.postCycle();
257                
258            } catch (ExecutionException e)
259            {
260                org.apache.commons.logging.LogFactory.getLog(Restaurant7.class).error(e);
261            }
262        }
263    
264    }