001    /**
002     * Test13a.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 23/06/2007
008     */
009    package jcolibri.test.test13;
010    
011    import java.util.Collection;
012    
013    import jcolibri.casebase.LinealCaseBase;
014    import jcolibri.cbraplications.StandardCBRApplication;
015    import jcolibri.cbrcore.Attribute;
016    import jcolibri.cbrcore.CBRCase;
017    import jcolibri.cbrcore.CBRCaseBase;
018    import jcolibri.cbrcore.CBRQuery;
019    import jcolibri.cbrcore.Connector;
020    import jcolibri.exception.ExecutionException;
021    import jcolibri.extensions.textual.IE.common.BasicInformationExtractor;
022    import jcolibri.extensions.textual.IE.common.DomainTopicClassifier;
023    import jcolibri.extensions.textual.IE.common.FeaturesExtractor;
024    import jcolibri.extensions.textual.IE.common.GlossaryLinker;
025    import jcolibri.extensions.textual.IE.common.PhrasesExtractor;
026    import jcolibri.extensions.textual.IE.common.StopWordsDetector;
027    import jcolibri.extensions.textual.IE.common.TextStemmer;
028    import jcolibri.extensions.textual.IE.common.ThesaurusLinker;
029    import jcolibri.extensions.textual.IE.opennlp.IETextOpenNLP;
030    import jcolibri.extensions.textual.IE.opennlp.OpennlpMainNamesExtractor;
031    import jcolibri.extensions.textual.IE.opennlp.OpennlpPOStagger;
032    import jcolibri.extensions.textual.IE.opennlp.OpennlpSplitter;
033    import jcolibri.method.retrieve.RetrievalResult;
034    import jcolibri.method.retrieve.NNretrieval.NNConfig;
035    import jcolibri.method.retrieve.NNretrieval.NNScoringMethod;
036    import jcolibri.method.retrieve.NNretrieval.similarity.global.Average;
037    import jcolibri.method.retrieve.NNretrieval.similarity.local.Equal;
038    import jcolibri.method.retrieve.NNretrieval.similarity.local.textual.OverlapCoefficient;
039    import jcolibri.method.retrieve.selection.SelectCases;
040    import jcolibri.test.main.SwingProgressBar;
041    import jcolibri.test.test13.connector.RestaurantsConnector;
042    import jcolibri.test.test13.gui.ResultFrame;
043    import jcolibri.test.test13.similarity.AverageMultipleTextValues;
044    import jcolibri.test.test13.similarity.TokensContained;
045    
046    /**
047     * This test shows how to use the Textual CBR extension in a Restaurant recommender. See the jcolibri.extensions.textual.IE package documentation for
048     * details about this extension. This example uses the OpenNLP implementation.
049     * <br>
050     * It uses a custom connector (RestaurantsConnector) and similarity functions (AverageMultipleTextValues and TokensContained).
051     * The connector loads cases from a normal txt file and the similarity functions work with the information extracted by the textual CBR methods.
052     * These methods extract information from text and store it in the other attributes of the description. That information is stored as a string with
053     * several values separated with white spaces, so specific similarity measures are required to compare those attributes.
054     * See their javadoc for more information.
055     * <br>
056     * To compare the texts it uses a textual similarity function from the jcolibri.method.retrieve.NNretrieval.similarity.local.textual package.
057     * Test13b uses the Lucene similarity function instead of that one.
058     * 
059     * @author Juan A. Recio-Garcia
060     * @version 1.0
061     * 
062     * @see jcolibri.test.test13.similarity.AverageMultipleTextValues
063     * @see jcolibri.test.test13.similarity.TokensContained
064     * @see jcolibri.test.test13.connector.RestaurantsConnector
065     * @see jcolibri.extensions.textual.IE
066     */
067    public class Test13a implements StandardCBRApplication
068    {
069    
070        Connector _connector;
071        CBRCaseBase _caseBase;
072    
073        
074        /*
075         * (non-Javadoc)
076         * 
077         * @see jcolibri.cbraplications.BasicCBRApplication#configure()
078         */
079        public void configure() throws ExecutionException
080        {
081            try
082            {
083                //Use a custom connector
084                _connector = new RestaurantsConnector("jcolibri/test/test13/restaurants-large-v2.txt");
085                _caseBase = new LinealCaseBase();
086                
087                //To show the progress
088                jcolibri.util.ProgressController.clear();
089                SwingProgressBar pb = new SwingProgressBar();
090                jcolibri.util.ProgressController.register(pb);   
091            } catch (Exception e)
092            {
093                throw new ExecutionException(e);
094            }
095        }
096    
097        /*
098         * (non-Javadoc)
099         * 
100         * @see jcolibri.cbraplications.StandardCBRApplication#preCycle()
101         */
102        public CBRCaseBase preCycle() throws ExecutionException
103        {
104            //In the precycle we pre-compute the information extraction in the case base
105            
106            //Initialize Wordnet
107            ThesaurusLinker.loadWordNet();
108            //Load user-specific glossary
109            GlossaryLinker.loadGlossary("jcolibri/test/test13/glossary.txt");
110            //Load phrases rules
111            PhrasesExtractor.loadRules("jcolibri/test/test13/phrasesRules.txt");
112            //Load features rules
113            FeaturesExtractor.loadRules("jcolibri/test/test13/featuresRules.txt");
114            //Load topic rules
115            DomainTopicClassifier.loadRules("jcolibri/test/test13/domainRules.txt");
116            
117            //Obtain cases
118            _caseBase.init(_connector);
119            Collection<CBRCase> cases = _caseBase.getCases();
120    
121            //Perform IE methods in the cases
122            
123            //Organize cases into paragraphs, sentences and tokens
124            OpennlpSplitter.split(cases);
125            //Detect stopwords
126            StopWordsDetector.detectStopWords(cases);
127            //Stem text
128            TextStemmer.stem(cases);
129            //Perform POS tagging
130            OpennlpPOStagger.tag(cases);
131            //Extract main names
132            OpennlpMainNamesExtractor.extractMainNames(cases);
133            //Extract phrases
134            PhrasesExtractor.extractPhrases(cases);
135            //Extract features
136            FeaturesExtractor.extractFeatures(cases);
137            //Classify with a topic
138            DomainTopicClassifier.classifyWithTopic(cases);
139            //Perform IE copying extracted features or phrases into other attributes of the case
140            BasicInformationExtractor.extractInformation(cases);
141            
142            return _caseBase;
143        }
144    
145        /*
146         * (non-Javadoc)
147         * 
148         * @see jcolibri.cbraplications.StandardCBRApplication#cycle(jcolibri.cbrcore.CBRQuery)
149         */
150        public void cycle(CBRQuery query) throws ExecutionException
151        {
152            Collection<CBRCase> cases = _caseBase.getCases();
153            
154            //Perform IE methods in the cases
155            
156            //Organize the query into paragraphs, sentences and tokens
157            OpennlpSplitter.split(query);
158            //Detect stopwords
159            StopWordsDetector.detectStopWords(query);
160            //Stem query
161            TextStemmer.stem(query);
162            //Perform POS tagging in the query
163            OpennlpPOStagger.tag(query);
164            //Extract main names
165            OpennlpMainNamesExtractor.extractMainNames(query);
166            
167            //Now that we have the query we relate cases tokens with the query tokens
168            //Using the user-defined glossary
169            GlossaryLinker.LinkWithGlossary(cases, query);
170            //Using wordnet
171            ThesaurusLinker.linkWithWordNet(cases, query);
172            
173            //Extract phrases
174            PhrasesExtractor.extractPhrases(query);
175            //Extract features
176            FeaturesExtractor.extractFeatures(query);
177            //Classify with a topic
178            DomainTopicClassifier.classifyWithTopic(query);
179            //Perform IE copying extracted features or phrases into other attributes of the query
180            BasicInformationExtractor.extractInformation(query);
181            
182            //Now we configure the NN method with some user-defined similarity measures
183            NNConfig nnConfig = new NNConfig();
184            nnConfig.setDescriptionSimFunction(new Average());
185            
186            nnConfig.addMapping(new Attribute("location", RestaurantDescription.class), new Equal());
187            
188            //To compare text we use the OverlapCofficient
189            nnConfig.addMapping(new Attribute("description", RestaurantDescription.class), new OverlapCoefficient());
190            //This function takes a string with several numerical values and computes the average
191            nnConfig.addMapping(new Attribute("price", RestaurantDescription.class), new AverageMultipleTextValues(1000));
192            //This function takes a string with several words separated by whitespaces, converts it to a set of tokens and
193            //computes the size of the intersection of the query set and the case set normalized with the case set
194            nnConfig.addMapping(new Attribute("foodType", RestaurantDescription.class), new TokensContained());
195            nnConfig.addMapping(new Attribute("food", RestaurantDescription.class), new TokensContained());
196            nnConfig.addMapping(new Attribute("alcohol", RestaurantDescription.class), new Equal());
197            nnConfig.addMapping(new Attribute("takeout", RestaurantDescription.class), new Equal());
198            nnConfig.addMapping(new Attribute("delivery", RestaurantDescription.class), new Equal());
199            nnConfig.addMapping(new Attribute("parking", RestaurantDescription.class), new Equal());
200            nnConfig.addMapping(new Attribute("catering", RestaurantDescription.class), new Equal());
201            
202            
203            System.out.println("RESULT:");
204            Collection<RetrievalResult> res = NNScoringMethod.evaluateSimilarity(cases, query, nnConfig);
205            res = SelectCases.selectTopKRR(res, 5);
206            
207            for(RetrievalResult rr: res)
208                System.out.println(rr);
209            
210            //Show the result
211            RestaurantDescription qrd = (RestaurantDescription)query.getDescription();
212            CBRCase mostSimilar = res.iterator().next().get_case();
213            RestaurantDescription rrd = (RestaurantDescription)mostSimilar.getDescription();
214            new ResultFrame(qrd.getDescription().getRAWContent(), rrd.getName(), rrd.getAddress(), rrd.getDescription().getRAWContent());
215            
216            
217        }
218    
219        /*
220         * (non-Javadoc)
221         * 
222         * @see jcolibri.cbraplications.StandardCBRApplication#postCycle()
223         */
224        public void postCycle() throws ExecutionException
225        {
226            jcolibri.extensions.textual.wordnet.WordNetBridge.deInit();
227            _connector.close();
228    
229        }
230    
231        
232        public static void main(String[] args)
233        {
234            Test13a test = new Test13a();
235            try
236            {
237                test.configure();
238                
239                CBRCaseBase caseBase = test.preCycle();
240               
241                System.out.println("CASE BASE: ");
242                for(CBRCase c: caseBase.getCases())
243                    System.out.println(c);
244                System.out.println("Total: "+caseBase.getCases().size()+" cases");
245    
246                boolean _continue = true;
247                while(_continue)
248                {
249                        String queryString = javax.swing.JOptionPane.showInputDialog("Please enter the restaurant description:");
250                        if(queryString == null)
251                            _continue = false;
252                        else
253                        {   
254                                CBRQuery query = new CBRQuery();
255                                RestaurantDescription queryDescription = new RestaurantDescription();
256                                queryDescription.setDescription(new IETextOpenNLP(queryString));
257                                query.setDescription(queryDescription);
258                                
259                                test.cycle(query);
260                        }
261                }
262                test.postCycle();
263                
264            } catch (ExecutionException e)
265            {
266                org.apache.commons.logging.LogFactory.getLog(Test13a.class).error(e);
267            }
268        }
269    }