001    /**
002     * TestOpenNLP.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 19/06/2007
008     */
009    package jcolibri.test.test12;
010    
011    import java.io.BufferedReader;
012    import java.io.InputStreamReader;
013    import java.util.Date;
014    
015    import jcolibri.extensions.textual.IE.common.DomainTopicClassifier;
016    import jcolibri.extensions.textual.IE.common.FeaturesExtractor;
017    import jcolibri.extensions.textual.IE.common.PhrasesExtractor;
018    import jcolibri.extensions.textual.IE.common.StopWordsDetector;
019    import jcolibri.extensions.textual.IE.common.TextStemmer;
020    import jcolibri.extensions.textual.IE.opennlp.IETextOpenNLP;
021    import jcolibri.extensions.textual.IE.opennlp.OpennlpMainNamesExtractor;
022    import jcolibri.extensions.textual.IE.opennlp.OpennlpPOStagger;
023    import jcolibri.extensions.textual.IE.opennlp.OpennlpSplitter;
024    import jcolibri.util.FileIO;
025    
026    
027    /**
028     * This example shows how to use the OpenNLP methods of the Textual CBR extension.
029     * <br>
030     * It just parses and extracts information from the simple text file (restest.txt)
031     * <p>
032     * For more information about the Textual CBR extension read the jcolibri.extensions.textual.IE documentation.
033     * 
034     * @author Juan A. Recio-Garcia
035     * @version 1.0
036     * @see jcolibri.extensions.textual.IE
037     */
038    public class TestOpenNLP
039    {
040    
041        public static void main(String[] args)
042            {
043                    try {
044                            // Load the textual file
045                            BufferedReader br = new BufferedReader( new InputStreamReader(FileIO.findFile("jcolibri/test/test12/restest.txt").openStream()));
046                            String content = "";
047                            String line;
048                            while ((line = br.readLine()) != null) {
049                                    content+=(line+"\n");
050                            }
051                            
052                            long startTime = new Date().getTime();
053                            
054                            //Create the IETextOpenNLP object
055                            IETextOpenNLP text = new IETextOpenNLP(content);
056    
057                            //Organize the text into paragraphs, sentences and tokens
058                            OpennlpSplitter.split(text);
059                            
060                            //Remove stopwords
061                            StopWordsDetector.detectStopWords(text); 
062                            
063                            //Stem the text
064                            TextStemmer.stem(text);
065                            
066                            //Perform POS tagging with OpenNLP
067                            OpennlpPOStagger.tag(text);
068                            
069                            //Extract main names of the sentence using OpenNLP
070                            OpennlpMainNamesExtractor.extractMainNames(text);
071                            
072                            //Extract phrases using the common implementation
073                            PhrasesExtractor.loadRules(FileIO.findFile("jcolibri/test/test12/phrasesRules.txt").getPath());
074                            PhrasesExtractor.extractPhrases(text);
075                            
076                            //Extract features using the common implementation
077                            FeaturesExtractor.loadRules("jcolibri/test/test12/featuresRules.txt");
078                            FeaturesExtractor.extractFeatures(text);
079    
080                            //Classify with topic
081                            DomainTopicClassifier.loadRules("jcolibri/test/test12/domainRules.txt");
082                            DomainTopicClassifier.classifyWithTopic(text);
083                            
084                            //Print result
085                            System.out.println(text.printAnnotations());
086                            long endTime = new Date().getTime();
087                            System.out.println("Total time: "+ (endTime-startTime)+" milliseconds");
088    
089                    } catch (Exception e) {
090                            org.apache.commons.logging.LogFactory.getLog(TestOpenNLP.class).error(e);
091                    }
092            }
093    
094    }