001    /**
002     * TestGate.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 19/06/2007
008     */
009    package jcolibri.test.test12;
010    
011    import java.io.BufferedReader;
012    import java.io.InputStreamReader;
013    import java.util.Date;
014    
015    import jcolibri.extensions.textual.IE.common.DomainTopicClassifier;
016    import jcolibri.extensions.textual.IE.common.FeaturesExtractor;
017    import jcolibri.extensions.textual.IE.common.PhrasesExtractor;
018    import jcolibri.extensions.textual.IE.common.StopWordsDetector;
019    import jcolibri.extensions.textual.IE.common.TextStemmer;
020    import jcolibri.extensions.textual.IE.gate.GateFeaturesExtractor;
021    import jcolibri.extensions.textual.IE.gate.GatePOStagger;
022    import jcolibri.extensions.textual.IE.gate.GatePhrasesExtractor;
023    import jcolibri.extensions.textual.IE.gate.GateSplitter;
024    import jcolibri.extensions.textual.IE.gate.IETextGate;
025    import jcolibri.util.FileIO;
026    
027    /**
028     * This example shows how to use the GATE methods of the Textual CBR extension.
029     * <br>
030     * It just parses and extracts information from the simple text file (restest.txt)
031     * <p>
032     * For more information about the Textual CBR extension read the jcolibri.extensions.textual.IE documentation.
033     * 
034     * @author Juan A. Recio-Garcia
035     * @version 1.0
036     * @see jcolibri.extensions.textual.IE
037     */
038    public class TestGate
039    {
040    
041            public static void main(String[] args)
042            {
043                    try {
044                            //Load the textual file
045                            BufferedReader br = new BufferedReader( new InputStreamReader(FileIO.findFile("jcolibri/test/test12/restest.txt").openStream()));
046                            String content = "";
047                            String line;
048                            while ((line = br.readLine()) != null) {
049                                    content+=(line+"\n");
050                            }
051                            
052                            long startTime = new Date().getTime();
053                            
054                            //Create the IETextGate object
055                            IETextGate  text = new IETextGate(content);
056                            
057                            //Organize the text into paragraphs, sentences and tokens
058                            GateSplitter.split(text);
059                            
060                            //Remove stopwords
061                            StopWordsDetector.detectStopWords(text); 
062                            
063                            //Stem the text
064                            TextStemmer.stem(text);
065                            
066                            //Perform POS tagging with GATE
067                            GatePOStagger.tag(text);
068                            
069                            //Extract phrases using the common implementation
070                            PhrasesExtractor.loadRules("jcolibri/test/test12/phrasesRules.txt");
071                            PhrasesExtractor.extractPhrases(text);
072                            
073                            //Extract phrases using the GATE specific method
074                            GatePhrasesExtractor.loadDefaultRules();
075                            GatePhrasesExtractor.extractPhrases(text);
076    
077                            //Extract features using the common implementation
078                            FeaturesExtractor.loadRules("jcolibri/test/test12/featuresRules.txt");
079                            FeaturesExtractor.extractFeatures(text);
080                            
081                            //Extract features using the GATE specific method
082                            GateFeaturesExtractor.loadDefaultRules();
083                            GateFeaturesExtractor.extractFeatures(text);
084                            
085                            //Classify with topic
086                            DomainTopicClassifier.loadRules("jcolibri/test/test12/domainRules.txt");
087                            DomainTopicClassifier.classifyWithTopic(text);
088                            
089                            //Print result
090                            System.out.println(text.printAnnotations());
091                            long endTime = new Date().getTime();
092                            System.out.println("Total time: "+ (endTime-startTime)+" milliseconds");
093                            
094                    } catch (Exception e) {
095                        org.apache.commons.logging.LogFactory.getLog(TestGate.class).error(e);
096                    }
097            }
098    }