/**
 * TestGate.java
 * jCOLIBRI2 framework.
 * @author Juan A. Recio-García.
 * GAIA - Group for Artificial Intelligence Applications
 * http://gaia.fdi.ucm.es
 * 19/06/2007
 */
package jcolibri.test.test12;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Date;

import jcolibri.extensions.textual.IE.common.DomainTopicClassifier;
import jcolibri.extensions.textual.IE.common.FeaturesExtractor;
import jcolibri.extensions.textual.IE.common.PhrasesExtractor;
import jcolibri.extensions.textual.IE.common.StopWordsDetector;
import jcolibri.extensions.textual.IE.common.TextStemmer;
import jcolibri.extensions.textual.IE.gate.GateFeaturesExtractor;
import jcolibri.extensions.textual.IE.gate.GatePOStagger;
import jcolibri.extensions.textual.IE.gate.GatePhrasesExtractor;
import jcolibri.extensions.textual.IE.gate.GateSplitter;
import jcolibri.extensions.textual.IE.gate.IETextGate;
import jcolibri.util.FileIO;

/**
 * This example shows how to use the GATE methods of the Textual CBR extension.
 * <br>
 * It parses a simple text file (restest.txt) and extracts information from it.
 * <p>
 * For more information about the Textual CBR extension read the
 * jcolibri.extensions.textual.IE documentation.
 *
 * @author Juan A. Recio-Garcia
 * @version 1.0
 * @see jcolibri.extensions.textual.IE
 */
public class TestGate
{

    /**
     * Runs the example: loads restest.txt, pushes it through the splitting,
     * stop-word, stemming, POS-tagging, phrase, feature and topic steps, and
     * prints the resulting annotations together with the elapsed time.
     * <p>
     * Any exception raised along the way is logged (with its stack trace)
     * instead of being propagated.
     *
     * @param args command line arguments (not used)
     */
    public static void main(String[] args)
    {
        try {
            //Load the textual file
            String content = readAll(FileIO.findFile("jcolibri/test/test12/restest.txt").openStream());

            //The timer starts after the file has been loaded, so I/O is not measured
            long startTime = new Date().getTime();

            //Create the IETextGate object
            IETextGate text = new IETextGate(content);

            //Organize the text into paragraphs, sentences and tokens
            GateSplitter.split(text);

            //Detect stopwords
            StopWordsDetector.detectStopWords(text);

            //Stem the text
            TextStemmer.stem(text);

            //Perform POS tagging with GATE
            GatePOStagger.tag(text);

            //Extract phrases using the common implementation
            PhrasesExtractor.loadRules("jcolibri/test/test12/phrasesRules.txt");
            PhrasesExtractor.extractPhrases(text);

            //Extract phrases using the GATE specific method
            GatePhrasesExtractor.loadDefaultRules();
            GatePhrasesExtractor.extractPhrases(text);

            //Extract features using the common implementation
            FeaturesExtractor.loadRules("jcolibri/test/test12/featuresRules.txt");
            FeaturesExtractor.extractFeatures(text);

            //Extract features using the GATE specific method
            GateFeaturesExtractor.loadDefaultRules();
            GateFeaturesExtractor.extractFeatures(text);

            //Classify with topic
            DomainTopicClassifier.loadRules("jcolibri/test/test12/domainRules.txt");
            DomainTopicClassifier.classifyWithTopic(text);

            //Print result
            System.out.println(text.printAnnotations());
            long endTime = new Date().getTime();
            System.out.println("Total time: "+ (endTime-startTime)+" milliseconds");

        } catch (Exception e) {
            //Pass the exception as the Throwable argument so the stack trace is logged too
            org.apache.commons.logging.LogFactory.getLog(TestGate.class).error("Error running the TestGate example", e);
        }
    }

    /**
     * Reads the whole stream as text, terminating every line with "\n",
     * and closes the stream when done (also when reading fails).
     * Characters are decoded with the platform default charset.
     *
     * @param in stream to read; it is closed by this method
     * @return the text content, each line followed by a newline character
     * @throws IOException if reading from or closing the stream fails
     */
    private static String readAll(InputStream in) throws IOException
    {
        BufferedReader br = new BufferedReader(new InputStreamReader(in));
        try {
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                content.append(line).append('\n');
            }
            return content.toString();
        } finally {
            //Closing the reader also closes the wrapped stream
            br.close();
        }
    }
}