/**
 * TestOpenNLP.java
 * jCOLIBRI2 framework.
 * @author Juan A. Recio-García.
 * GAIA - Group for Artificial Intelligence Applications
 * http://gaia.fdi.ucm.es
 * 19/06/2007
 */
package jcolibri.test.test12;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Date;

import jcolibri.extensions.textual.IE.common.DomainTopicClassifier;
import jcolibri.extensions.textual.IE.common.FeaturesExtractor;
import jcolibri.extensions.textual.IE.common.PhrasesExtractor;
import jcolibri.extensions.textual.IE.common.StopWordsDetector;
import jcolibri.extensions.textual.IE.common.TextStemmer;
import jcolibri.extensions.textual.IE.opennlp.IETextOpenNLP;
import jcolibri.extensions.textual.IE.opennlp.OpennlpMainNamesExtractor;
import jcolibri.extensions.textual.IE.opennlp.OpennlpPOStagger;
import jcolibri.extensions.textual.IE.opennlp.OpennlpSplitter;
import jcolibri.util.FileIO;


/**
 * This example shows how to use the OpenNLP methods of the Textual CBR extension.
 * <br>
 * It parses a simple text file (restest.txt) and extracts information from it.
 * <p>
 * For more information about the Textual CBR extension read the
 * jcolibri.extensions.textual.IE documentation.
 *
 * @author Juan A. Recio-Garcia
 * @version 1.0
 * @see jcolibri.extensions.textual.IE
 */
public class TestOpenNLP
{

    /**
     * Runs the example: loads <code>restest.txt</code>, applies the information
     * extraction steps one after another (splitting, stop word detection, stemming,
     * POS tagging, main names, phrases, features and topic classification), then
     * prints the resulting annotations and the elapsed time to standard output.
     * <p>
     * Any exception raised along the way is caught and logged together with its
     * stack trace; it is not propagated to the caller.
     *
     * @param args command line arguments (not used)
     */
    public static void main(String[] args)
    {
        try {
            // Load the textual file
            String content;
            BufferedReader br = new BufferedReader(
                    new InputStreamReader(FileIO.findFile("jcolibri/test/test12/restest.txt").openStream()));
            try {
                // A StringBuilder avoids re-copying the whole text on every appended line
                StringBuilder buffer = new StringBuilder();
                String line;
                while ((line = br.readLine()) != null) {
                    buffer.append(line).append('\n');
                }
                content = buffer.toString();
            } finally {
                // Release the reader (and the stream it wraps) even if reading fails
                br.close();
            }

            long startTime = new Date().getTime();

            //Create the IETextOpenNLP object
            IETextOpenNLP text = new IETextOpenNLP(content);

            //Organize the text into paragraphs, sentences and tokens
            OpennlpSplitter.split(text);

            //Remove stopwords
            StopWordsDetector.detectStopWords(text);

            //Stem the text
            TextStemmer.stem(text);

            //Perform POS tagging with OpenNLP
            OpennlpPOStagger.tag(text);

            //Extract main names of the sentence using OpenNLP
            OpennlpMainNamesExtractor.extractMainNames(text);

            //Extract phrases using the common implementation
            // NOTE(review): this loader receives a path resolved through FileIO.findFile,
            // whereas the two loaders below receive the plain relative path - confirm this is intended.
            PhrasesExtractor.loadRules(FileIO.findFile("jcolibri/test/test12/phrasesRules.txt").getPath());
            PhrasesExtractor.extractPhrases(text);

            //Extract features using the common implementation
            FeaturesExtractor.loadRules("jcolibri/test/test12/featuresRules.txt");
            FeaturesExtractor.extractFeatures(text);

            //Classify with topic
            DomainTopicClassifier.loadRules("jcolibri/test/test12/domainRules.txt");
            DomainTopicClassifier.classifyWithTopic(text);

            //Print result
            System.out.println(text.printAnnotations());
            long endTime = new Date().getTime();
            System.out.println("Total time: "+ (endTime-startTime)+" milliseconds");

        } catch (Exception e) {
            // Pass the exception as the throwable argument so the stack trace is logged,
            // not just the result of e.toString()
            org.apache.commons.logging.LogFactory.getLog(TestOpenNLP.class)
                    .error("Error running the OpenNLP example", e);
        }
    }

}