/**
 * IETextOpenNLP.java
 * jCOLIBRI2 framework.
 * @author Juan A. Recio-García.
 * GAIA - Group for Artificial Intelligence Applications
 * http://gaia.fdi.ucm.es
 * 21/06/2007
 */
package jcolibri.extensions.textual.IE.opennlp;

import java.util.Hashtable;

import jcolibri.extensions.textual.IE.representation.IEText;
import jcolibri.extensions.textual.IE.representation.Paragraph;
import jcolibri.extensions.textual.IE.representation.Sentence;
import jcolibri.extensions.textual.IE.representation.Token;

import org.jdom.Element;

/**
 * Represents an IEText implemented using the OpenNLP package.
 * <br>This object internally uses an NLPDocument object (from the OpenNLP package)
 * that is an XML DOM document organized in paragraphs, sentences and tokens.
 * The specific OpenNLP methods will decorate this DOM tree with information,
 * so this class is a wrapper that implements the IEText superclass.
 * <br>Besides the document, the object keeps three lookup tables that associate
 * each Paragraph, Sentence and Token with an Element (its node in the DOM tree).
 *
 * @author Juan A. Recio-Garcia
 * @version 1.0
 * @see jcolibri.extensions.textual.IE.representation.IEText
 */
public class IETextOpenNLP extends IEText
{

    /**
     * Internal OpenNLP document. It stays <code>null</code> until
     * {@link #fromString(String)} completes successfully.
     */
    protected opennlp.common.xml.NLPDocument doc;

    // Internal mapping between paragraphs and the paragraph nodes in the DOM tree
    private final Hashtable<Paragraph, Element> parMapping = new Hashtable<Paragraph, Element>();

    // Internal mapping between sentences and the sentence nodes in the DOM tree
    private final Hashtable<Sentence, Element> sentMapping = new Hashtable<Sentence, Element>();

    // Internal mapping between tokens and the token nodes in the DOM tree
    private final Hashtable<Token, Element> tokMapping = new Hashtable<Token, Element>();

    /**
     * Creates an empty IETextOpenNLP object. No document is built, so
     * {@link #getDocument()} returns <code>null</code> until
     * {@link #fromString(String)} is called.
     */
    public IETextOpenNLP()
    {

    }

    /**
     * Creates an IETextOpenNLP object with the given text.
     * <br>Any exception thrown while loading the text is logged (with its stack
     * trace) and not rethrown; in that case the object is still created but its
     * document may be left <code>null</code>.
     * <br>Note: this constructor invokes the overridable method
     * {@link #fromString(String)}.
     *
     * @param content text to store in the object
     */
    public IETextOpenNLP(String content)
    {
        try
        {
            fromString(content);
        } catch (Exception e)
        {
            // Pass the exception as the Throwable argument so the stack trace is
            // logged; error(Object) alone would only record e.toString().
            org.apache.commons.logging.LogFactory.getLog(this.getClass())
                    .error("Error building the OpenNLP document from the given text: " + e, e);
        }
    }

    /**
     * Stores the given text in the object.
     * <br>Delegates to the superclass implementation first and then builds the
     * internal NLPDocument from the same text with a new NLPDocumentBuilder.
     *
     * @param content text to store
     * @throws Exception if the superclass call or the document build fails
     */
    public void fromString(String content) throws Exception
    {
        super.fromString(content);
        opennlp.common.xml.NLPDocumentBuilder builder = new opennlp.common.xml.NLPDocumentBuilder();
        doc = builder.build(content);
    }

    /**
     * Returns the internal OpenNLP object that stores the text.
     *
     * @return the internal document, or <code>null</code> if none has been built
     */
    public opennlp.common.xml.NLPDocument getDocument()
    {
        return this.doc;
    }

    /**
     * Returns the paragraph node in the DOM tree mapped to a paragraph.
     *
     * @param par paragraph to look up
     * @return the mapped node, or <code>null</code> if the paragraph has no mapping
     */
    protected Element getParagraphMapping(Paragraph par)
    {
        return parMapping.get(par);
    }

    /**
     * Sets a mapping between a paragraph and the paragraph node in the DOM tree.
     * Neither argument may be <code>null</code> (the backing Hashtable rejects nulls).
     *
     * @param par paragraph used as key
     * @param annot node in the DOM tree associated with the paragraph
     */
    protected void setParagraphMapping(Paragraph par, Element annot)
    {
        parMapping.put(par, annot);
    }

    /**
     * Returns the sentence node in the DOM tree mapped to a sentence.
     *
     * @param sent sentence to look up
     * @return the mapped node, or <code>null</code> if the sentence has no mapping
     */
    protected Element getSentenceMapping(Sentence sent)
    {
        return sentMapping.get(sent);
    }

    /**
     * Sets a mapping between a sentence and the sentence node in the DOM tree.
     * Neither argument may be <code>null</code> (the backing Hashtable rejects nulls).
     *
     * @param sent sentence used as key
     * @param annot node in the DOM tree associated with the sentence
     */
    protected void setSentenceMapping(Sentence sent, Element annot)
    {
        sentMapping.put(sent, annot);
    }

    /**
     * Returns the token node in the DOM tree mapped to a token.
     *
     * @param tok token to look up
     * @return the mapped node, or <code>null</code> if the token has no mapping
     */
    protected Element getTokenMapping(Token tok)
    {
        return tokMapping.get(tok);
    }

    /**
     * Sets a mapping between a token and the token node in the DOM tree.
     * Neither argument may be <code>null</code> (the backing Hashtable rejects nulls).
     *
     * @param tok token used as key
     * @param annot node in the DOM tree associated with the token
     */
    protected void setTokenMapping(Token tok, Element annot)
    {
        tokMapping.put(tok, annot);
    }

}