001 /** 002 * IETextGate.java 003 * jCOLIBRI2 framework. 004 * @author Juan A. Recio-García. 005 * GAIA - Group for Artificial Intelligence Applications 006 * http://gaia.fdi.ucm.es 007 * 21/06/2007 008 */ 009 package jcolibri.extensions.textual.IE.gate; 010 011 import gate.Annotation; 012 import gate.DocumentFormat; 013 import gate.Factory; 014 015 import java.util.Hashtable; 016 017 import jcolibri.extensions.textual.IE.gate.gateinit.InitGate; 018 import jcolibri.extensions.textual.IE.representation.IEText; 019 import jcolibri.extensions.textual.IE.representation.Paragraph; 020 import jcolibri.extensions.textual.IE.representation.Sentence; 021 import jcolibri.extensions.textual.IE.representation.Token; 022 023 /* 024 * Represents an IEText implemented using the GATE package. 025 * It stores internally a gate Document object, so this class is a 026 * wrapper that implements the IEText superclass. 027 * <br> 028 * GATE organizes documents using annotations. Annotations have a label and keep any other information. 029 * They also have their begin and end position of the annotation within the text to obtain the text fragment that refer to. 030 * 031 * @author Juan A. Recio-Garcia 032 * @version 1.0 033 * @see jcolibri.extensions.textual.IE.representation.IEText 034 */ 035 public class IETextGate extends IEText 036 { 037 038 protected gate.Document doc; 039 040 /** 041 * Creates an empty IETextGate object. 042 * Initializes GATE if required. 043 */ 044 public IETextGate() 045 { 046 InitGate.initGate(); 047 } 048 049 /** 050 * Creates an IETextGate object with the given content. 051 * Initializes GATE if required. 052 */ 053 public IETextGate(String content) 054 { 055 InitGate.initGate(); 056 try 057 { 058 fromString(content); 059 } catch (Exception e) 060 { 061 org.apache.commons.logging.LogFactory.getLog(this.getClass()).error(e); 062 } 063 064 } 065 066 /** 067 * Stores the given text in the object 068 */ 069 public void fromString(String content) throws Exception 070 { 071 super.fromString(content); 072 doc = Factory.newDocument(content); 073 DocumentFormat df = DocumentFormat.getDocumentFormat(doc, "text"); 074 df.unpackMarkup(doc); 075 } 076 077 /** 078 * Returns the internal gate's document. 079 */ 080 protected gate.Document getDocument() 081 { 082 return doc; 083 } 084 085 //Internal mapping between paragraphs and the paragraphs annotations in the gate's document 086 private Hashtable<Paragraph,Annotation> parMapping = new Hashtable<Paragraph,Annotation>(); 087 //Internal mapping between sentences and the sentences annotations in the gate's document 088 private Hashtable<Sentence,Annotation> sentMapping = new Hashtable<Sentence,Annotation>(); 089 //Internal mapping between tokens and the tokens annotations in the gate's document 090 private Hashtable<Token,Annotation> tokMapping = new Hashtable<Token,Annotation>(); 091 092 /** 093 * Returns the annotation object for a given paragraph 094 */ 095 protected Annotation getParagraphMapping(Paragraph par) 096 { 097 return parMapping.get(par); 098 } 099 100 /** 101 * Sets the annotation object for a given paragraph 102 */ 103 protected void setParagraphMapping(Paragraph par, Annotation annot) 104 { 105 parMapping.put(par, annot); 106 } 107 108 /** 109 * Returns the annotation object for a given sentence 110 */ 111 protected Annotation getSentenceMapping(Sentence sent) 112 { 113 return sentMapping.get(sent); 114 } 115 116 /** 117 * Sets the annotation object for a given sentence 118 */ 119 protected void setSentenceMapping(Sentence sent, Annotation annot) 120 { 121 sentMapping.put(sent, annot); 122 } 123 124 /** 125 * Returns the annotation object for a given token 126 */ 127 protected Annotation getTokenMapping(Token tok) 128 { 129 return tokMapping.get(tok); 130 } 131 132 /** 133 * Sets the annotation object for a given token 134 */ 135 protected void setTokenMapping(Token tok, Annotation annot) 136 { 137 tokMapping.put(tok, annot); 138 } 139 140 141 142 }