001    /**
002     * IEText.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 15/06/2007
008     */
009    package jcolibri.extensions.textual.IE.representation;
010    
011    import java.util.ArrayList;
012    import java.util.List;
013    
014    import jcolibri.datatypes.Text;
015    import jcolibri.extensions.textual.IE.representation.info.FeatureInfo;
016    import jcolibri.extensions.textual.IE.representation.info.PhraseInfo;
017    
018    /**
019     * Represents a Textual attribute that will be processed to extract information.
020     * A text is composed by paragraphs, paragraphs by sentences and sentences by tokens:
021     * <p><center><img src="IETextRepresentation.jpg"/></center></p>
022     * This organization is created by a specific method.
023     * <br>
024     * This object stores a list of paragraphs in the order they appear in the text.
025     * <br>
026     * This class also stores the extracted information:
027     * <ul>
028     * <li>Phrases identified in the text.
029     * <li>Features: identifier-value pairs extracted from the text.
030     * <li>Topics: combining phrases and features a topic can be associated to a text. A topic is a classification of the text.
031     * </ul>
032     * 
033     * @author Juan A. Recio Garcia
034     * @version 1.0
035     * @see jcolibri.extensions.textual.IE.representation.Paragraph
036     * @see jcolibri.extensions.textual.IE.representation.Sentence
037     * @see jcolibri.extensions.textual.IE.representation.Token
038     */
039    public class IEText extends Text
040    {
041    
042        protected List<Paragraph> paragraphs;
043    
044        protected List<PhraseInfo> phrases;
045    
046        protected List<FeatureInfo> features;
047    
048        protected List<String> topics;
049    
050        /**
051         * Creates an empty IEText
052         */
053        public IEText()
054        {
055            paragraphs = new ArrayList<Paragraph>();
056            phrases = new ArrayList<PhraseInfo>();
057            features = new ArrayList<FeatureInfo>();
058            topics = new ArrayList<String>();
059        }
060    
061        /**
062         * Creates an IEText from a String
063         * @param content
064         */
065        public IEText(String content)
066        {
067            super(content);
068            paragraphs = new ArrayList<Paragraph>();
069            phrases = new ArrayList<PhraseInfo>();
070            features = new ArrayList<FeatureInfo>();
071            topics = new ArrayList<String>();
072        }
073    
074        /**
075         * Returns the original text of this IEText object
076         */
077        public String getRAWContent()
078        {
079            return rawContent;
080        }
081    
082        /**
083         * Returns the annotations extracted in this text
084         */
085        public String printAnnotations()
086        {
087            StringBuffer sb = new StringBuffer();
088            for (Paragraph par : paragraphs)
089                sb.append(par.toString());
090            return sb.toString() + "\nPHRASES: " + phrases.toString() + "\nFEATURES: " + features.toString();
091        }
092    
093        /**
094         * Returns the features
095         */
096        public List<FeatureInfo> getFeatures()
097        {
098            return features;
099        }
100    
101        /**
102         * Adds features
103         */
104        public void addFeatures(List<FeatureInfo> features)
105        {
106            features.addAll(features);
107        }
108    
109        /**
110         * Adds a feature
111         */
112        public void addFeature(FeatureInfo feature)
113        {
114            features.add(feature);
115        }
116    
117        /**
118         * Returns the paragraphs
119         */
120        public List<Paragraph> getParagraphs()
121        {
122            return paragraphs;
123        }
124    
125        /**
126         * Adds paragraphs
127         */
128        public void addParagraphs(List<Paragraph> paragraphs)
129        {
130            this.paragraphs.addAll(paragraphs);
131        }
132        
133        /**
134         * Adds a paragraph
135         */
136        public void addParagraph(Paragraph paragraph)
137        {
138            this.paragraphs.add(paragraph);
139        }
140    
141        /**
142         * Returns the phrases
143         */
144        public List<PhraseInfo> getPhrases()
145        {
146            return phrases;
147        }
148    
149        /**
150         * Adds phrases
151         */
152        public void addPhrases(List<PhraseInfo> phrases)
153        {
154            this.phrases.addAll(phrases);
155        }
156    
157        /**
158         * Adds a phrase
159         */
160        public void addPhrase(PhraseInfo phrase)
161        {
162            this.phrases.add(phrase);
163        }
164    
165        /**
166         * Returns the topcis
167         */
168        public List<String> getTopics()
169        {
170            return topics;
171        }
172    
173        /***
174         * Adds topics
175         */
176        public void addTopics(List<String> topics)
177        {
178            this.topics.addAll(topics);
179        }
180    
181        /**
182         * Adds a topic
183         */
184        public void addTopic(String topics)
185        {
186            this.topics.add(topics);
187        }
188    
189        /**
190         * Returns all the sentences of this texts iterating over all paragraphs
191         */
192        public List<Sentence> getAllSentences()
193        {
194            List<Sentence> sentences = new ArrayList<Sentence>();
195            for (Paragraph p : paragraphs)
196                sentences.addAll(p.getSentences());
197            return sentences;
198        }
199    
200        /**
201         * Returs all the tokens of this texts iterating over all paragraphs and their contained sentences.
202         */
203        public List<Token> getAllTokens()
204        {
205            List<Token> tokens = new ArrayList<Token>();
206            for (Paragraph p : paragraphs)
207                for (Sentence s : p.getSentences())
208                    tokens.addAll(s.getTokens());
209            return tokens;
210        }
211    
212    }