001    /**
002     * Token.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 15/06/2007
008     */
009    package jcolibri.extensions.textual.IE.representation;
010    
011    import java.util.ArrayList;
012    import java.util.List;
013    
014    import jcolibri.extensions.textual.IE.representation.info.WeightedRelation;
015    
016    
017    /**
018     * A token represents an elementary piece of text. It is usually a word or punctuation symbol.
019     * This object stores some flags extracted by specific methods:
020     * <ul>
021     * <li>If the token is a stop word (word without sense).
022     * <li>If the token is a main name inside the sentence.
023     * <li>The stemmed word
024     * <li>The Part-Of-Speech tag of the token.
025     * <li>A list of relations with other similar tokens.
026     * </ul>
027     * @author Juan A. Recio-Garcia
028     * @version 1.0
029     *
030     */
031    public class Token {
032    
033            protected boolean stopWord;
034            protected boolean isMainName;
035            protected String stem;
036            protected String postag;
037            protected List<WeightedRelation> relations;
038    
039            protected String text;
040            
041            /**
042             * Creates a token from a string
043             */
044            public Token(String text)
045            {
046                    this.stopWord = false;
047                    this.isMainName = false;
048                    this.stem = null;
049                    this.postag = null;
050                    this.text = text;
051                    relations = new ArrayList<WeightedRelation>();
052            }
053            
054            /**
055             * Returns the original content of the token
056             */
057            public String getRawContent()
058            {
059                    return text;
060            }
061    
062            /**
063             * Adds a relation
064             */
065            public void addRelation(WeightedRelation relation)
066            {
067                relations.add(relation);
068            }
069            
070            /**
071             * Returns the relations
072             */
073            public List<WeightedRelation> getRelations()
074            {
075                return relations;
076            }
077            
078            /**
079             * Returns if the token is a Main Name
080             */
081            public boolean isMainName() {
082                    return isMainName;
083            }
084    
085            /**
086             * Sets if the token is a Main Name
087             */
088            public void setMainName(boolean isMainName) {
089                    this.isMainName = isMainName;
090            }
091    
092            /**
093             * Returns the POS tag
094             */
095            public String getPostag() {
096                    return postag;
097            }
098    
099            /**
100             * Sets the POS tag
101             */
102            public void setPostag(String postag) {
103                    this.postag = postag;
104            }
105    
106            /**
107             * Returns the stem
108             */
109            public String getStem() {
110                    return stem;
111            }
112    
113            /**
114             * Sets the stem
115             */
116            public void setStem(String stem) {
117                    this.stem = stem;
118            }
119    
120            /**
121             * Returns if the token is a stop word
122             */
123            public boolean isStopWord() {
124                    return stopWord;
125            }
126    
127            /**
128             * Sets if the token is a stop word
129             */
130            public void setStopWord(boolean stopWord) {
131                    this.stopWord = stopWord;
132            }
133            
134            /**
135             * Prints the content and annotations.
136             */
137            public String toString()
138            {
139                    return "    [TOKEN: "+getRawContent()+", stem: "+stem+", POSTAG: "+postag+", isStopWord?: "+stopWord+", isMainName?:"+isMainName+"]\n";
140            }
141    
142            
143    }