/**
 * Token.java
 * jCOLIBRI2 framework.
 * @author Juan A. Recio-García.
 * GAIA - Group for Artificial Intelligence Applications
 * http://gaia.fdi.ucm.es
 * 15/06/2007
 */
package jcolibri.extensions.textual.IE.representation;

import java.util.ArrayList;
import java.util.List;

import jcolibri.extensions.textual.IE.representation.info.WeightedRelation;


/**
 * A token represents an elementary piece of text. It is usually a word or punctuation symbol.
 * This object stores some annotations about that piece of text:
 * <ul>
 * <li>Whether the token is a stop word (word without sense).
 * <li>Whether the token is a main name inside the sentence.
 * <li>The stemmed word.
 * <li>The Part-Of-Speech tag of the token.
 * <li>A list of relations with other similar tokens.
 * </ul>
 * A freshly created token carries no annotations: both flags are {@code false},
 * the stem and POS tag are {@code null}, and the relation list is empty.
 *
 * @author Juan A. Recio-Garcia
 * @version 1.0
 *
 */
public class Token {

    /** {@code true} if the token has been flagged as a stop word. */
    protected boolean stopWord;
    /** {@code true} if the token has been flagged as a main name. */
    protected boolean isMainName;
    /** Stem of the token, or {@code null} if none has been set. */
    protected String stem;
    /** Part-Of-Speech tag of the token, or {@code null} if none has been set. */
    protected String postag;
    /** Relations attached to this token, in insertion order. */
    protected List<WeightedRelation> relations;

    /** Original text of the token as given to the constructor. */
    protected String text;

    /**
     * Creates a token from a string. All annotations start unset
     * (flags {@code false}, stem and POS tag {@code null}, no relations).
     *
     * @param text original content of the token; stored as is, without validation
     */
    public Token(String text)
    {
        this.stopWord = false;
        this.isMainName = false;
        this.stem = null;
        this.postag = null;
        this.text = text;
        this.relations = new ArrayList<WeightedRelation>();
    }

    /**
     * Returns the original content of the token.
     *
     * @return the string passed to the constructor
     */
    public String getRawContent()
    {
        return text;
    }

    /**
     * Adds a relation to the end of this token's relation list.
     *
     * @param relation relation to append
     */
    public void addRelation(WeightedRelation relation)
    {
        relations.add(relation);
    }

    /**
     * Returns the relations.
     *
     * @return the internal relation list itself (not a copy), so changes made
     *         through the returned list are visible to this token
     */
    public List<WeightedRelation> getRelations()
    {
        return relations;
    }

    /**
     * Returns if the token is a Main Name.
     *
     * @return {@code true} if the token is flagged as a main name
     */
    public boolean isMainName()
    {
        return isMainName;
    }

    /**
     * Sets if the token is a Main Name.
     *
     * @param isMainName new value of the main-name flag
     */
    public void setMainName(boolean isMainName)
    {
        this.isMainName = isMainName;
    }

    /**
     * Returns the POS tag.
     *
     * @return the Part-Of-Speech tag, or {@code null} if none has been set
     */
    public String getPostag()
    {
        return postag;
    }

    /**
     * Sets the POS tag.
     *
     * @param postag new Part-Of-Speech tag
     */
    public void setPostag(String postag)
    {
        this.postag = postag;
    }

    /**
     * Returns the stem.
     *
     * @return the stem, or {@code null} if none has been set
     */
    public String getStem()
    {
        return stem;
    }

    /**
     * Sets the stem.
     *
     * @param stem new stem of the token
     */
    public void setStem(String stem)
    {
        this.stem = stem;
    }

    /**
     * Returns if the token is a stop word.
     *
     * @return {@code true} if the token is flagged as a stop word
     */
    public boolean isStopWord()
    {
        return stopWord;
    }

    /**
     * Sets if the token is a stop word.
     *
     * @param stopWord new value of the stop-word flag
     */
    public void setStopWord(boolean stopWord)
    {
        this.stopWord = stopWord;
    }

    /**
     * Prints the content and annotations.
     *
     * @return a single line, terminated by a newline, holding the raw content,
     *         stem, POS tag and both flags; the relations are not included
     */
    @Override
    public String toString()
    {
        // Goes through getRawContent() rather than the field so that a subclass
        // overriding the accessor is reflected in the printed form.
        return " [TOKEN: "+getRawContent()+", stem: "+stem+", POSTAG: "+postag+", isStopWord?: "+stopWord+", isMainName?:"+isMainName+"]\n";
    }


}