/**
 * WordNetBridge.java
 * jCOLIBRI2 framework.
 * @author Juan A. Recio-García.
 * GAIA - Group for Artificial Intelligence Applications
 * http://gaia.fdi.ucm.es
 * 10/04/2007
 */
package jcolibri.extensions.textual.wordnet;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import jcolibri.util.FileIO;
import jcolibri.util.ProgressController;

import net.didion.jwnl.JWNL;
import net.didion.jwnl.data.IndexWord;
import net.didion.jwnl.data.PointerType;
import net.didion.jwnl.data.PointerUtils;
import net.didion.jwnl.data.Synset;
import net.didion.jwnl.data.Word;
import net.didion.jwnl.data.list.PointerTargetNode;
import net.didion.jwnl.data.list.PointerTargetNodeList;
import net.didion.jwnl.data.list.PointerTargetTree;
import net.didion.jwnl.data.relationship.RelationshipFinder;
import net.didion.jwnl.data.relationship.RelationshipList;
import net.didion.jwnl.dictionary.Dictionary;

/**
 * WordNet wrapper that loads the dictionary into memory without having to install it in the file system.
 * The JColibriPrincetonObjectDictionary is a modification of the original WordNet dictionary that allows
 * loading the dictionary from the class-path or a jar file into memory. That avoids having to install
 * WordNet in the file system.
 * <p>
 * To avoid memory problems use the -Xms -Xmx VM params. For example, to use a max of 1Gb of memory use:
 * -Xms256m -Xmx1024m
 * @author Juan A. Recio-Garcia
 * @version 1.0
 */
public class WordNetBridge {

    /** Part-Of-Speech tags */
    public enum POS {ADJECTIVE, ADVERB, NOUN, VERB}

    /** Single logger shared by every method of this class. */
    private static final org.apache.commons.logging.Log LOG =
            org.apache.commons.logging.LogFactory.getLog(WordNetBridge.class);

    /** True once JWNL has been initialized successfully and not shut down since. */
    private static boolean initialized = false;

    /**
     * Initializes WordNet loading everything into memory. Calling it again while
     * already initialized does nothing. Failures are logged, not thrown.
     * <br>
     * To avoid memory problems use the -Xms -Xmx VM params.
     * For example, to use a max of 1Gb of memory use: -Xms256m -Xmx1024m
     */
    public static void init()
    {
        if (initialized) {
            return;
        }

        String propsPath = WordNetBridge.class.getPackage().getName().replace('.', '/') + "/map_properties.xml";
        URL propsFile = FileIO.findFile(propsPath);
        if (propsFile == null) {
            // Defensive: report the real cause instead of a NullPointerException from openStream().
            LOG.error("WordNet properties file not found: " + propsPath);
            return;
        }

        try {
            LOG.info("Initializing WordNet");
            ProgressController.init(WordNetBridge.class, "Initializing Wordnet", -1);
            try {
                InputStream props = propsFile.openStream();
                try {
                    JWNL.initialize(props);
                } finally {
                    closeQuietly(props);
                }
                initialized = true;
            } finally {
                // Always close the progress notification, even when initialization fails.
                ProgressController.finish(WordNetBridge.class);
            }
        } catch (Exception e) {
            LOG.error("Error initializing WordNet", e);
        }
    }

    /**
     * Releases all resources and dictionaries. After this call {@link #init()}
     * will initialize WordNet again.
     */
    public static void deInit()
    {
        LOG.info("DeInitializing WordNet");
        JWNL.shutdown();
        // Without this reset a later init() would be a no-op and WordNet would stay shut down.
        initialized = false;
    }

    /** Closes the stream, ignoring any failure: there is nothing useful to do about it. */
    private static void closeQuietly(InputStream stream)
    {
        try {
            stream.close();
        } catch (IOException ignored) {
            // Intentionally ignored: the content has already been consumed.
        }
    }

    /**
     * Maps this class' POS tag to the JWNL one. Any value other than
     * ADJECTIVE, NOUN or ADVERB (including null) maps to VERB.
     */
    private static net.didion.jwnl.data.POS getPOS(POS pos)
    {
        if (pos == POS.ADJECTIVE) {
            return net.didion.jwnl.data.POS.ADJECTIVE;
        } else if (pos == POS.NOUN) {
            return net.didion.jwnl.data.POS.NOUN;
        } else if (pos == POS.ADVERB) {
            return net.didion.jwnl.data.POS.ADVERB;
        } else {
            return net.didion.jwnl.data.POS.VERB;
        }
    }

    /** Looks up the dictionary entry of a word; the result is null when the lookup finds nothing. */
    private static IndexWord lookup(String word, POS pos) throws Exception
    {
        return Dictionary.getInstance().lookupIndexWord(getPOS(pos), word);
    }

    /**
     * Checks if two words are in the same synset when they have the given POS tags.
     * @param w1 word 1
     * @param pos1 POS tag of word 1
     * @param w2 word 2
     * @param pos2 POS tag of word 2
     * @return true if an immediate relationship is found; false if not, if either
     *         word is unknown, or if the lookup fails (the failure is logged)
     */
    public static boolean sameSynset(String w1, POS pos1, String w2, POS pos2)
    {
        try {
            IndexWord start = lookup(w1, pos1);
            IndexWord end = lookup(w2, pos2);
            if ((start == null) || (end == null)) {
                return false;
            }
            int immediateRelationship = RelationshipFinder.getInstance().getImmediateRelationship(start, end);
            return immediateRelationship > -1;
        } catch (Exception e) {
            LOG.error("Error checking synset of \"" + w1 + "\" and \"" + w2 + "\"", e);
        }
        return false;
    }

    /**
     * Returns the words found in the synsets of every sense of the given word.
     * The same lemma may appear more than once.
     * @param word word to find the synsets of
     * @param posw POS tag of the word
     * @return the lemmas, or an empty array if the word is unknown or the lookup fails
     */
    public static String[] SynsetWords(String word, POS posw)
    {
        try {
            IndexWord iw = lookup(word, posw);
            if (iw == null) {
                return new String[0];
            }
            List<String> res = new ArrayList<String>();
            for (Synset sense : iw.getSenses()) {
                for (Word w : sense.getWords()) {
                    res.add(w.getLemma());
                }
            }
            return res.toArray(new String[res.size()]);
        } catch (Exception e) {
            LOG.error("Error getting synset words of \"" + word + "\"", e);
        }
        return new String[0];
    }

    /**
     * Gets the direct hypernyms (parents) of the first sense of a word.
     * @param word word to find the parents of
     * @param pos POS tag of the word
     * @return the distinct lemmas of the parents, or an empty array if the word
     *         is unknown or the lookup fails
     */
    public static String[] getParents(String word, POS pos)
    {
        try {
            IndexWord iw = lookup(word, pos);
            if (iw == null) {
                return new String[0];
            }
            Set<String> res = new HashSet<String>();
            PointerTargetNodeList hypernyms = PointerUtils.getInstance().getDirectHypernyms(iw.getSense(1));
            for (int i = 0; i < hypernyms.size(); i++) {
                PointerTargetNode ptn = (PointerTargetNode) hypernyms.get(i);
                for (Word w : ptn.getSynset().getWords()) {
                    res.add(w.getLemma());
                }
            }
            return res.toArray(new String[res.size()]);
        } catch (Exception e) {
            LOG.error("Error getting parents of \"" + word + "\"", e);
        }
        return new String[0];
    }

    /**
     * Gets the words found in the hyponym (children) tree of the first sense of a word.
     * @param word word to find the children of
     * @param pos POS tag of the word
     * @return the distinct lemmas of every node of the tree, or an empty array if
     *         the word is unknown or the lookup fails
     */
    public static String[] getChildren(String word, POS pos)
    {
        try {
            IndexWord iw = lookup(word, pos);
            if (iw == null) {
                return new String[0];
            }
            Set<String> res = new HashSet<String>();
            PointerTargetTree hyponyms = PointerUtils.getInstance().getHyponymTree(iw.getSense(1));
            // Each entry of the flattened tree is itself a list of nodes.
            // NOTE(review): presumably every list starts at the synset of the word itself,
            // so its own lemmas are part of the result - confirm against JWNL.
            List paths = hyponyms.toList();
            for (int i = 0; i < paths.size(); i++) {
                PointerTargetNodeList path = (PointerTargetNodeList) paths.get(i);
                for (int j = 0; j < path.size(); j++) {
                    // Visit every node of the list, not only the first one.
                    PointerTargetNode ptn = (PointerTargetNode) path.get(j);
                    for (Word w : ptn.getSynset().getWords()) {
                        res.add(w.getLemma());
                    }
                }
            }
            return res.toArray(new String[res.size()]);
        } catch (Exception e) {
            LOG.error("Error getting children of \"" + word + "\"", e);
        }
        return new String[0];
    }

    /**
     * Distance between two words using the SimilarTo relationship.
     * @return a positive value, Integer.MAX_VALUE if the words are unknown or
     *         unrelated, or -1 if the lookup fails
     */
    public static int similarToDistance(String w1, POS pos1, String w2, POS pos2)
    {
        return distance(w1, pos1, w2, pos2, PointerType.SIMILAR_TO);
    }

    /**
     * Distance between two words using the Hypernym relationship.
     * @return a positive value, Integer.MAX_VALUE if the words are unknown or
     *         unrelated, or -1 if the lookup fails
     */
    public static int hypernymDistance(String w1, POS pos1, String w2, POS pos2)
    {
        return distance(w1, pos1, w2, pos2, PointerType.HYPERNYM);
    }

    /**
     * Measures how the first senses of two words are related through the given pointer type.
     * @return the number of relationships found when there is at least one,
     *         Integer.MAX_VALUE if either word is unknown or no relationship exists,
     *         or -1 if the lookup fails (the failure is logged)
     */
    private static int distance(String w1, POS pos1, String w2, POS pos2, PointerType type)
    {
        try {
            IndexWord start = lookup(w1, pos1);
            IndexWord end = lookup(w2, pos2);
            if ((start == null) || (end == null)) {
                return Integer.MAX_VALUE;
            }

            Synset startSynset = start.getSense(1);
            Synset endSynset = end.getSense(1);
            RelationshipList list = RelationshipFinder.getInstance().findRelationships(startSynset, endSynset, type);

            // NOTE(review): this is the count of relationships, not the depth of the shortest
            // one - confirm that this is the intended meaning of "distance".
            int found = list.size();
            return (found > 0) ? found : Integer.MAX_VALUE;
        } catch (Exception e) {
            LOG.error("Error computing distance between \"" + w1 + "\" and \"" + w2 + "\"", e);
        }
        // NOTE(review): -1 compares as "closer than any real distance"; callers must check for it.
        return -1;
    }

    /**
     * Test case.
     */
    public static void main(String[] args) {
        try {
            init();
            System.out.println("Are in the same Synset summary and synopsis? " + sameSynset("summary", POS.NOUN, "synopsis", POS.NOUN));
            String[] synsetWords = SynsetWords("synopsis", POS.NOUN);
            System.out.println("Sysnset of synopsis: ");
            for (String s : synsetWords) {
                System.out.println(s);
            }
            System.out.println("SimilarTo distance summary<->synopsis: " + similarToDistance("summary", POS.NOUN, "synopsis", POS.NOUN));
            System.out.println("Hypernym distance Findings<->Analysis: " + hypernymDistance("Findings", POS.NOUN, "Analysis", POS.NOUN));
            String[] parents = getParents("dog", POS.NOUN);
            System.out.println("Parents of dog: ");
            for (String s : parents) {
                System.out.println(s);
            }
            System.out.println("Children of dog: ");
            String[] children = getChildren("dog", POS.NOUN);
            for (String s : children) {
                System.out.println(s);
            }
            deInit();
        } catch (java.lang.OutOfMemoryError e) {
            LOG.error("WordNet requires more memory. Launch the JVM with these flags: java -Xms256m -Xmx512m ...");
        }
    }
}