001    /**
002     * WordNetBridge.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 10/04/2007
008     */
009    package jcolibri.extensions.textual.wordnet;
010    
011    import java.net.URL;
012    import java.util.ArrayList;
013    import java.util.List;
014    
015    import jcolibri.util.FileIO;
016    import jcolibri.util.ProgressController;
017    
018    import net.didion.jwnl.JWNL;
019    import net.didion.jwnl.data.IndexWord;
020    import net.didion.jwnl.data.PointerType;
021    import net.didion.jwnl.data.PointerUtils;
022    import net.didion.jwnl.data.Synset;
023    import net.didion.jwnl.data.Word;
024    import net.didion.jwnl.data.list.PointerTargetNode;
025    import net.didion.jwnl.data.list.PointerTargetNodeList;
026    import net.didion.jwnl.data.list.PointerTargetTree;
027    import net.didion.jwnl.data.relationship.RelationshipFinder;
028    import net.didion.jwnl.data.relationship.RelationshipList;
029    import net.didion.jwnl.dictionary.Dictionary;
030    
031    /**
032     * WordNet wrapper that loads the dictionary into memory without having to install it in the file system.
033     * The JColibriPrincetonObjectDictionary is a modification of the WordNet original dictionary that allows to load 
034     * the dictionary from the class-path or jar file into memory. That avoids having to install wordnet in the file system.
035     * <p>
036     * To avoid memory problems use the -Xms -Xmx VM params. For example to use a max of 1Gb of memory use: -Xms256m -Xmx1024m 
037     * @author Juan A. Recio-Garcia
038     * @version 1.0
039     */
040    public class WordNetBridge {
041    
042            /** Part-Of-Speech tags */
043            public enum POS  {ADJECTIVE, ADVERB, NOUN, VERB};
044            
045            private static boolean initialized = false;
046            
047            /**
048             * Initializes WordNet loading everything into memory.
049             * <br>
050             * To avoid memory problems use the -Xms -Xmx VM params. For example to use a max of 1Gb of memory use: -Xms256m -Xmx1024m 
051             */
052            public static void init()
053            {
054                    URL propsFile = FileIO.findFile(WordNetBridge.class.getPackage().getName().replace('.', '/')+"/map_properties.xml");
055                    if(!initialized)
056                            try {
057                                    org.apache.commons.logging.LogFactory.getLog(WordNetBridge.class).info("Initializing WordNet");
058                                    ProgressController.init(WordNetBridge.class,"Initializing Wordnet", -1);
059                                    JWNL.initialize(propsFile.openStream());
060                                    ProgressController.finish(WordNetBridge.class);
061                                    initialized=true;
062                            } catch (Exception e) {
063                                    org.apache.commons.logging.LogFactory.getLog(WordNetBridge.class).error(e);
064                            }
065    
066            }
067            
068            /**
069             * Releases all resources and dictionaries
070             */
071            public static void deInit()
072            {
073                    org.apache.commons.logging.LogFactory.getLog(WordNetBridge.class).info("DeInitializing WordNet");
074                    JWNL.shutdown();
075            }
076            
077            
078            private static net.didion.jwnl.data.POS getPOS(POS pos)
079            {
080                    if(pos==POS.ADJECTIVE)
081                            return net.didion.jwnl.data.POS.ADJECTIVE;
082                    else if (pos==POS.NOUN)
083                            return net.didion.jwnl.data.POS.NOUN;
084                    else if (pos==POS.ADVERB)
085                            return net.didion.jwnl.data.POS.ADVERB;
086                    else 
087                            return net.didion.jwnl.data.POS.VERB;
088            }
089            
090            /**
091             * Checks if two words are in the same synset when the have the given POS tag
092             * @param w1 word 1
093             * @param pos1 POS tag of word 1
094             * @param w2 word 2
095             * @param pos2 POS tag of word 2
096             */
097            public static boolean sameSynset(String w1, POS pos1, String w2, POS pos2)
098            {
099                    try {
100                            IndexWord start = Dictionary.getInstance().lookupIndexWord(getPOS(pos1), w1);
101                            IndexWord end   = Dictionary.getInstance().lookupIndexWord(getPOS(pos2), w2);
102                            if((start==null)||(end==null)) 
103                                    return false;
104                            int INMEDIATEdistance = RelationshipFinder.getInstance()
105                            .getImmediateRelationship(start, end);
106                            if (INMEDIATEdistance > -1)
107                                    return true;                            
108                    } catch (Exception e) {
109                            org.apache.commons.logging.LogFactory.getLog(WordNetBridge.class).error(e);
110                    }
111                    
112                    return false;
113            }
114            
115            /**
116             * Returs a list of words in the same synset that the given word
117             * @param word word to find the synset
118             * @param posw POS tag of the word
119             */
120            public static String[] SynsetWords(String word, POS posw)
121            {
122                    try {
123                            java.util.ArrayList<String> res = new java.util.ArrayList<String>();
124                            IndexWord iw = Dictionary.getInstance().lookupIndexWord(getPOS(posw), word);
125                            Synset[] synsets = iw.getSenses();
126                            for(Synset ss : synsets)
127                            {
128                                    Word[] words = ss.getWords();
129                                    for(Word w : words)
130                                            res.add(w.getLemma());
131                            }
132                            String[] resArray = new String[res.size()];
133                            return res.toArray(resArray);
134                    } catch (Exception e) {
135                            org.apache.commons.logging.LogFactory.getLog(WordNetBridge.class).error(e);
136                    }       
137                    
138                    return new String[0];
139            }
140    
141            /**
142             * Get all of the hypernyms (parents) of  a word
143             */
144            public static String[] getParents(String word, POS pos){
145                    try {
146                            // Get all of the hypernyms (parents) of the first sense of <var>word</var>
147                            java.util.HashSet<String> res = new java.util.HashSet<String>();
148    
149                            IndexWord iw = Dictionary.getInstance().lookupIndexWord(getPOS(pos), word);
150                            PointerTargetNodeList hypernyms = PointerUtils.getInstance().getDirectHypernyms(iw.getSense(1));
151                            for(int i=0; i<hypernyms.size(); i++)
152                            {
153                                    PointerTargetNode ptn = (PointerTargetNode)hypernyms.get(i);
154                                    Word[] words = ptn.getSynset().getWords();
155                                    for(Word w : words)
156                                            res.add(w.getLemma());                          
157                            }
158                            String[] resArray = new String[res.size()];
159                            return res.toArray(resArray);                   
160                    } catch (Exception e) {
161                            org.apache.commons.logging.LogFactory.getLog(WordNetBridge.class).error(e);
162                    }
163                    return new String[0];
164            }
165    
166            /**
167             * Get all of the childrens of a word
168             */
169            public static String[] getChildren(String word, POS pos){
170                    try {
171                            // Get all of the hypernyms (parents) of the first sense of <var>word</var>
172                            java.util.HashSet<String> res = new java.util.HashSet<String>();
173    
174                            IndexWord iw = Dictionary.getInstance().lookupIndexWord(getPOS(pos), word);
175                            PointerTargetTree hyponyms = PointerUtils.getInstance().getHyponymTree(iw.getSense(1));
176                            List list = (ArrayList)hyponyms.toList();
177                            for(int i=0; i<list.size(); i++)
178                            {
179                                    PointerTargetNodeList ptnl = (PointerTargetNodeList)list.get(i);
180                                    for(int j=0; j<ptnl.size(); j++)
181                                    {
182                                            PointerTargetNode ptn = (PointerTargetNode)ptnl.get(0);
183                                            Word[] words = ptn.getSynset().getWords();
184                                            for(Word w : words)
185                                                    res.add(w.getLemma());
186                                    }
187                            }
188                            String[] resArray = new String[res.size()];
189                            return res.toArray(resArray);                   
190                    } catch (Exception e) {
191                            org.apache.commons.logging.LogFactory.getLog(WordNetBridge.class).error(e);
192                    }
193                    return new String[0];
194            }
195    
196            /**
197             * Distance between two words using the SimilarTo relationship
198             */     
199            public static int similarToDistance(String w1, POS pos1, String w2, POS pos2)
200            {
201                    return distance(w1,pos1,w2,pos2,PointerType.SIMILAR_TO);
202            }
203    
204            /**
205             * Distance between two words using the Hypernym relationship
206             */
207            public static int hypernymDistance(String w1, POS pos1, String w2, POS pos2)
208            {
209                    return distance(w1,pos1,w2,pos2,PointerType.HYPERNYM);
210            }
211            
212            private static int distance(String w1, POS pos1, String w2, POS pos2, PointerType type)
213            {
214                    try {
215                            IndexWord start = Dictionary.getInstance().lookupIndexWord(getPOS(pos1), w1);
216                            IndexWord end   = Dictionary.getInstance().lookupIndexWord(getPOS(pos2), w2);
217    
218                            if((start==null)||(end==null)) 
219                                    return Integer.MAX_VALUE;
220    
221                            Synset startSynset = start.getSense(1);
222                            Synset endSynset = end.getSense(1);
223                            
224                            RelationshipList list = RelationshipFinder.getInstance().findRelationships(startSynset, endSynset, type);
225                            //System.out.println("Synonym relationship between \"" + start.getLemma() + "\" and \"" + end.getLemma() + "\":");
226                            //for (Iterator itr = list.iterator(); itr.hasNext();)
227                                    //((Relationship) itr.next()).getNodeList().print();
228                            
229                            //if(!list.isEmpty())
230                            //      System.out.println("Depth: " + ((Relationship) list.get(0)).getDepth());
231                            int res = list.size();
232                            if(res>0)
233                                    return list.size();
234                            else 
235                                    return Integer.MAX_VALUE; 
236                    } catch (Exception e) {
237                            org.apache.commons.logging.LogFactory.getLog(WordNetBridge.class).error(e);
238                            e.printStackTrace();
239                    }
240                    return -1;
241            }
242    
243            /**
244             * Test case.
245             */
246            public static void main(String[] args) {
247                    try{
248                    init();
249                    System.out.println("Are in the same Synset summary and synopsis? "+sameSynset("summary", POS.NOUN, "synopsis", POS.NOUN));
250                    String[] synsetWords = SynsetWords("synopsis", POS.NOUN);
251                    System.out.println("Sysnset of synopsis: ");
252                    for(String s: synsetWords) System.out.println(s);
253                    System.out.println("SimilarTo distance summary<->synopsis: "+similarToDistance("summary", POS.NOUN, "synopsis", POS.NOUN));
254                    System.out.println("Synonymous distance Findings<->Analysis: "+hypernymDistance("Findings", POS.NOUN, "Analysis", POS.NOUN));
255                    String[] parents = getParents("dog",POS.NOUN);
256                    System.out.println("Parents of dog: ");
257                    for(String s: parents)  System.out.println(s);
258                    System.out.println("Children of dog: ");
259                    String[] children = getChildren("dog",POS.NOUN);
260                    for(String s: children) System.out.println(s);
261                    deInit();
262                    
263                    }catch(java.lang.OutOfMemoryError e){
264                            org.apache.commons.logging.LogFactory.getLog(WordNetBridge.class).error("WordNet requires more memory. Launch the JVM with these flags: java -Xms256m -Xmx512m ...");
265                    }
266                    
267            }
268    }