/**
 * TextualSimUtils.java
 * jCOLIBRI2 framework.
 * @author Juan A. Recio-García.
 * GAIA - Group for Artificial Intelligence Applications
 * http://gaia.fdi.ucm.es
 * 26/06/2007
 */
package jcolibri.method.retrieve.NNretrieval.similarity.local.textual;

import java.util.Collection;
import java.util.Set;

import jcolibri.extensions.textual.IE.representation.Token;
import jcolibri.extensions.textual.IE.representation.info.WeightedRelation;

/**
 * Utilities to compute textual similarities.
 * @author Juan A. Recio-Garcia
 * @version 1.0
 *
 */
public class TextualSimUtils
{
    /**
     * Expands the received token collections, which contain the tokens of a case and of a query.
     * Both output sets are cleared first. Then, for every query token that is not a stop word:
     * <ul>
     * <li>for each of its WeightedRelations whose destination token is contained in
     * <code>caseTokens</code>, a WeightedString named
     * <code>queryStem + "_RELATED_" + destinationStem</code> and carrying the relation weight
     * is added to BOTH output sets;</li>
     * <li>the stem of the query token itself is added to <code>querySet</code> with weight 1.</li>
     * </ul>
     * Finally the stem of every case token that is not a stop word is added to
     * <code>caseSet</code> with weight 1.
     * <p>
     * WeightedString equality only looks at the string, so when two entries with the same
     * string are added to a set, the one added first is the one that is kept.
     *
     * @param caseTokens Input tokens of the case
     * @param queryToken Input tokens of the query
     * @param caseSet Output set containing the tokens of the case represented as WeightedString objects that hold a string (the token) and a weight
     * @param querySet Output set containing the tokens of the query represented as WeightedString objects that hold a string (the token) and a weight
     */
    public static void expandTokensSet(Collection<Token> caseTokens, Collection<Token> queryToken, Set<WeightedString> caseSet, Set<WeightedString> querySet)
    {
        caseSet.clear();
        querySet.clear();

        for (Token qTok : queryToken)
        {
            // Stop words contribute neither their stem nor their relations.
            if (qTok.isStopWord())
            {
                continue;
            }

            for (WeightedRelation rel : qTok.getRelations())
            {
                Token destToken = rel.getDestination();
                if (caseTokens.contains(destToken))
                {
                    // The synthetic "related" token is shared by both sets so that it
                    // counts as common to the case and the query.
                    String newStem = qTok.getStem() + "_RELATED_" + destToken.getStem();
                    WeightedString ws = new WeightedString(newStem, rel.getWeight());
                    caseSet.add(ws);
                    querySet.add(ws);
                }
            }

            querySet.add(new WeightedString(qTok.getStem(), 1));
        }

        for (Token cTok : caseTokens)
        {
            if (!cTok.isStopWord())
            {
                caseSet.add(new WeightedString(cTok.getStem(), 1));
            }
        }
    }

    /**
     * Returns the "size" of a collection, taking into account that each WeightedString
     * object has an associated weight: the result is the sum of the weights of all elements.
     *
     * @param col Collection of weighted strings
     * @return Sum of the weights of the elements of <code>col</code> (0 for an empty collection)
     */
    public static double getSize(Collection<WeightedString> col)
    {
        double res = 0;
        for (WeightedString ws : col)
        {
            res += ws.getWeight();
        }
        return res;
    }


    /**
     * Represents a string with an associated weight.
     * Two WeightedString objects are equal when their strings are equal; the weight
     * does not take part in <code>equals</code> nor in <code>hashCode</code>.
     * @author Juan A. Recio-Garcia
     * @version 1.0
     *
     */
    public static class WeightedString
    {
        String string;
        double weight;

        /**
         * Creates a weighted string.
         * @param string The string (token)
         * @param weight The weight associated with the string
         */
        public WeightedString(String string, double weight)
        {
            super();
            this.string = string;
            this.weight = weight;
        }

        /**
         * @return Returns the string.
         */
        public String getString()
        {
            return string;
        }

        /**
         * @return Returns the weight.
         */
        public double getWeight()
        {
            return weight;
        }

        /**
         * Hash code derived from the string only, consistent with {@link #equals(Object)}.
         * A null string hashes to 0 instead of throwing a NullPointerException.
         */
        @Override
        public int hashCode()
        {
            return (string == null) ? 0 : string.hashCode();
        }

        /**
         * Compares by string only; the weight is ignored.
         * Two objects holding a null string are considered equal.
         */
        @Override
        public boolean equals(Object o)
        {
            if (!(o instanceof WeightedString))
            {
                return false;
            }
            String other = ((WeightedString) o).getString();
            return (string == null) ? (other == null) : string.equals(other);
        }


    }
}