001    /**
002     * TextualSimUtils.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 26/06/2007
008     */
009    package jcolibri.method.retrieve.NNretrieval.similarity.local.textual;
010    
011    import java.util.Collection;
012    import java.util.Set;
013    
014    import jcolibri.extensions.textual.IE.representation.Token;
015    import jcolibri.extensions.textual.IE.representation.info.WeightedRelation;
016    
017    /**
018     * Utilities to compute textual similarities
019     * @author Juan A. Recio-Garcia
020     * @version 1.0
021     *
022     */
023    public class TextualSimUtils
024    {
025        /**
026         * Expands the tokens collections recived containing the tokens of a case and a query. 
027         * The expansion means that new tokens are added to the returned sets depending on
028         * the WeightedRelations between the tokens of the case and the query
029         * @param caseTokens Input tokens of the case
030         * @param queryToken Input tokens of the query
031         * @param caseSet Output set containing the tokens of the case represented in WeightedString objects that contain a string (the token) and a weight
032         * @param querySet Output set containing the tokens of the query represented in WeightedString objects that contain a string (the token) and a weight
033         */
034        public static void expandTokensSet(Collection<Token> caseTokens, Collection<Token> queryToken, Set<WeightedString> caseSet, Set<WeightedString> querySet)
035        {
036            caseSet.clear();
037            querySet.clear();
038            for(Token qTok : queryToken)
039            {
040                if(qTok.isStopWord())
041                    continue;
042                for(WeightedRelation rel: qTok.getRelations())
043                {
044                    Token destToken = rel.getDestination();
045                    if(caseTokens.contains(destToken))
046                    {
047                        String newStem = qTok.getStem()+"_RELATED_"+destToken.getStem();
048                        WeightedString ws = new WeightedString(newStem, rel.getWeight());
049                        caseSet.add(ws);
050                        querySet.add(ws);
051                    }
052                }
053                querySet.add(new WeightedString(qTok.getStem(), 1));
054            }
055            for(Token cTok : caseTokens)
056                if(!cTok.isStopWord())
057                    caseSet.add(new WeightedString(cTok.getStem(), 1));
058        }
059        
060        /**
061         * Returns the "size" of a collection having into account that each WeightedString object has an associated weight.
062         */
063        public static double getSize(Collection<WeightedString> col)
064        {
065            double res = 0;
066            for(WeightedString ws: col)
067                res+=ws.getWeight();
068            return res;
069        }
070        
071        
072        /**
073         * Represents a string with an asssociated weight.
074         * @author Juan A. Recio-Garcia
075         * @version 1.0
076         *
077         */
078        public static class WeightedString
079        {
080            String string;
081            double weight;
082            
083            public WeightedString(String string, double weight)
084            {
085                super();
086                this.string = string;
087                this.weight = weight;
088            }
089    
090            /**
091             * @return Returns the string.
092             */
093            public String getString()
094            {
095                return string;
096            }
097    
098            /**
099             * @return Returns the weight.
100             */
101            public double getWeight()
102            {
103                return weight;
104            }
105            
106            public int hashCode()
107            {
108                return string.hashCode();
109            }
110            
111            public boolean equals(Object o)
112            {
113                if(!(o instanceof WeightedString))
114                    return false;
115                else
116                {
117                    WeightedString ws = (WeightedString) o;
118                    return string.equals(ws.getString());
119                }
120            }
121            
122            
123        }
124    }