001    package jcolibri.method.retrieve.NNretrieval.similarity.local.textual;
002    
003    import java.util.HashSet;
004    import java.util.Set;
005    
006    import jcolibri.exception.NoApplicableSimilarityFunctionException;
007    import jcolibri.extensions.textual.IE.representation.IEText;
008    import jcolibri.method.retrieve.NNretrieval.similarity.LocalSimilarityFunction;
009    import jcolibri.method.retrieve.NNretrieval.similarity.local.textual.TextualSimUtils.WeightedString;
010    
011    
012    /**
013     * Cossine Coefficient Similarity.
014     * <p>
015     * This function computes: |intersection(o1,o2)| / (sqrt(|o1|)*sqrt(|o2|)).
016     * </p>
017     * <p>It is applicable to any Text object.</p>
018     * <p>
019     * Developed at: Robert Gordon University - Aberdeen & Facultad Informática,
020     * Universidad Complutense de Madrid (GAIA)
021     * </p>
022     * 
023     * @author Juan Antonio Recio García
024     * @version 2.0
025     */
026    public class CosineCoefficient implements LocalSimilarityFunction {
027    
028            /**
029             * Applies the similarity
030             * @param caseObject
031             *            IEText
032             * @param queryObject
033             *            IEText
034             * @return the result of the similarity function
035             */ 
036            public double compute(Object caseObject, Object queryObject) throws NoApplicableSimilarityFunctionException{
037                    if ((caseObject == null) || (queryObject == null))
038                            return 0;
039                    if (!(caseObject instanceof IEText))
040                            throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), caseObject.getClass());
041                    if (!(queryObject instanceof IEText))
042                            throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), queryObject.getClass());
043    
044                    IEText caseText = (IEText) caseObject;
045                    IEText queryText = (IEText) queryObject;
046                    
047                    Set<WeightedString> caseSet = new HashSet<WeightedString>();
048                    Set<WeightedString> querySet = new HashSet<WeightedString>();
049                    
050                    TextualSimUtils.expandTokensSet(caseText.getAllTokens(), queryText.getAllTokens(), caseSet, querySet);
051                    
052                    double size1 = TextualSimUtils.getSize(caseSet);
053                    double size2 = TextualSimUtils.getSize(querySet);
054                    
055                    caseSet.retainAll(querySet);
056                    double intersectionSize = TextualSimUtils.getSize(caseSet);
057                    
058                    return intersectionSize / (Math.sqrt(size1)*Math.sqrt(size2));  
059            }
060    
061    
062            
063                /* (non-Javadoc)
064                 * @see jcolibri.method.retrieve.NNretrieval.similarity.LocalSimilarityFunction#isApplicable(java.lang.Object, java.lang.Object)
065                 */
066                public boolean isApplicable(Object o1, Object o2)
067                {
068                    if((o1==null)&&(o2==null))
069                            return true;
070                    else if(o1==null)
071                            return o2 instanceof IEText;
072                    else if(o2==null)
073                            return o1 instanceof IEText;
074                    else
075                            return (o1 instanceof IEText)&&(o2 instanceof IEText);
076                }
077    
078    }