001    package jcolibri.method.retrieve.NNretrieval.similarity.local.textual;
002    
003    import java.util.HashSet;
004    import java.util.Set;
005    
006    import jcolibri.exception.NoApplicableSimilarityFunctionException;
007    import jcolibri.extensions.textual.IE.representation.IEText;
008    import jcolibri.method.retrieve.NNretrieval.similarity.LocalSimilarityFunction;
009    import jcolibri.method.retrieve.NNretrieval.similarity.local.textual.TextualSimUtils.WeightedString;
010    
011    /**
012     * Jaccard Coefficient Similarity.
013     * <p>
014     * This function computes: |intersection(o1,o2)| / |union(o1,o2)|.
015     * </p>
016     * <p>It is applicable to any Text object.</p>
017     * <p>
018     * Developed at: Robert Gordon University - Aberdeen & Facultad Informática,
019     * Universidad Complutense de Madrid (GAIA)
020     * </p>
021     * 
022     * @author Juan Antonio Recio García
023     * @version 2.0
024     */
025    public class JaccardCoefficient implements LocalSimilarityFunction {
026        
027            /**
028             * Applies the similarity
029             * @param caseObject
030             *            IEText
031             * @param queryObject
032             *            IEText
033             * @return the result of the similarity function
034             */ 
035            public double compute(Object caseObject, Object queryObject) throws NoApplicableSimilarityFunctionException{
036                    if ((caseObject == null) || (queryObject == null))
037                            return 0;
038                    if (!(caseObject instanceof IEText))
039                            throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), caseObject.getClass());
040                    if (!(queryObject instanceof IEText))
041                            throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), queryObject.getClass());
042    
043                    IEText caseText = (IEText) caseObject;
044                    IEText queryText = (IEText) queryObject;
045                    
046                    Set<WeightedString> caseSet = new HashSet<WeightedString>();
047                    Set<WeightedString> querySet = new HashSet<WeightedString>();
048                    
049                    TextualSimUtils.expandTokensSet(caseText.getAllTokens(), queryText.getAllTokens(), caseSet, querySet);
050    
051                    Set<WeightedString> union = new HashSet<WeightedString>(caseSet);
052                    union.addAll(querySet);
053                    double unionSize = TextualSimUtils.getSize(union);
054                    
055                    
056                    caseSet.retainAll(querySet);
057                    double intersectionSize = TextualSimUtils.getSize(caseSet);
058                    
059                    
060                    return intersectionSize / unionSize;
061                    
062            }
063    
064    
065            
066                /* (non-Javadoc)
067                 * @see jcolibri.method.retrieve.NNretrieval.similarity.LocalSimilarityFunction#isApplicable(java.lang.Object, java.lang.Object)
068                 */
069                public boolean isApplicable(Object o1, Object o2)
070                {
071                    if((o1==null)&&(o2==null))
072                            return true;
073                    else if(o1==null)
074                            return o2 instanceof IEText;
075                    else if(o2==null)
076                            return o1 instanceof IEText;
077                    else
078                            return (o1 instanceof IEText)&&(o2 instanceof IEText);
079                }
080    }