001 package jcolibri.method.retrieve.NNretrieval.similarity.local.textual; 002 003 import java.util.HashSet; 004 import java.util.Set; 005 006 import jcolibri.exception.NoApplicableSimilarityFunctionException; 007 import jcolibri.extensions.textual.IE.representation.IEText; 008 import jcolibri.method.retrieve.NNretrieval.similarity.LocalSimilarityFunction; 009 import jcolibri.method.retrieve.NNretrieval.similarity.local.textual.TextualSimUtils.WeightedString; 010 011 /** 012 * Jaccard Coefficient Similarity. 013 * <p> 014 * This function computes: |intersection(o1,o2)| / |union(o1,o2)|. 015 * </p> 016 * <p>It is applicable to any Text object.</p> 017 * <p> 018 * Developed at: Robert Gordon University - Aberdeen & Facultad Informática, 019 * Universidad Complutense de Madrid (GAIA) 020 * </p> 021 * 022 * @author Juan Antonio Recio García 023 * @version 2.0 024 */ 025 public class JaccardCoefficient implements LocalSimilarityFunction { 026 027 /** 028 * Applies the similarity 029 * @param caseObject 030 * IEText 031 * @param queryObject 032 * IEText 033 * @return the result of the similarity function 034 */ 035 public double compute(Object caseObject, Object queryObject) throws NoApplicableSimilarityFunctionException{ 036 if ((caseObject == null) || (queryObject == null)) 037 return 0; 038 if (!(caseObject instanceof IEText)) 039 throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), caseObject.getClass()); 040 if (!(queryObject instanceof IEText)) 041 throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), queryObject.getClass()); 042 043 IEText caseText = (IEText) caseObject; 044 IEText queryText = (IEText) queryObject; 045 046 Set<WeightedString> caseSet = new HashSet<WeightedString>(); 047 Set<WeightedString> querySet = new HashSet<WeightedString>(); 048 049 TextualSimUtils.expandTokensSet(caseText.getAllTokens(), queryText.getAllTokens(), caseSet, querySet); 050 051 Set<WeightedString> union = new HashSet<WeightedString>(caseSet); 052 union.addAll(querySet); 053 double unionSize = TextualSimUtils.getSize(union); 054 055 056 caseSet.retainAll(querySet); 057 double intersectionSize = TextualSimUtils.getSize(caseSet); 058 059 060 return intersectionSize / unionSize; 061 062 } 063 064 065 066 /* (non-Javadoc) 067 * @see jcolibri.method.retrieve.NNretrieval.similarity.LocalSimilarityFunction#isApplicable(java.lang.Object, java.lang.Object) 068 */ 069 public boolean isApplicable(Object o1, Object o2) 070 { 071 if((o1==null)&&(o2==null)) 072 return true; 073 else if(o1==null) 074 return o2 instanceof IEText; 075 else if(o2==null) 076 return o1 instanceof IEText; 077 else 078 return (o1 instanceof IEText)&&(o2 instanceof IEText); 079 } 080 }