001 package jcolibri.method.retrieve.NNretrieval.similarity.local.textual; 002 003 import java.util.HashSet; 004 import java.util.Set; 005 006 import jcolibri.exception.NoApplicableSimilarityFunctionException; 007 import jcolibri.extensions.textual.IE.representation.IEText; 008 import jcolibri.method.retrieve.NNretrieval.similarity.LocalSimilarityFunction; 009 import jcolibri.method.retrieve.NNretrieval.similarity.local.textual.TextualSimUtils.WeightedString; 010 011 012 /** 013 * Cossine Coefficient Similarity. 014 * <p> 015 * This function computes: |intersection(o1,o2)| / (sqrt(|o1|)*sqrt(|o2|)). 016 * </p> 017 * <p>It is applicable to any Text object.</p> 018 * <p> 019 * Developed at: Robert Gordon University - Aberdeen & Facultad Informática, 020 * Universidad Complutense de Madrid (GAIA) 021 * </p> 022 * 023 * @author Juan Antonio Recio García 024 * @version 2.0 025 */ 026 public class CosineCoefficient implements LocalSimilarityFunction { 027 028 /** 029 * Applies the similarity 030 * @param caseObject 031 * IEText 032 * @param queryObject 033 * IEText 034 * @return the result of the similarity function 035 */ 036 public double compute(Object caseObject, Object queryObject) throws NoApplicableSimilarityFunctionException{ 037 if ((caseObject == null) || (queryObject == null)) 038 return 0; 039 if (!(caseObject instanceof IEText)) 040 throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), caseObject.getClass()); 041 if (!(queryObject instanceof IEText)) 042 throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), queryObject.getClass()); 043 044 IEText caseText = (IEText) caseObject; 045 IEText queryText = (IEText) queryObject; 046 047 Set<WeightedString> caseSet = new HashSet<WeightedString>(); 048 Set<WeightedString> querySet = new HashSet<WeightedString>(); 049 050 TextualSimUtils.expandTokensSet(caseText.getAllTokens(), queryText.getAllTokens(), caseSet, querySet); 051 052 double size1 = TextualSimUtils.getSize(caseSet); 053 double size2 = TextualSimUtils.getSize(querySet); 054 055 caseSet.retainAll(querySet); 056 double intersectionSize = TextualSimUtils.getSize(caseSet); 057 058 return intersectionSize / (Math.sqrt(size1)*Math.sqrt(size2)); 059 } 060 061 062 063 /* (non-Javadoc) 064 * @see jcolibri.method.retrieve.NNretrieval.similarity.LocalSimilarityFunction#isApplicable(java.lang.Object, java.lang.Object) 065 */ 066 public boolean isApplicable(Object o1, Object o2) 067 { 068 if((o1==null)&&(o2==null)) 069 return true; 070 else if(o1==null) 071 return o2 instanceof IEText; 072 else if(o2==null) 073 return o1 instanceof IEText; 074 else 075 return (o1 instanceof IEText)&&(o2 instanceof IEText); 076 } 077 078 }