001 package jcolibri.method.retrieve.NNretrieval.similarity.local.textual; 002 003 import java.util.HashSet; 004 import java.util.Set; 005 006 import jcolibri.exception.NoApplicableSimilarityFunctionException; 007 import jcolibri.extensions.textual.IE.representation.IEText; 008 import jcolibri.method.retrieve.NNretrieval.similarity.LocalSimilarityFunction; 009 import jcolibri.method.retrieve.NNretrieval.similarity.local.textual.TextualSimUtils.WeightedString; 010 011 /** 012 * Overlap Coefficient Similarity. 013 * <p> 014 * This function computes: |intersection(o1,o2)| / min(|o1|,|o2|). 015 * </p> 016 * <p>It is applicable to any Text object.</p> 017 * <p> 018 * Developed at: Robert Gordon University - Aberdeen & Facultad Informática, 019 * Universidad Complutense de Madrid (GAIA) 020 * </p> 021 * 022 * @author Juan Antonio Recio García 023 * @version 2.0 024 */ 025 public class OverlapCoefficient implements LocalSimilarityFunction { 026 027 /** 028 * Applies the similarity 029 * @param caseObject 030 * IEText 031 * @param queryObject 032 * IEText 033 * @return the result of the similarity function 034 */ 035 public double compute(Object caseObject, Object queryObject) throws NoApplicableSimilarityFunctionException{ 036 if ((caseObject == null) || (queryObject == null)) 037 return 0; 038 if (!(caseObject instanceof IEText)) 039 throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), caseObject.getClass()); 040 if (!(queryObject instanceof IEText)) 041 throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), queryObject.getClass()); 042 043 IEText caseText = (IEText) caseObject; 044 IEText queryText = (IEText) queryObject; 045 046 Set<WeightedString> caseSet = new HashSet<WeightedString>(); 047 Set<WeightedString> querySet = new HashSet<WeightedString>(); 048 049 TextualSimUtils.expandTokensSet(caseText.getAllTokens(), queryText.getAllTokens(), caseSet, querySet); 050 051 double size1 = TextualSimUtils.getSize(caseSet); 052 double size2 = TextualSimUtils.getSize(querySet); 053 054 double minSize = Math.min(size1, size2); 055 056 caseSet.retainAll(querySet); 057 double intersectionSize = TextualSimUtils.getSize(caseSet); 058 059 return intersectionSize / minSize; 060 061 } 062 063 064 065 /* (non-Javadoc) 066 * @see jcolibri.method.retrieve.NNretrieval.similarity.LocalSimilarityFunction#isApplicable(java.lang.Object, java.lang.Object) 067 */ 068 public boolean isApplicable(Object o1, Object o2) 069 { 070 if((o1==null)&&(o2==null)) 071 return true; 072 else if(o1==null) 073 return o2 instanceof IEText; 074 else if(o2==null) 075 return o1 instanceof IEText; 076 else 077 return (o1 instanceof IEText)&&(o2 instanceof IEText); 078 } 079 080 081 }