001    package jcolibri.method.retrieve.NNretrieval.similarity.local.textual;
002    
003    import java.util.HashSet;
004    import java.util.Set;
005    
006    import jcolibri.exception.NoApplicableSimilarityFunctionException;
007    import jcolibri.extensions.textual.IE.representation.IEText;
008    import jcolibri.method.retrieve.NNretrieval.similarity.LocalSimilarityFunction;
009    import jcolibri.method.retrieve.NNretrieval.similarity.local.textual.TextualSimUtils.WeightedString;
010    
011    /**
012     * Overlap Coefficient Similarity.
013     * <p>
014     * This function computes: |intersection(o1,o2)| / min(|o1|,|o2|).
015     * </p>
016     * <p>It is applicable to any Text object.</p>
017     * <p>
018     * Developed at: Robert Gordon University - Aberdeen & Facultad Informática,
019     * Universidad Complutense de Madrid (GAIA)
020     * </p>
021     * 
022     * @author Juan Antonio Recio García
023     * @version 2.0
024     */
025    public class OverlapCoefficient implements LocalSimilarityFunction {
026        
027            /**
028             * Applies the similarity
029             * @param caseObject
030             *            IEText
031             * @param queryObject
032             *            IEText
033             * @return the result of the similarity function
034             */ 
035            public double compute(Object caseObject, Object queryObject) throws NoApplicableSimilarityFunctionException{
036                    if ((caseObject == null) || (queryObject == null))
037                            return 0;
038                    if (!(caseObject instanceof IEText))
039                            throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), caseObject.getClass());
040                    if (!(queryObject instanceof IEText))
041                            throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), queryObject.getClass());
042    
043                    IEText caseText = (IEText) caseObject;
044                    IEText queryText = (IEText) queryObject;
045                    
046                    Set<WeightedString> caseSet = new HashSet<WeightedString>();
047                    Set<WeightedString> querySet = new HashSet<WeightedString>();
048                    
049                    TextualSimUtils.expandTokensSet(caseText.getAllTokens(), queryText.getAllTokens(), caseSet, querySet);
050                    
051                    double size1 = TextualSimUtils.getSize(caseSet);
052                    double size2 = TextualSimUtils.getSize(querySet);
053                    
054                    double minSize = Math.min(size1, size2);
055                    
056                    caseSet.retainAll(querySet);
057                    double intersectionSize = TextualSimUtils.getSize(caseSet);
058                    
059                    return intersectionSize / minSize;
060                    
061            }
062    
063    
064            
065                /* (non-Javadoc)
066                 * @see jcolibri.method.retrieve.NNretrieval.similarity.LocalSimilarityFunction#isApplicable(java.lang.Object, java.lang.Object)
067                 */
068                public boolean isApplicable(Object o1, Object o2)
069                {
070                    if((o1==null)&&(o2==null))
071                            return true;
072                    else if(o1==null)
073                            return o2 instanceof IEText;
074                    else if(o2==null)
075                            return o1 instanceof IEText;
076                    else
077                            return (o1 instanceof IEText)&&(o2 instanceof IEText);
078                }
079    
080    
081    }