001    package jcolibri.test.test13.similarity;
002    
003    import java.util.HashSet;
004    import java.util.Set;
005    import java.util.StringTokenizer;
006    
007    import jcolibri.exception.NoApplicableSimilarityFunctionException;
008    import jcolibri.method.retrieve.NNretrieval.similarity.LocalSimilarityFunction;
009    
010    /**
011     * This function returns a similarity value depending on the tokens (words) that
012     * appear in the query attribute and also are in the case attribute.
013     */
014    public class TokensContained implements LocalSimilarityFunction {
015    
016            /** Creates a new instance of TokensContained */
017            public TokensContained() {
018            }
019    
020            public double compute(Object caseObject, Object queryObject) throws NoApplicableSimilarityFunctionException
021            {
022                    if ((caseObject == null) || (queryObject == null))
023                            return 0;
024                    if (!(caseObject instanceof java.lang.String))
025                            throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), caseObject.getClass());
026                    if (!(queryObject instanceof java.lang.String))
027                            throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), queryObject.getClass());
028    
029    
030                    String caseS  = (String) caseObject;
031                    String queryS = (String) queryObject;
032                    
033                    Set<String> caseSet = new HashSet<String>();
034                    Set<String> querySet = new HashSet<String>();
035                    
036                    for(StringTokenizer ct = new StringTokenizer(caseS); ct.hasMoreTokens(); )
037                        caseSet.add(ct.nextToken());
038                    for(StringTokenizer ct = new StringTokenizer(queryS); ct.hasMoreTokens(); )
039                        querySet.add(ct.nextToken());                   
040                    
041                    double totalsize = caseSet.size();
042                    caseSet.retainAll(querySet);
043                    double foundsize = caseSet.size();
044                    
045                    return foundsize / totalsize;
046            }
047    
048            /** Applicable to String */
049            public boolean isApplicable(Object o1, Object o2)
050            {
051                    if((o1==null)&&(o2==null))
052                            return true;
053                    else if(o1==null)
054                            return o2 instanceof String;
055                    else if(o2==null)
056                            return o1 instanceof String;
057                    else
058                            return (o1 instanceof String)&&(o2 instanceof String);
059            }
060    
061    }