001 /** 002 * LuceneTextSimilarity.java 003 * jCOLIBRI2 framework. 004 * @author Juan A. Recio-García. 005 * GAIA - Group for Artificial Intelligence Applications 006 * http://gaia.fdi.ucm.es 007 * 25/06/2007 008 */ 009 package jcolibri.method.retrieve.NNretrieval.similarity.local.textual; 010 011 import jcolibri.cbrcore.Attribute; 012 import jcolibri.cbrcore.CBRQuery; 013 import jcolibri.datatypes.Text; 014 import jcolibri.exception.NoApplicableSimilarityFunctionException; 015 import jcolibri.extensions.textual.lucene.LuceneIndex; 016 import jcolibri.extensions.textual.lucene.LuceneSearchResult; 017 import jcolibri.extensions.textual.lucene.LuceneSearcher; 018 import jcolibri.method.retrieve.LuceneRetrieval.LuceneRetrieval; 019 import jcolibri.method.retrieve.NNretrieval.similarity.InContextLocalSimilarityFunction; 020 021 /** 022 * Computes the similarity between two texts using Lucene. 023 * <br> 024 * It is applicable to any Text object. 025 * <br> 026 * Requires the previous execution of the method jcolibri.method.precycle.LuceneIndexCreator to create 027 * a LuceneIndex. 028 * <br> 029 * Test 13 shows how to use this similarity measure. 030 * 031 * @author Juan A. Recio-Garcia 032 * @version 1.0 033 * @see jcolibri.datatypes.Text 034 * @see jcolibri.method.precycle.LuceneIndexCreator 035 * @see jcolibri.test.test13.Test13b 036 */ 037 public class LuceneTextSimilarity extends InContextLocalSimilarityFunction 038 { 039 LuceneSearchResult lsr = null; 040 boolean normalized = false; 041 042 /** 043 * Creates a LuceneTextSimilarity object. This constructor pre-computes the similarity of the query with 044 * the textaul attributes of the case (as these attributes are in the index). 045 * @param index Index that contains the attributes of the case 046 * @param query query that will be compared 047 * @param at textual attribute of the case or query object that is being compared 048 * @param normalized if the Lucene result must be normalized to [0..1] 049 */ 050 public LuceneTextSimilarity(LuceneIndex index, CBRQuery query, Attribute at, boolean normalized) 051 { 052 this.normalized = normalized; 053 Object queryString = jcolibri.util.AttributeUtils.findValue(at, query); 054 if(!(queryString instanceof Text)) 055 { 056 org.apache.commons.logging.LogFactory.getLog(LuceneRetrieval.class).error("Search field has not a Text value. Returning empty RetrievalResult list."); 057 return; 058 } 059 Text qs = (Text)queryString; 060 String sf = at.getName(); 061 lsr = LuceneSearcher.search(index, qs.toString(), sf); 062 063 } 064 065 /* (non-Javadoc) 066 * @see jcolibri.method.retrieve.NNretrieval.similarity.LocalSimilarityFunction#compute(java.lang.Object, java.lang.Object) 067 */ 068 public double compute(Object caseObject, Object queryObject) throws NoApplicableSimilarityFunctionException 069 { 070 if ((caseObject == null) || (queryObject == null)) 071 return 0; 072 if (!(caseObject instanceof Text)) 073 throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), caseObject.getClass()); 074 if (!(queryObject instanceof Text)) 075 throw new jcolibri.exception.NoApplicableSimilarityFunctionException(this.getClass(), queryObject.getClass()); 076 077 return lsr.getDocScore(_case.getID().toString(), normalized); 078 } 079 080 /* (non-Javadoc) 081 * @see jcolibri.method.retrieve.NNretrieval.similarity.LocalSimilarityFunction#isApplicable(java.lang.Object, java.lang.Object) 082 */ 083 public boolean isApplicable(Object o1, Object o2) 084 { 085 if((o1==null)&&(o2==null)) 086 return true; 087 else if(o1==null) 088 return o2 instanceof Text; 089 else if(o2==null) 090 return o1 instanceof Text; 091 else 092 return (o1 instanceof Text)&&(o2 instanceof Text); 093 } 094 095 }