/**
 * LuceneRetrieval.java
 * jCOLIBRI2 framework.
 * @author Juan A. Recio-García.
 * GAIA - Group for Artificial Intelligence Applications
 * http://gaia.fdi.ucm.es
 * 09/04/2007
 */
package jcolibri.method.retrieve.LuceneRetrieval;

import java.util.ArrayList;
import java.util.Collection;

import jcolibri.casebase.IDIndexedLinealCaseBase;
import jcolibri.cbrcore.Attribute;
import jcolibri.cbrcore.CBRCase;
import jcolibri.cbrcore.CBRCaseBase;
import jcolibri.cbrcore.CBRQuery;
import jcolibri.datatypes.Text;
import jcolibri.extensions.textual.lucene.LuceneIndex;
import jcolibri.extensions.textual.lucene.LuceneSearchResult;
import jcolibri.extensions.textual.lucene.LuceneSearcher;
import jcolibri.method.retrieve.RetrievalResult;

/**
 * Method to retrieve cases using Lucene to compute the similarity with the query.
 * @author Juanan
 *
 */
public class LuceneRetrieval {

    /** Shared logger, so a new one is not looked up on every error. */
    private static final org.apache.commons.logging.Log LOG =
            org.apache.commons.logging.LogFactory.getLog(LuceneRetrieval.class);

    /**
     * This method retrieves cases using Lucene to compute the similarity with the query.
     * It requires a LuceneIndex created with the LuceneIndexCreator method.
     * <p>
     * Search hits are visited in the order returned by the searcher. A hit whose
     * document ID cannot be resolved to a case of the case base is logged and skipped,
     * so the returned collection never contains a result wrapping a {@code null} case.
     *
     * @param casebase containing the cases
     * @param query to compute the similarity with
     * @param index precalculated lucene index
     * @param searchField to invoke lucene (this attribute must be Text typed)
     * @param normalized indicates if the results must be normalized to [0..1]
     * @param k max number of retrieved cases
     * @return at most {@code k} retrieval results; an empty collection when the value of
     *         {@code searchField} in the query is not a {@link Text} or when {@code k}
     *         is not positive
     * @see jcolibri.method.precycle.LuceneIndexCreator
     */
    public static Collection<RetrievalResult> LuceneRetrieve(CBRCaseBase casebase, CBRQuery query, LuceneIndex index, Attribute searchField, boolean normalized, int k)
    {
        ArrayList<RetrievalResult> res = new ArrayList<RetrievalResult>();

        Object queryString = jcolibri.util.AttributeUtils.findValue(searchField, query);
        if (!(queryString instanceof Text))
        {
            LOG.error("Search field has not a Text value. Returning empty RetrievalResult list.");
            return res;
        }

        Text qs = (Text) queryString;
        String sf = searchField.getName();
        LuceneSearchResult lsr = LuceneSearcher.search(index, qs.toString(), sf);

        int hits = lsr.getResultLength();
        // Stop as soon as k results have been collected; a non-positive k yields no results.
        for (int i = 0; i < hits && res.size() < k; i++)
        {
            String docID = lsr.getDocAt(i).getDocID();
            CBRCase found = findCase(casebase, docID);
            if (found == null)
            {
                // The index references a case that the case base does not hold:
                // skip it instead of handing a null case to the caller.
                LOG.warn("No case found in the case base for Lucene document ID '" + docID + "'. Skipping it.");
                continue;
            }
            res.add(new RetrievalResult(found, Double.valueOf(lsr.getDocScore(i, normalized))));
        }
        return res;
    }

    /**
     * Looks up the case whose description ID matches the given Lucene document ID.
     * An {@link IDIndexedLinealCaseBase} is queried directly by ID; any other case base
     * is scanned case by case, comparing the string form of each description ID.
     *
     * @param casebase case base to search
     * @param descriptionID document ID reported by Lucene
     * @return the matching case, or {@code null} if none matches
     */
    private static CBRCase findCase(CBRCaseBase casebase, String descriptionID)
    {
        if (casebase instanceof IDIndexedLinealCaseBase) // direct lookup by ID
        {
            IDIndexedLinealCaseBase cb = (IDIndexedLinealCaseBase) casebase;
            return cb.getCase(descriptionID);
        }

        // Linear scan over every case.
        for (CBRCase c : casebase.getCases())
        {
            try {
                Object descIDObj = c.getDescription().getIdAttribute().getValue(c.getDescription());
                // Explicit null check and string comparison instead of relying on a
                // caught ClassCastException / NullPointerException for unusual IDs.
                if (descIDObj != null && descIDObj.toString().equals(descriptionID))
                    return c;
            } catch (Exception e) {
                LOG.error(e);
            }
        }
        return null;
    }
}