001 /** 002 * LuceneRetrieval.java 003 * jCOLIBRI2 framework. 004 * @author Juan A. Recio-García. 005 * GAIA - Group for Artificial Intelligence Applications 006 * http://gaia.fdi.ucm.es 007 * 09/04/2007 008 */ 009 package jcolibri.method.retrieve; 010 011 import java.util.ArrayList; 012 import java.util.Collection; 013 014 import jcolibri.casebase.IDIndexedLinealCaseBase; 015 import jcolibri.cbrcore.Attribute; 016 import jcolibri.cbrcore.CBRCase; 017 import jcolibri.cbrcore.CBRCaseBase; 018 import jcolibri.cbrcore.CBRQuery; 019 import jcolibri.datatypes.Text; 020 import jcolibri.extensions.textual.lucene.LuceneIndex; 021 import jcolibri.extensions.textual.lucene.LuceneSearchResult; 022 import jcolibri.extensions.textual.lucene.LuceneSearcher; 023 024 /** 025 * Method to retrieve cases using Lucene to compute the similarity with the query. 026 * @author Juanan 027 * 028 */ 029 public class LuceneRetrieval { 030 031 /** 032 * This method retrieves cases using Lucene to compute the similarity with the query. 033 * It requires a LuceneIndex created with the LuceneIndexCreator method. 034 * @param casebase containing the cases 035 * @param query to compute the similarity with 036 * @param index precalculated lucene index 037 * @param searchField to invoke lucene (this attribute must be Text typed) 038 * @param normalized indicates if the results must be normalized to [0..1] 039 * @param k max number of retrieved cases 040 * @see jcolibri.method.precycle.LuceneIndexCreator 041 */ 042 public static Collection<RetrievalResult> LuceneRetrieve(CBRCaseBase casebase, CBRQuery query, LuceneIndex index, Attribute searchField, boolean normalized, int k) 043 { 044 ArrayList<RetrievalResult> res = new ArrayList<RetrievalResult>(); 045 Object queryString = jcolibri.util.AttributeUtils.findValue(searchField, query); 046 if(!(queryString instanceof Text)) 047 { 048 org.apache.commons.logging.LogFactory.getLog(LuceneRetrieval.class).error("Search field has not a Text value. Returning empty RetrievalResult list."); 049 return res; 050 } 051 Text qs = (Text)queryString; 052 String sf = searchField.getName(); 053 LuceneSearchResult lsr = LuceneSearcher.search(index, qs.toString(), sf); 054 055 int max = lsr.getResultLength(); 056 if(k < max) 057 max = k; 058 059 for(int i=0; i<max; i++) 060 res.add(new RetrievalResult(findCase(casebase, lsr.getDocAt(i).getDocID()), new Double(lsr.getDocScore(i, normalized)))); 061 return res; 062 } 063 064 private static CBRCase findCase(CBRCaseBase casebase, String descriptionID) 065 { 066 if(casebase instanceof IDIndexedLinealCaseBase) // O(1) 067 { 068 IDIndexedLinealCaseBase cb = (IDIndexedLinealCaseBase)casebase; 069 return cb.getCase(descriptionID); 070 } 071 else // O(n) 072 { 073 for(CBRCase c: casebase.getCases()) 074 { 075 try { 076 Object descIDObj = c.getDescription().getIdAttribute().getValue(c.getDescription()); 077 String descID = (String)descIDObj; 078 if(descID.equals(descriptionID)) 079 return c; 080 } catch (Exception e) { 081 org.apache.commons.logging.LogFactory.getLog(LuceneRetrieval.class).error(e); 082 } 083 084 } 085 } 086 return null; 087 } 088 }