001    /**
002     * LuceneRetrieval.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 09/04/2007
008     */
009    package jcolibri.method.retrieve.LuceneRetrieval;
010    
011    import java.util.ArrayList;
012    import java.util.Collection;
013    
014    import jcolibri.casebase.IDIndexedLinealCaseBase;
015    import jcolibri.cbrcore.Attribute;
016    import jcolibri.cbrcore.CBRCase;
017    import jcolibri.cbrcore.CBRCaseBase;
018    import jcolibri.cbrcore.CBRQuery;
019    import jcolibri.datatypes.Text;
020    import jcolibri.extensions.textual.lucene.LuceneIndex;
021    import jcolibri.extensions.textual.lucene.LuceneSearchResult;
022    import jcolibri.extensions.textual.lucene.LuceneSearcher;
023    import jcolibri.method.retrieve.RetrievalResult;
024    
025    /**
026     * Method to retrieve cases using Lucene to compute the similarity with the query.
027     * @author Juanan
028     *
029     */
030    public class LuceneRetrieval {
031    
032            /**
033             * This method retrieves cases using Lucene to compute the similarity with the query.
034             * It requires a LuceneIndex created with the LuceneIndexCreator method.
035             * @param casebase containing the cases
036             * @param query to compute the similarity with
037             * @param index precalculated lucene index
038             * @param searchField to invoke lucene (this attribute must be Text typed)
039             * @param normalized indicates if the results must be normalized to [0..1]
040             * @param k max number of retrieved cases
041             * @see jcolibri.method.precycle.LuceneIndexCreator
042             */
043            public static Collection<RetrievalResult> LuceneRetrieve(CBRCaseBase casebase, CBRQuery query, LuceneIndex index, Attribute searchField, boolean normalized, int k)
044            {
045                    ArrayList<RetrievalResult> res = new ArrayList<RetrievalResult>();
046                    Object queryString = jcolibri.util.AttributeUtils.findValue(searchField, query);
047                    if(!(queryString instanceof Text))
048                    {
049                            org.apache.commons.logging.LogFactory.getLog(LuceneRetrieval.class).error("Search field has not a Text value. Returning empty RetrievalResult list.");
050                            return res;
051                    }
052                    Text qs = (Text)queryString;
053                    String sf = searchField.getName();
054                    LuceneSearchResult lsr = LuceneSearcher.search(index, qs.toString(), sf);
055                    
056                    int max = lsr.getResultLength();
057                    if(k < max)
058                            max = k;
059                    
060                    for(int i=0; i<max; i++)
061                            res.add(new RetrievalResult(findCase(casebase, lsr.getDocAt(i).getDocID()), new Double(lsr.getDocScore(i, normalized))));
062                    return res;
063            }
064            
065            private static CBRCase findCase(CBRCaseBase casebase, String descriptionID)
066            {
067                    if(casebase instanceof IDIndexedLinealCaseBase) // O(1)
068                    {
069                            IDIndexedLinealCaseBase cb = (IDIndexedLinealCaseBase)casebase;
070                            return cb.getCase(descriptionID);
071                    }
072                    else // O(n)
073                    {
074                            for(CBRCase c: casebase.getCases())
075                            {
076                                    try {
077                                            Object descIDObj = c.getDescription().getIdAttribute().getValue(c.getDescription());
078                                            String descID = (String)descIDObj;
079                                            if(descID.equals(descriptionID))
080                                                    return c;
081                                    } catch (Exception e) {
082                                            org.apache.commons.logging.LogFactory.getLog(LuceneRetrieval.class).error(e);
083                                    }
084                                    
085                            }
086                    }
087                    return null;
088            }
089    }