001    /**
002     * LuceneRetrieval.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 09/04/2007
008     */
009    package jcolibri.method.retrieve;
010    
011    import java.util.ArrayList;
012    import java.util.Collection;
013    
014    import jcolibri.casebase.IDIndexedLinealCaseBase;
015    import jcolibri.cbrcore.Attribute;
016    import jcolibri.cbrcore.CBRCase;
017    import jcolibri.cbrcore.CBRCaseBase;
018    import jcolibri.cbrcore.CBRQuery;
019    import jcolibri.datatypes.Text;
020    import jcolibri.extensions.textual.lucene.LuceneIndex;
021    import jcolibri.extensions.textual.lucene.LuceneSearchResult;
022    import jcolibri.extensions.textual.lucene.LuceneSearcher;
023    
024    /**
025     * Method to retrieve cases using Lucene to compute the similarity with the query.
026     * @author Juanan
027     *
028     */
029    public class LuceneRetrieval {
030    
031            /**
032             * This method retrieves cases using Lucene to compute the similarity with the query.
033             * It requires a LuceneIndex created with the LuceneIndexCreator method.
034             * @param casebase containing the cases
035             * @param query to compute the similarity with
036             * @param index precalculated lucene index
037             * @param searchField to invoke lucene (this attribute must be Text typed)
038             * @param normalized indicates if the results must be normalized to [0..1]
039             * @param k max number of retrieved cases
040             * @see jcolibri.method.precycle.LuceneIndexCreator
041             */
042            public static Collection<RetrievalResult> LuceneRetrieve(CBRCaseBase casebase, CBRQuery query, LuceneIndex index, Attribute searchField, boolean normalized, int k)
043            {
044                    ArrayList<RetrievalResult> res = new ArrayList<RetrievalResult>();
045                    Object queryString = jcolibri.util.AttributeUtils.findValue(searchField, query);
046                    if(!(queryString instanceof Text))
047                    {
048                            org.apache.commons.logging.LogFactory.getLog(LuceneRetrieval.class).error("Search field has not a Text value. Returning empty RetrievalResult list.");
049                            return res;
050                    }
051                    Text qs = (Text)queryString;
052                    String sf = searchField.getName();
053                    LuceneSearchResult lsr = LuceneSearcher.search(index, qs.toString(), sf);
054                    
055                    int max = lsr.getResultLength();
056                    if(k < max)
057                            max = k;
058                    
059                    for(int i=0; i<max; i++)
060                            res.add(new RetrievalResult(findCase(casebase, lsr.getDocAt(i).getDocID()), new Double(lsr.getDocScore(i, normalized))));
061                    return res;
062            }
063            
064            private static CBRCase findCase(CBRCaseBase casebase, String descriptionID)
065            {
066                    if(casebase instanceof IDIndexedLinealCaseBase) // O(1)
067                    {
068                            IDIndexedLinealCaseBase cb = (IDIndexedLinealCaseBase)casebase;
069                            return cb.getCase(descriptionID);
070                    }
071                    else // O(n)
072                    {
073                            for(CBRCase c: casebase.getCases())
074                            {
075                                    try {
076                                            Object descIDObj = c.getDescription().getIdAttribute().getValue(c.getDescription());
077                                            String descID = (String)descIDObj;
078                                            if(descID.equals(descriptionID))
079                                                    return c;
080                                    } catch (Exception e) {
081                                            org.apache.commons.logging.LogFactory.getLog(LuceneRetrieval.class).error(e);
082                                    }
083                                    
084                            }
085                    }
086                    return null;
087            }
088    }