001    /**
002     * GreedySelection.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 05/11/2007
008     */
009    package jcolibri.method.retrieve.selection.diversity;
010    
011    import java.util.ArrayList;
012    import java.util.Collection;
013    
014    import jcolibri.cbrcore.CBRCase;
015    import jcolibri.cbrcore.CBRQuery;
016    import jcolibri.method.retrieve.RetrievalResult;
017    import jcolibri.method.retrieve.NNretrieval.NNConfig;
018    import jcolibri.method.retrieve.NNretrieval.NNScoringMethod;
019    import jcolibri.method.retrieve.selection.SelectCases;
020    
021    /**
022     * This method incrementally builds a retrieval set, R. During each step the
023     * remaining cases are ordered according to their quality with the highest 
024     * quality case added to R. 
025     * <br>
026     * The quality metric combines diversity and similarity. The quality of a case c 
027     * is proportional to the similarity between c and the query, and to the diversity
028     * of c relative to those cases so far selected in R.
029     * <br>
030     * This algorithm is very expensive. It should be applied to small case bases. 
031     * <p>See:
032     * <p>
033     * B. Smyth and P. McClave. Similarity vs. diversity. In ICCBR '01: Proceedings 
034     * of the 4th International Conference on Case-Based Reasoning, pages 347-361, 
035     * London, UK, 2001. Springer-Verlag.
036     * 
037     * 
038     * @author Juan A. Recio-Garcia
039     * @author Developed at University College Cork (Ireland) in collaboration with Derek Bridge.
040     * @version 1.0
041     *
042     */
043    public class GreedySelection
044    {
045        /******************************************************************************/
046        /**                           STATIC METHODS                                 **/
047        /******************************************************************************/   
048        
049        
050        /**
051         * Executes the greedy selection algorithm
052         * @param cases to select from
053         * @param query to compare
054         * @param simConfig is the knn similarity configuration. Its k determines the number of returned cases 
055         * @return k cases (k is defined in simConfig). 
056         */
057        public static Collection<CBRCase> greedySelection(Collection<RetrievalResult> cases, CBRQuery query, NNConfig simConfig, int k)
058        {
059            Collection<CBRCase> C = SelectCases.selectAll(cases);
060            Collection<CBRCase> R = new ArrayList<CBRCase>();
061            for(int i=0; i<k; i++)
062            {
063                CBRCase best = getMoreQuality(query, C, R, simConfig);
064                R.add(best);
065                C.remove(best);
066            }
067            return R;
068        }
069        
070        /**
071         * Returns the case with more quality.
072         * @param query to compare
073         * @param cases to compare
074         * @param R is the set of previous selected cases
075         * @param simConfig is the knn similarity config
076         * @return the case with more quality
077         */
078        public static CBRCase getMoreQuality(CBRQuery query, Collection<CBRCase> cases, Collection<CBRCase> R, NNConfig simConfig)
079        {
080            
081            Collection<RetrievalResult> nn = NNScoringMethod.evaluateSimilarity(cases, query, simConfig);
082            
083            CBRCase best = null;
084            double maxQuality = -Double.MAX_VALUE;
085            for(RetrievalResult rr: nn)
086            {
087                double quality = rr.getEval() * relDiversity(rr.get_case(), R, simConfig);
088                if(quality > maxQuality)
089                {
090                    maxQuality = quality;
091                    best = rr.get_case();
092                }
093            }
094            return best;
095        }
096        
097        public static double relDiversity(CBRCase c, Collection<CBRCase> R, NNConfig simConfig)
098        {
099            if(R.isEmpty())
100                return 1;
101            double sum = 0;
102            for(CBRCase _case : R)
103            {
104                Collection<CBRCase> aux = new ArrayList<CBRCase>();
105                aux.add(_case);
106                CBRQuery query = new CBRQuery();
107                query.setDescription(c.getDescription());
108                double sim = NNScoringMethod.evaluateSimilarity(aux, query, simConfig).iterator().next().getEval();
109                sum += sim;
110            }
111            sum = sum/(double)R.size();
112            
113            return sum;
114        }
115    }