001    /**
002     * SimilarityInfluence.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 30/10/2007
008     */
009    package jcolibri.extensions.recommendation.navigationByAsking;
010    
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Set;
015    
016    import jcolibri.cbrcore.Attribute;
017    import jcolibri.cbrcore.CBRCase;
018    import jcolibri.cbrcore.CBRQuery;
019    import jcolibri.exception.ExecutionException;
020    import jcolibri.method.retrieve.RetrievalResult;
021    import jcolibri.method.retrieve.NNretrieval.NNConfig;
022    import jcolibri.method.retrieve.NNretrieval.NNScoringMethod;
023    import jcolibri.util.AttributeUtils;
024    import jcolibri.util.CopyUtils;
025    import jcolibri.util.ProgressController;
026    
027    /**
028     * Selects the attribute that has the highest infuence on the KNN similarity.
029     * The inuence on the similarity can be measured by the expected variance
030     * of the similarities of a set of selected cases.
031     * 
032     * This method is not recommended with large case bases.
033     * 
034     * <p>See:
035     * <p>
036     * R. Bergmann. Experience Management: Foundations, Development Methodology, 
037     * and Internet-Based Applications. Springer-Verlag New York, Inc.,Secaucus,  
038     * NJ, USA, 2002.
039     * <p>
040     * A. Kohlmaier, S. Schmitt, and R. Bergmann. A similarity-based approach to
041     * attribute selection in user-adaptive sales dialogs. In D. W. Aha and I. Watson,
042     * editors, Proceedings of the 4th International Conference on Case-Based
043     * Reasoning, pages 306320, Seattle, Washington, 2001. Springer-Verlag.
044     * <p>
045     * S. Schmitt, P. Dopichaj, and P. Domínguez-Marín. Entropy-based vs.
046     * similarity-inuenced: Attribute selection methods for dialogs tested on different
047     * electronic commerce domains. In S. Craw and A. Preece, editors,
048     * Proceedings of the 6th European Conference on Case-Based Reasoning, pages
049     * 380-394, Aberdeen, Scotland, 2002. Springer-Verlag.
050     * 
051     * @author Juan A. Recio-Garcia
052     * @author Developed at University College Cork (Ireland) in collaboration with Derek Bridge.
053     * @version 1.0
054     *
055     */
056    public class SimilarityInfluence implements SelectAttributeMethod
057    {
058        private static ArrayList<Attribute> asked;
059        
060        /******************************************************************************/
061        /**                           STATIC METHODS                                 **/
062        /******************************************************************************/
063        
064        /**
065         * Selects the attribute with more expected influence in the NN scoring.
066         * @param cases Set of working cases
067         * @param query Query to compare with the cases
068         * @param simConfig is the NN similiarity configuration
069         * @param init indicates if this is the first time that the algorithm is executed.
070         * This way, in following iterations past chosen attributes are not computed.
071         * @return the selected attribute or null if there are not more attributes to ask.
072         */
073        public static Attribute getMoreSimVarAttribute(Collection<CBRCase> cases, CBRQuery query, NNConfig simConfig, boolean init) throws ExecutionException
074        {
075            if(init)
076                asked = new ArrayList<Attribute>();
077            if(asked ==null)
078                throw new ExecutionException("Similarity Influence method must be initialized each cycle");
079            CBRCase acase = cases.iterator().next();
080            Collection<Attribute> atts = AttributeUtils.getAttributes(acase.getDescription());
081            atts.remove(acase.getDescription().getIdAttribute());
082            
083            atts.removeAll(asked);
084            if(atts.isEmpty())
085            {
086                asked = new ArrayList<Attribute>();
087                atts = AttributeUtils.getAttributes(acase.getDescription());
088            }
089            
090            ProgressController.init(SimilarityInfluence.class,"Similarity Influence selection", ProgressController.UNKNOWN_STEPS);
091            System.out.println("Computing SimVar for "+cases.size()+" cases");
092            
093            double maxSimVar = 0;
094            Attribute maxSimVaratt = null;
095            for(Attribute a: atts)
096            {
097                double simVar = computeSimVar(a,cases,query,simConfig);
098                System.out.println("SimVar("+a.getName()+") = "+simVar);
099                if(simVar>maxSimVar)
100                {
101                    maxSimVar = simVar;
102                    maxSimVaratt = a;
103                }
104            }
105            
106            ProgressController.finish(SimilarityInfluence.class);
107            
108            asked.add(maxSimVaratt);
109            return maxSimVaratt;
110        }
111       
112        /**
113         * Computes the simVar of an attribute with respect to a set of cases and a query.
114         */
115        private static double computeSimVar(Attribute a, Collection<CBRCase> cases, CBRQuery query, NNConfig simConfig)
116        {
117            double Csize = cases.size();
118            
119            Hashtable<Object,HashSet<CBRCase>> clases = new Hashtable<Object,HashSet<CBRCase>>();
120            for(CBRCase c: cases)
121            {
122                Object value = AttributeUtils.findValue(a, c.getDescription());
123                HashSet<CBRCase> set = clases.get(value);
124                if(set==null)
125                {
126                    set = new HashSet<CBRCase>();
127                    clases.put(value, set);
128                }
129                set.add(c);
130            }
131            
132            int i=0;
133            double simVar = 0;
134            for(Object v : clases.keySet())
135            {
136                double pv = ((double)clases.get(v).size()) / Csize;
137    
138                CBRQuery newQuery = new CBRQuery();
139                newQuery.setDescription(CopyUtils.copyCaseComponent(query.getDescription()));
140                AttributeUtils.setValue(a, newQuery, v);
141                double var = computeVar(newQuery, cases, simConfig);
142                simVar += (pv * var);
143                i++;
144                ProgressController.step(SimilarityInfluence.class);
145    
146            }
147            
148            return simVar;
149        }
150        
151        /**
152         * Computes the Var formulae
153         */
154        private static double computeVar(CBRQuery query, Collection<CBRCase> cases, NNConfig simConfig)
155        {
156            Collection<RetrievalResult> sim = NNScoringMethod.evaluateSimilarity(cases, query, simConfig);
157            
158            double niu = 0;
159            for(RetrievalResult rr : sim)
160                niu += rr.getEval();
161            niu = niu / sim.size();
162            
163            double res = 0;
164            for(RetrievalResult rr : sim)
165                res += ( (rr.getEval()-niu)*(rr.getEval()-niu) );
166            
167            
168            return res / ((double)cases.size());
169        }
170    
171        /******************************************************************************/
172        /**                           OBJECT METHODS                                 **/
173        /******************************************************************************/
174    
175        /** KNN configuration */
176        private NNConfig simConfig;
177        
178        /**
179         * Constructor
180         * @param simConfig is the KNN configuration
181         */
182        public SimilarityInfluence(NNConfig simConfig)
183        {
184            this.simConfig = simConfig; 
185        }
186        
187        /**
188         * Selects the attribute to be asked
189         * @param cases list of working cases
190         * @param query is the current query
191         * @return selected attribute
192         * @throws ExecutionException
193         */
194        public Attribute getAttribute(Collection<CBRCase> cases, CBRQuery query) throws ExecutionException
195        {
196            return  getMoreSimVarAttribute(cases, query, simConfig, false);
197        }
198    }