001    /**
002     * ExpertClerkMedianScoring.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 04/11/2007
008     */
009    package jcolibri.method.retrieve.DiverseByMedianRetrieval;
010    
011    import java.util.ArrayList;
012    import java.util.Collection;
013    import java.util.HashMap;
014    
015    import jcolibri.cbrcore.Attribute;
016    import jcolibri.cbrcore.CBRCase;
017    import jcolibri.cbrcore.CBRQuery;
018    import jcolibri.cbrcore.CaseComponent;
019    import jcolibri.method.retrieve.RetrievalResult;
020    import jcolibri.method.retrieve.NNretrieval.NNConfig;
021    import jcolibri.method.retrieve.NNretrieval.NNScoringMethod;
022    import jcolibri.util.AttributeUtils;
023    import jcolibri.util.CopyUtils;
024    
025    /**
026     * ExpertClerk Median algorithm. 
027     * This algorithm chooses the first case that is closed to the median of cases.
028     * Then the remaining are selected taking into account negative and possitive
029     * characteristics. A characteristic is an attribute that exceeds a predefined
030     * threshold. It is positive if is greater thatn the value of the median. And negative
031     * otherwise. The number of positive plus the negative characteristics is used
032     * to rank the cases and obtain the following k-1 cases.
033     * <p>See:
034     * <p>
035     * H. Shimazu. ExpertClerk: A Conversational Case-Based Reasoning Tool for 
036     * Developing Salesclerk Agents in E-Commerce Webshops. Artif. Intell. Rev., 
037     * 18(3-4):223-244, 2002.
038     * 
039     * @author Juan A. Recio-Garcia
040     * @author Developed at University College Cork (Ireland) in collaboration with Derek Bridge.
041     * @version 1.0
042     *
043     */
044    public class ExpertClerkMedianScoring
045    {
046        /******************************************************************************/
047        /**                           STATIC METHODS                                 **/
048        /******************************************************************************/    
049        
050        /**
051         * Returns diverse cases using the ExpertClerk median method.
052         * @param cases to retrieve from
053         * @param simConfig is the nn configuration
054         * @param thresholds to obtain the characteristics
055         * @return a collection of cases
056         */
057        @SuppressWarnings("unchecked")
058        public static Collection<RetrievalResult> getDiverseByMedian(Collection<CBRCase> cases, NNConfig simConfig, HashMap<Attribute,Double> thresholds)
059        {
060            CaseComponent median = calculateMedian(cases);
061            
062            CBRQuery query = new CBRQuery();
063            query.setDescription(median);
064            Collection<RetrievalResult> distancesToMedian = NNScoringMethod.evaluateSimilarity(cases,query,simConfig);
065            CBRCase first = distancesToMedian.iterator().next().get_case();
066            
067            ArrayList<RetrievalResult> characteristics = new ArrayList<RetrievalResult>();
068            double maxCharacteristics = AttributeUtils.getAttributes(cases.iterator().next().getDescription()).size();
069            for(CBRCase _case:cases)
070            {
071                if(_case.equals(first))
072                    continue;
073                int chars = computeCharacteristics(_case, median, simConfig, thresholds);
074                characteristics.add(new RetrievalResult(_case, ((double)chars)/maxCharacteristics));
075            }
076            characteristics.add(new RetrievalResult(first,1.0));
077            java.util.Collections.sort(characteristics);
078    
079            return characteristics;
080        }
081        
082        /**
083         * Computes the characteristics
084         */
085        private static int computeCharacteristics(CBRCase _case, CaseComponent median, NNConfig simConfig, HashMap<Attribute,Double> thresholds)
086        {
087            int characteristics = 0;
088            for(Attribute at: AttributeUtils.getAttributes(_case.getDescription()))
089            {
090                if(at.equals(_case.getDescription().getIdAttribute()))
091                    continue;
092                
093                Object value = AttributeUtils.findValue(at, _case.getDescription());
094                Object medValue = AttributeUtils.findValue(at, median);
095                
096                if((value==null)||(medValue==null))
097                    continue;
098                
099                if(value instanceof Number)
100                {
101                    double v = ((Number)value).doubleValue();
102                    double medV = ((Number)medValue).doubleValue();
103                    double ad = simConfig.getWeight(at) * (v - medV);
104                    
105                    Double threshold = thresholds.get(at);
106                    if(threshold == null)
107                        threshold = 0.5;
108                    if(Math.abs(ad) > threshold)  
109                            characteristics++;
110                }
111                else if(value instanceof Enum)
112                {
113                    double v = ((Enum)value).ordinal();
114                    double medV = ((Enum)medValue).ordinal();
115                    double ad = simConfig.getWeight(at) * (v - medV);
116                    
117                    Double threshold = thresholds.get(at);
118                    if(threshold == null)
119                        threshold = 1.0;
120                    if(Math.abs(ad) > threshold)   
121                            characteristics++;
122                }
123                else if(!value.equals(medValue))
124                    characteristics++;
125    
126            
127            }
128            return characteristics;
129        }
130        
131        /**
132         * Calculates the median
133         */
134        private static CaseComponent calculateMedian(Collection<CBRCase> cases)
135        {
136            HashMap<Attribute,HashMap<Object,Integer>> enumCount = new HashMap<Attribute, HashMap<Object,Integer>>();
137            HashMap<Attribute,Double> numValues = new HashMap<Attribute,Double>();
138            
139            for(CBRCase _case :cases)
140            {
141                for(Attribute at : AttributeUtils.getAttributes(_case.getDescription()))
142                {
143                    if(at.equals(_case.getDescription().getIdAttribute()))
144                        continue;
145                    Object value = AttributeUtils.findValue(at, _case.getDescription());
146                    if(value == null)
147                        continue;
148                    
149                    if(value instanceof Number)
150                    {
151                        Double sum = numValues.get(at);
152                        if(sum == null)
153                            numValues.put(at, ((Number)value).doubleValue());
154                        else
155                            numValues.put(at, sum+((Number)value).doubleValue());
156                    }
157                    else
158                    {
159                        HashMap<Object,Integer> enumValues = enumCount.get(at);
160                        if(enumValues == null)
161                        {
162                            enumValues = new HashMap<Object,Integer>();
163                            enumCount.put(at, enumValues);
164                        }
165                        Integer count = enumValues.get(value);
166                        if(count == null)
167                            enumValues.put(value, new Integer(0));
168                        else
169                            enumValues.put(value, new Integer(count+1));
170                    }
171                }
172            }
173            CaseComponent res = CopyUtils.copyCaseComponent(cases.iterator().next().getDescription());
174            for(Attribute at: AttributeUtils.getAttributes(res))
175            {
176                HashMap<Object,Integer> enumValues = enumCount.get(at);
177                if(enumValues != null)
178                {
179                    Object maxObject = null;
180                    int max = 0; 
181                    for(Object value : enumValues.keySet())
182                    {
183                        Integer appears = enumValues.get(value);
184                        if(appears > max)
185                        {
186                            max = appears;
187                            maxObject = value;
188                        }
189                    }
190                    AttributeUtils.setValue(at, res, maxObject);
191                    continue;
192                }
193                Double sum = numValues.get(at);
194                if(sum != null)
195                {
196                    if(at.getType().equals(Integer.class))
197                        AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).intValue() );
198                    else if(at.getType().equals(Double.class))
199                        AttributeUtils.setValue(at, res, (sum/(double)cases.size()) );
200                    else if(at.getType().equals(Float.class))
201                        AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).floatValue() );
202                    else if(at.getType().equals(Long.class))
203                        AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).longValue() );
204                    else if(at.getType().equals(Short.class))
205                        AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).shortValue() );
206                }
207                else
208                    AttributeUtils.setValue(at, res, null);
209            }
210            return res;
211        }
212    
213    
214        /*
215    
216        private static CaseComponent calculateMedian(Collection<CBRCase> cases)
217        {
218            HashMap<Attribute,HashMap<Object,Integer>> enumCount = new HashMap<Attribute, HashMap<Object,Integer>>();
219            HashMap<Attribute,Double> numValues = new HashMap<Attribute,Double>();
220            
221            for(CBRCase _case :cases)
222            {
223                for(Attribute at : AttributeUtils.getAttributes(_case.getDescription()))
224                {
225                    if(at.equals(_case.getDescription().getIdAttribute()))
226                        continue;
227                    Object value = AttributeUtils.findValue(at, _case.getDescription());
228                    if(value instanceof Enum)
229                    {
230                        HashMap<Object,Integer> enumValues = enumCount.get(at);
231                        if(enumValues == null)
232                        {
233                            enumValues = new HashMap<Object,Integer>();
234                            enumCount.put(at, enumValues);
235                        }
236                        Integer count = enumValues.get(value);
237                        if(count == null)
238                            enumValues.put(value, new Integer(0));
239                        else
240                            enumValues.put(value, new Integer(count+1));
241                    }
242                    else if(value instanceof Number)
243                    {
244                        Double sum = numValues.get(at);
245                        if(sum == null)
246                            numValues.put(at, ((Number)value).doubleValue());
247                        else
248                            numValues.put(at, sum+((Number)value).doubleValue());
249                    }
250                }
251            }
252            CaseComponent res = CopyUtils.copyCaseComponent(cases.iterator().next().getDescription());
253            for(Attribute at: AttributeUtils.getAttributes(res))
254            {
255                HashMap<Object,Integer> enumValues = enumCount.get(at);
256                if(enumValues != null)
257                {
258                    Object maxObject = null;
259                    int max = 0; 
260                    for(Object value : enumValues.keySet())
261                    {
262                        Integer appears = enumValues.get(value);
263                        if(appears > max)
264                        {
265                            max = appears;
266                            maxObject = value;
267                        }
268                    }
269                    AttributeUtils.setValue(at, res, maxObject);
270                    continue;
271                }
272                Double sum = numValues.get(at);
273                if(sum != null)
274                {
275                    if(at.getType().equals(Integer.class))
276                        AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).intValue() );
277                    else if(at.getType().equals(Double.class))
278                        AttributeUtils.setValue(at, res, (sum/(double)cases.size()) );
279                    else if(at.getType().equals(Float.class))
280                        AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).floatValue() );
281                    else if(at.getType().equals(Long.class))
282                        AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).longValue() );
283                    else if(at.getType().equals(Short.class))
284                        AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).shortValue() );
285                }
286                else
287                    AttributeUtils.setValue(at, res, null);
288            }
289            return res;
290        }
291     
292        
293         */
294        
295    }