001 /** 002 * ExpertClerkMedianScoring.java 003 * jCOLIBRI2 framework. 004 * @author Juan A. Recio-García. 005 * GAIA - Group for Artificial Intelligence Applications 006 * http://gaia.fdi.ucm.es 007 * 04/11/2007 008 */ 009 package jcolibri.method.retrieve.DiverseByMedianRetrieval; 010 011 import java.util.ArrayList; 012 import java.util.Collection; 013 import java.util.HashMap; 014 015 import jcolibri.cbrcore.Attribute; 016 import jcolibri.cbrcore.CBRCase; 017 import jcolibri.cbrcore.CBRQuery; 018 import jcolibri.cbrcore.CaseComponent; 019 import jcolibri.method.retrieve.RetrievalResult; 020 import jcolibri.method.retrieve.NNretrieval.NNConfig; 021 import jcolibri.method.retrieve.NNretrieval.NNScoringMethod; 022 import jcolibri.util.AttributeUtils; 023 import jcolibri.util.CopyUtils; 024 025 /** 026 * ExpertClerk Median algorithm. 027 * This algorithm chooses the first case that is closed to the median of cases. 028 * Then the remaining are selected taking into account negative and possitive 029 * characteristics. A characteristic is an attribute that exceeds a predefined 030 * threshold. It is positive if is greater thatn the value of the median. And negative 031 * otherwise. The number of positive plus the negative characteristics is used 032 * to rank the cases and obtain the following k-1 cases. 033 * <p>See: 034 * <p> 035 * H. Shimazu. ExpertClerk: A Conversational Case-Based Reasoning Tool for 036 * Developing Salesclerk Agents in E-Commerce Webshops. Artif. Intell. Rev., 037 * 18(3-4):223-244, 2002. 038 * 039 * @author Juan A. Recio-Garcia 040 * @author Developed at University College Cork (Ireland) in collaboration with Derek Bridge. 041 * @version 1.0 042 * 043 */ 044 public class ExpertClerkMedianScoring 045 { 046 /******************************************************************************/ 047 /** STATIC METHODS **/ 048 /******************************************************************************/ 049 050 /** 051 * Returns diverse cases using the ExpertClerk median method. 052 * @param cases to retrieve from 053 * @param simConfig is the nn configuration 054 * @param thresholds to obtain the characteristics 055 * @return a collection of cases 056 */ 057 @SuppressWarnings("unchecked") 058 public static Collection<RetrievalResult> getDiverseByMedian(Collection<CBRCase> cases, NNConfig simConfig, HashMap<Attribute,Double> thresholds) 059 { 060 CaseComponent median = calculateMedian(cases); 061 062 CBRQuery query = new CBRQuery(); 063 query.setDescription(median); 064 Collection<RetrievalResult> distancesToMedian = NNScoringMethod.evaluateSimilarity(cases,query,simConfig); 065 CBRCase first = distancesToMedian.iterator().next().get_case(); 066 067 ArrayList<RetrievalResult> characteristics = new ArrayList<RetrievalResult>(); 068 double maxCharacteristics = AttributeUtils.getAttributes(cases.iterator().next().getDescription()).size(); 069 for(CBRCase _case:cases) 070 { 071 if(_case.equals(first)) 072 continue; 073 int chars = computeCharacteristics(_case, median, simConfig, thresholds); 074 characteristics.add(new RetrievalResult(_case, ((double)chars)/maxCharacteristics)); 075 } 076 characteristics.add(new RetrievalResult(first,1.0)); 077 java.util.Collections.sort(characteristics); 078 079 return characteristics; 080 } 081 082 /** 083 * Computes the characteristics 084 */ 085 private static int computeCharacteristics(CBRCase _case, CaseComponent median, NNConfig simConfig, HashMap<Attribute,Double> thresholds) 086 { 087 int characteristics = 0; 088 for(Attribute at: AttributeUtils.getAttributes(_case.getDescription())) 089 { 090 if(at.equals(_case.getDescription().getIdAttribute())) 091 continue; 092 093 Object value = AttributeUtils.findValue(at, _case.getDescription()); 094 Object medValue = AttributeUtils.findValue(at, median); 095 096 if((value==null)||(medValue==null)) 097 continue; 098 099 if(value instanceof Number) 100 { 101 double v = ((Number)value).doubleValue(); 102 double medV = ((Number)medValue).doubleValue(); 103 double ad = simConfig.getWeight(at) * (v - medV); 104 105 Double threshold = thresholds.get(at); 106 if(threshold == null) 107 threshold = 0.5; 108 if(Math.abs(ad) > threshold) 109 characteristics++; 110 } 111 else if(value instanceof Enum) 112 { 113 double v = ((Enum)value).ordinal(); 114 double medV = ((Enum)medValue).ordinal(); 115 double ad = simConfig.getWeight(at) * (v - medV); 116 117 Double threshold = thresholds.get(at); 118 if(threshold == null) 119 threshold = 1.0; 120 if(Math.abs(ad) > threshold) 121 characteristics++; 122 } 123 else if(!value.equals(medValue)) 124 characteristics++; 125 126 127 } 128 return characteristics; 129 } 130 131 /** 132 * Calculates the median 133 */ 134 private static CaseComponent calculateMedian(Collection<CBRCase> cases) 135 { 136 HashMap<Attribute,HashMap<Object,Integer>> enumCount = new HashMap<Attribute, HashMap<Object,Integer>>(); 137 HashMap<Attribute,Double> numValues = new HashMap<Attribute,Double>(); 138 139 for(CBRCase _case :cases) 140 { 141 for(Attribute at : AttributeUtils.getAttributes(_case.getDescription())) 142 { 143 if(at.equals(_case.getDescription().getIdAttribute())) 144 continue; 145 Object value = AttributeUtils.findValue(at, _case.getDescription()); 146 if(value == null) 147 continue; 148 149 if(value instanceof Number) 150 { 151 Double sum = numValues.get(at); 152 if(sum == null) 153 numValues.put(at, ((Number)value).doubleValue()); 154 else 155 numValues.put(at, sum+((Number)value).doubleValue()); 156 } 157 else 158 { 159 HashMap<Object,Integer> enumValues = enumCount.get(at); 160 if(enumValues == null) 161 { 162 enumValues = new HashMap<Object,Integer>(); 163 enumCount.put(at, enumValues); 164 } 165 Integer count = enumValues.get(value); 166 if(count == null) 167 enumValues.put(value, new Integer(0)); 168 else 169 enumValues.put(value, new Integer(count+1)); 170 } 171 } 172 } 173 CaseComponent res = CopyUtils.copyCaseComponent(cases.iterator().next().getDescription()); 174 for(Attribute at: AttributeUtils.getAttributes(res)) 175 { 176 HashMap<Object,Integer> enumValues = enumCount.get(at); 177 if(enumValues != null) 178 { 179 Object maxObject = null; 180 int max = 0; 181 for(Object value : enumValues.keySet()) 182 { 183 Integer appears = enumValues.get(value); 184 if(appears > max) 185 { 186 max = appears; 187 maxObject = value; 188 } 189 } 190 AttributeUtils.setValue(at, res, maxObject); 191 continue; 192 } 193 Double sum = numValues.get(at); 194 if(sum != null) 195 { 196 if(at.getType().equals(Integer.class)) 197 AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).intValue() ); 198 else if(at.getType().equals(Double.class)) 199 AttributeUtils.setValue(at, res, (sum/(double)cases.size()) ); 200 else if(at.getType().equals(Float.class)) 201 AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).floatValue() ); 202 else if(at.getType().equals(Long.class)) 203 AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).longValue() ); 204 else if(at.getType().equals(Short.class)) 205 AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).shortValue() ); 206 } 207 else 208 AttributeUtils.setValue(at, res, null); 209 } 210 return res; 211 } 212 213 214 /* 215 216 private static CaseComponent calculateMedian(Collection<CBRCase> cases) 217 { 218 HashMap<Attribute,HashMap<Object,Integer>> enumCount = new HashMap<Attribute, HashMap<Object,Integer>>(); 219 HashMap<Attribute,Double> numValues = new HashMap<Attribute,Double>(); 220 221 for(CBRCase _case :cases) 222 { 223 for(Attribute at : AttributeUtils.getAttributes(_case.getDescription())) 224 { 225 if(at.equals(_case.getDescription().getIdAttribute())) 226 continue; 227 Object value = AttributeUtils.findValue(at, _case.getDescription()); 228 if(value instanceof Enum) 229 { 230 HashMap<Object,Integer> enumValues = enumCount.get(at); 231 if(enumValues == null) 232 { 233 enumValues = new HashMap<Object,Integer>(); 234 enumCount.put(at, enumValues); 235 } 236 Integer count = enumValues.get(value); 237 if(count == null) 238 enumValues.put(value, new Integer(0)); 239 else 240 enumValues.put(value, new Integer(count+1)); 241 } 242 else if(value instanceof Number) 243 { 244 Double sum = numValues.get(at); 245 if(sum == null) 246 numValues.put(at, ((Number)value).doubleValue()); 247 else 248 numValues.put(at, sum+((Number)value).doubleValue()); 249 } 250 } 251 } 252 CaseComponent res = CopyUtils.copyCaseComponent(cases.iterator().next().getDescription()); 253 for(Attribute at: AttributeUtils.getAttributes(res)) 254 { 255 HashMap<Object,Integer> enumValues = enumCount.get(at); 256 if(enumValues != null) 257 { 258 Object maxObject = null; 259 int max = 0; 260 for(Object value : enumValues.keySet()) 261 { 262 Integer appears = enumValues.get(value); 263 if(appears > max) 264 { 265 max = appears; 266 maxObject = value; 267 } 268 } 269 AttributeUtils.setValue(at, res, maxObject); 270 continue; 271 } 272 Double sum = numValues.get(at); 273 if(sum != null) 274 { 275 if(at.getType().equals(Integer.class)) 276 AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).intValue() ); 277 else if(at.getType().equals(Double.class)) 278 AttributeUtils.setValue(at, res, (sum/(double)cases.size()) ); 279 else if(at.getType().equals(Float.class)) 280 AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).floatValue() ); 281 else if(at.getType().equals(Long.class)) 282 AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).longValue() ); 283 else if(at.getType().equals(Short.class)) 284 AttributeUtils.setValue(at, res, ((Number)(sum/(double)cases.size())).shortValue() ); 285 } 286 else 287 AttributeUtils.setValue(at, res, null); 288 } 289 return res; 290 } 291 292 293 */ 294 295 }