001 /** 002 * InformationGain.java 003 * jCOLIBRI2 framework. 004 * @author Juan A. Recio-García. 005 * GAIA - Group for Artificial Intelligence Applications 006 * http://gaia.fdi.ucm.es 007 * 29/10/2007 008 */ 009 package jcolibri.extensions.recommendation.navigationByAsking; 010 011 import java.util.ArrayList; 012 import java.util.Collection; 013 import java.util.Enumeration; 014 import java.util.HashSet; 015 import java.util.Hashtable; 016 017 import jcolibri.cbrcore.Attribute; 018 import jcolibri.cbrcore.CBRCase; 019 import jcolibri.cbrcore.CBRQuery; 020 import jcolibri.exception.ExecutionException; 021 import jcolibri.util.AttributeUtils; 022 023 /** 024 * Selects an attribute with the highest information gain. 025 * <p>See: 026 * <p>R. Bergmann. Experience Management: Foundations, Development Methodology, 027 * and Internet-Based Applications. Springer-Verlag New York, Inc.,Secaucus, 028 * NJ, USA, 2002. 029 * <p> 030 * S. Schulz. CBR-works: A state-of-the-art shell for case-based application 031 * building. In E. Melis, editor, Proceedings of the 7th German Workshop on 032 * Case-Based Reasoning, GWCBR'99, Würzburg, Germany, pages 166-175. 033 * University of Würzburg, 1999. 034 * 035 * @author Juan A. Recio-Garcia 036 * @author Developed at University College Cork (Ireland) in collaboration with Derek Bridge. 037 * @version 1.0 038 * 039 */ 040 public class InformationGain implements SelectAttributeMethod 041 { 042 /******************************************************************************/ 043 /** STATIC METHODS **/ 044 /******************************************************************************/ 045 046 047 048 private static ArrayList<Attribute> asked; 049 050 /** 051 * Selects an attribute with the highest information gain. 052 * @param cases are the working cases 053 * @param init indicates if this is the first time that the algorithm is executed. 054 * This way, in following iterations past chosen attributes are not computed. 055 * @param completeSetOfCases is the original case set used when there are not attributes 056 * with any information gain. This way, the method starts again with all the cases. 057 * @return the selected attribute or null if there are not more attributes to ask. 058 * @throws ExecutionException 059 */ 060 public static Attribute getMoreIGattribute(Collection<CBRCase> cases, boolean init, Collection<CBRCase> completeSetOfCases) throws ExecutionException 061 { 062 if(cases.isEmpty()) 063 { 064 cases.addAll(completeSetOfCases); 065 init = true; 066 } 067 if(init) 068 asked = new ArrayList<Attribute>(); 069 if(asked ==null) 070 throw new ExecutionException("InformationGain method must be initialized each cycle"); 071 CBRCase acase = cases.iterator().next(); 072 Collection<Attribute> atts = AttributeUtils.getAttributes(acase.getDescription()); 073 atts.remove(acase.getDescription().getIdAttribute()); 074 075 atts.removeAll(asked); 076 if(atts.isEmpty()) 077 { 078 asked = new ArrayList<Attribute>(); 079 atts = AttributeUtils.getAttributes(acase.getDescription()); 080 atts.remove(acase.getDescription().getIdAttribute()); 081 } 082 083 System.out.println("Computing IG for "+cases.size()+" cases"); 084 085 double maxIG = 0; 086 Attribute maxIGatt = null; 087 for(Attribute a: atts) 088 { 089 double ig = computeIG(a,cases); 090 System.out.println("IG "+a.getName()+" --> "+ig); 091 if(ig>maxIG) 092 { 093 maxIG = ig; 094 maxIGatt = a; 095 } 096 } 097 098 asked.add(maxIGatt); 099 return maxIGatt; 100 } 101 102 /** 103 * Computes the IG for an attribute 104 */ 105 private static double computeIG(Attribute a, Collection<CBRCase> cases) 106 { 107 Hashtable<Object,HashSet<CBRCase>> clases = new Hashtable<Object,HashSet<CBRCase>>(); 108 for(CBRCase c: cases) 109 { 110 Object value = AttributeUtils.findValue(a, c.getDescription()); 111 HashSet<CBRCase> set = clases.get(value); 112 if(set==null) 113 { 114 set = new HashSet<CBRCase>(); 115 clases.put(value, set); 116 } 117 set.add(c); 118 } 119 120 double casesSize = cases.size(); 121 double res = 0; 122 for(Enumeration<HashSet<CBRCase>> en = clases.elements(); en.hasMoreElements();) 123 { 124 double setSize = en.nextElement().size(); 125 double div = setSize/casesSize; 126 res+= div * (Math.log(div)/Math.log(2)); 127 } 128 return -res; 129 } 130 131 /******************************************************************************/ 132 /** OBJECT METHODS **/ 133 /******************************************************************************/ 134 135 /** 136 * Original set of cases, used when there is not IG 137 */ 138 private Collection<CBRCase> completeSetOfCases; 139 140 /** 141 * Constructor. 142 * @param completeset is the original set of cases, used when there is not IG 143 */ 144 public InformationGain(Collection<CBRCase> completeset) 145 { 146 completeSetOfCases = completeset; 147 asked = new ArrayList<Attribute>(); 148 } 149 150 /** 151 * Selects the attribute to be asked 152 * @param cases list of working cases 153 * @param query is the current query 154 * @return selected attribute 155 * @throws ExecutionException 156 */ 157 public Attribute getAttribute(Collection<CBRCase> cases, CBRQuery query) throws ExecutionException 158 { 159 return getMoreIGattribute(cases, false, completeSetOfCases); 160 161 } 162 }