001    /**
002     * InformationGain.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 29/10/2007
008     */
009    package jcolibri.extensions.recommendation.navigationByAsking;
010    
011    import java.util.ArrayList;
012    import java.util.Collection;
013    import java.util.Enumeration;
014    import java.util.HashSet;
015    import java.util.Hashtable;
016    
017    import jcolibri.cbrcore.Attribute;
018    import jcolibri.cbrcore.CBRCase;
019    import jcolibri.cbrcore.CBRQuery;
020    import jcolibri.exception.ExecutionException;
021    import jcolibri.util.AttributeUtils;
022    
023    /**
024     * Selects an attribute with the highest information gain.
025     * <p>See:
026     * <p>R. Bergmann. Experience Management: Foundations, Development Methodology, 
027     * and Internet-Based Applications. Springer-Verlag New York, Inc.,Secaucus,  
028     * NJ, USA, 2002.
029     * <p>
030     * S. Schulz. CBR-works: A state-of-the-art shell for case-based application
031     * building. In E. Melis, editor, Proceedings of the 7th German Workshop on
032     * Case-Based Reasoning, GWCBR'99, Würzburg, Germany, pages 166-175.
033     * University of Würzburg, 1999.
034     * 
035     * @author Juan A. Recio-Garcia
036     * @author Developed at University College Cork (Ireland) in collaboration with Derek Bridge.
037     * @version 1.0
038     *
039     */
040    public class InformationGain implements SelectAttributeMethod
041    {
042        /******************************************************************************/
043        /**                           STATIC METHODS                                 **/
044        /******************************************************************************/
045    
046        
047        
048        private static ArrayList<Attribute> asked;
049            
050        /**
051         * Selects an attribute with the highest information gain.
052         * @param cases are the working cases
053         * @param init indicates if this is the first time that the algorithm is executed.
054         * This way, in following iterations past chosen attributes are not computed.
055         * @param completeSetOfCases is the original case set used when there are not attributes
056         * with any information gain. This way, the method starts again with all the cases. 
057         * @return the selected attribute or null if there are not more attributes to ask.
058         * @throws ExecutionException
059         */
060        public static Attribute getMoreIGattribute(Collection<CBRCase> cases, boolean init, Collection<CBRCase> completeSetOfCases) throws ExecutionException
061        {
062            if(cases.isEmpty())
063            {
064                cases.addAll(completeSetOfCases);
065                init = true;
066            }
067            if(init)
068                asked = new ArrayList<Attribute>();
069            if(asked ==null)
070                throw new ExecutionException("InformationGain method must be initialized each cycle");
071            CBRCase acase = cases.iterator().next();
072            Collection<Attribute> atts = AttributeUtils.getAttributes(acase.getDescription());
073            atts.remove(acase.getDescription().getIdAttribute());
074            
075            atts.removeAll(asked);
076            if(atts.isEmpty())
077            {
078                asked = new ArrayList<Attribute>();
079                atts = AttributeUtils.getAttributes(acase.getDescription());
080                atts.remove(acase.getDescription().getIdAttribute());
081            }
082            
083            System.out.println("Computing IG for "+cases.size()+" cases");
084            
085            double maxIG = 0;
086            Attribute maxIGatt = null;
087            for(Attribute a: atts)
088            {
089                double ig = computeIG(a,cases);
090                System.out.println("IG "+a.getName()+" --> "+ig);
091                if(ig>maxIG)
092                {
093                    maxIG = ig;
094                    maxIGatt = a;
095                }
096            }
097            
098            asked.add(maxIGatt);
099            return maxIGatt;
100        }
101        
102        /**
103         * Computes the IG for an attribute
104         */
105        private static double computeIG(Attribute a, Collection<CBRCase> cases)
106        {
107            Hashtable<Object,HashSet<CBRCase>> clases = new Hashtable<Object,HashSet<CBRCase>>();
108            for(CBRCase c: cases)
109            {
110                Object value = AttributeUtils.findValue(a, c.getDescription());
111                HashSet<CBRCase> set = clases.get(value);
112                if(set==null)
113                {
114                    set = new HashSet<CBRCase>();
115                    clases.put(value, set);
116                }
117                set.add(c);
118            }
119            
120            double casesSize = cases.size();
121            double res = 0;
122            for(Enumeration<HashSet<CBRCase>> en = clases.elements(); en.hasMoreElements();)
123            {
124                double setSize = en.nextElement().size();
125                double div = setSize/casesSize;
126                res+= div * (Math.log(div)/Math.log(2));
127            }
128            return -res;
129        }
130    
131        /******************************************************************************/
132        /**                           OBJECT METHODS                                 **/
133        /******************************************************************************/
134        
135        /**
136         * Original set of cases, used when there is not IG
137         */
138        private Collection<CBRCase> completeSetOfCases;
139        
140        /**
141         * Constructor. 
142         * @param completeset is the original set of cases, used when there is not IG
143         */
144        public InformationGain(Collection<CBRCase> completeset)
145        {
146            completeSetOfCases = completeset;
147            asked = new ArrayList<Attribute>();
148        }
149        
150        /**
151         * Selects the attribute to be asked
152         * @param cases list of working cases
153         * @param query is the current query
154         * @return selected attribute
155         * @throws ExecutionException
156         */
157        public Attribute getAttribute(Collection<CBRCase> cases, CBRQuery query) throws ExecutionException 
158        {
159                return getMoreIGattribute(cases, false, completeSetOfCases);
160    
161        }
162    }