001    /**
002     * PearsonMatrixCaseBase.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 11/11/2007
008     */
009    package jcolibri.extensions.recommendation.collaborative;
010    
011    import java.util.ArrayList;
012    import java.util.Collection;
013    import java.util.HashMap;
014    import java.util.HashSet;
015    import java.util.Iterator;
016    import java.util.Map.Entry;
017    
018    
019    import jcolibri.cbrcore.Attribute;
020    
021    /**
022     * Extension of the MatrixCaseBase that computes similarities among neighbors 
023     * using the Pearson Correlation.
024     * <br>
025     * It uses a minCorrelateItems Factor to weight similar neighbors that have
026     * few common correlate items.
027     * 
028     * @author Juan A. Recio-Garcia
029     * @author Developed at University College Cork (Ireland) in collaboration with Derek Bridge.
030     * @version 1.0
031     * @see jcolibri.test.recommenders.rec12.MoviesRecommender
032     */
033    public class PearsonMatrixCaseBase extends MatrixCaseBase
034    {
035        /** 
036         * Factor that decreases the similarity between users 
037         * who have fewer than this number of co-rated items 
038         */
039        int minCorrelateItemsFactor;
040        
041        /**
042         * Constructor
043         * @param value is the attribute of the result part of the case that contains the rating
044         * @param minCorrelateItemsFactor factor that decreases the similarity between users 
045         * who have fewer than this number of co-rated items
046         */
047        public PearsonMatrixCaseBase(Attribute value, int minCorrelateItemsFactor)
048        {
049            super(value);
050            this.minCorrelateItemsFactor = minCorrelateItemsFactor;
051        }
052        
053        
054        @Override
055        /**
056         * Computes the similarity between users
057         */
058        protected void computeSimilarities()
059        {
060            this.computeAverages();
061            this.computeSimilarityByDescriptionId();
062            this.computeSimilarityLists();
063        }
064        
065        // stores the similarity lists
066        private HashMap<Integer,Collection<SimilarTuple>> similLists;
067        
068        @SuppressWarnings("unchecked")
069        private void computeSimilarityLists()
070        {
071            similLists = new HashMap<Integer,Collection<SimilarTuple>>();
072            for(Integer key: similarities.keySet())
073            {
074                ArrayList<SimilarTuple> list = new ArrayList<SimilarTuple>();
075                HashMap<Integer,Double> similMap = similarities.get(key);
076                for(Entry<Integer,Double> entry : similMap.entrySet())
077                    list.add(new SimilarTuple(entry.getKey(), entry.getValue()));
078                java.util.Collections.sort(list);
079                similLists.put(key, list);
080            }
081        }
082        
083        @Override
084        /**
085         * Returns a list of similar users to a given one in decreasing order 
086         */
087        public Collection<SimilarTuple> getSimilar(Integer id)
088        {
089            return similLists.get(id);
090        }
091        
092        
093        @Override
094        /**
095         * Returns the similarity between two users
096         */
097        public double getSimil(Integer id1, Integer id2)
098        {
099            return similarities.get(id1).get(id2);
100        }
101        
102        
103        // table to store the similarities
104        private HashMap<Integer,HashMap<Integer,Double>> similarities;
105     
106        
107    
108        @SuppressWarnings("unchecked")
109        /**
110         * Computes the Pearson Correlation between users in a smart and efficient way.
111         * This code is an adaptation of the one developed by Jerome Kelleher and Derek Bridge 
112         * for the Collaborative Movie Recommender project at University College Cork (Ireland).
113         */
114        private void computeSimilarityByDescriptionId()
115        {
116            org.apache.commons.logging.LogFactory.getLog(this.getClass()).info("Computing similarities");
117            similarities = new HashMap<Integer, HashMap<Integer,Double>>();
118            HashSet<Integer> keyCopy = new HashSet<Integer>(byDescriptionId.keySet());
119            for(Integer me : byDescriptionId.keySet())
120            {
121                keyCopy.remove(me);
122                for(Integer you : keyCopy)
123                {
124                    
125                          Iterator ratings = new CommonRatingsIterator(me, you, byDescriptionId.get(me), byDescriptionId.get(you));
126                          double sumX = 0.0;
127                          double sumXSquared = 0.0;
128                          double sumY = 0.0;
129                          double sumYSquared = 0.0;
130                          double sumXY = 0.0;
131                          double numDataPoints = 0;
132                          // X corresponds to active, Y to predictor.
133                          while (ratings.hasNext())
134                          {  
135                             CommonRatingTuple rt = (CommonRatingTuple) ratings.next();
136                             double x = rt.getRating1();
137                             double y = rt.getRating2();
138                             numDataPoints++;
139                             sumX += x;
140                             sumY += y;
141                             sumXSquared += square(x);
142                             sumYSquared += square(y);
143                             sumXY += (x * y);
144                          }
145                          // update AbstractMovieRecommender by the correct comparison count.
146                          // Modified to remove comparisons required for search
147                          // AbstractMovieRecommender.addToComparisonCount(numDataPoints);
148    
149                          double correlation = 0.0;
150                          if (numDataPoints != 0 )
151                          {  double numerator = sumXY - ((sumX * sumY) / numDataPoints);
152                             double sqrt =
153                                (sumXSquared - (square(sumX) / numDataPoints)) *
154                                (sumYSquared - (square(sumY) / numDataPoints));
155                             double denominator = Math.sqrt(sqrt);
156    
157                             // output 0 here according to Herlocker's recommendations,
158                             // also watch for negative square roots (extremely rare)
159                             correlation = denominator == 0.0 || sqrt < 0.0 ? 0.0 :
160                                numerator / denominator;
161                             correlation = correlation * numDataPoints / minCorrelateItemsFactor;
162                          }
163                    
164                    
165                    
166                    HashMap<Integer,Double> mySimilList = similarities.get(me);
167                    if(mySimilList == null)
168                    {
169                        mySimilList = new HashMap<Integer, Double>();
170                        similarities.put(me, mySimilList);
171                    }
172                    mySimilList.put(you, correlation);
173                    
174                    HashMap<Integer,Double> yourSimilList = similarities.get(you);
175                    if(yourSimilList == null)
176                    {
177                        yourSimilList = new HashMap<Integer, Double>();
178                        similarities.put(you, yourSimilList);
179                    }
180                    yourSimilList.put(me, correlation);
181                    
182                }
183            }
184        }
185    
186        // stores the averages
187        private HashMap<Integer,Double> averages;
188        
189        // computes the averages
190        private void computeAverages()
191        {
192            org.apache.commons.logging.LogFactory.getLog(this.getClass()).info("Computing Averages");
193            averages = new HashMap<Integer, Double>();
194            for(Integer i : byDescriptionId.keySet())
195            {
196                ArrayList<RatingTuple> list = byDescriptionId.get(i);
197                double acum = 0;
198                for(RatingTuple rt : list)
199                    acum += rt.getRating();
200                double size = list.size();
201                averages.put(i, acum/size);
202            }
203        }
204        
205        /**
206         * returns the ratings average for a given user
207         * @param id is the user
208         * @return the ratings average
209         */
210        public double getAverage(int id)
211        {
212            return averages.get(id);
213        }
214        
215        
216        
217        /**
218         * Return the square of the parameter number.
219         *
220         * @param n the number to be squared.
221         * @return n*n.
222         */
223        private static double square(double n)
224        {  return n * n;
225        }
226    
227    }