001 /** 002 * PearsonMatrixCaseBase.java 003 * jCOLIBRI2 framework. 004 * @author Juan A. Recio-García. 005 * GAIA - Group for Artificial Intelligence Applications 006 * http://gaia.fdi.ucm.es 007 * 11/11/2007 008 */ 009 package jcolibri.extensions.recommendation.collaborative; 010 011 import java.util.ArrayList; 012 import java.util.Collection; 013 import java.util.HashMap; 014 import java.util.HashSet; 015 import java.util.Iterator; 016 import java.util.Map.Entry; 017 018 019 import jcolibri.cbrcore.Attribute; 020 021 /** 022 * Extension of the MatrixCaseBase that computes similarities among neighbors 023 * using the Pearson Correlation. 024 * <br> 025 * It uses a minCorrelateItems Factor to weight similar neighbors that have 026 * few common correlate items. 027 * 028 * @author Juan A. Recio-Garcia 029 * @author Developed at University College Cork (Ireland) in collaboration with Derek Bridge. 030 * @version 1.0 031 * @see jcolibri.test.recommenders.rec12.MoviesRecommender 032 */ 033 public class PearsonMatrixCaseBase extends MatrixCaseBase 034 { 035 /** 036 * Factor that decreases the similarity between users 037 * who have fewer than this number of co-rated items 038 */ 039 int minCorrelateItemsFactor; 040 041 /** 042 * Constructor 043 * @param value is the attribute of the result part of the case that contains the rating 044 * @param minCorrelateItemsFactor factor that decreases the similarity between users 045 * who have fewer than this number of co-rated items 046 */ 047 public PearsonMatrixCaseBase(Attribute value, int minCorrelateItemsFactor) 048 { 049 super(value); 050 this.minCorrelateItemsFactor = minCorrelateItemsFactor; 051 } 052 053 054 @Override 055 /** 056 * Computes the similarity between users 057 */ 058 protected void computeSimilarities() 059 { 060 this.computeAverages(); 061 this.computeSimilarityByDescriptionId(); 062 this.computeSimilarityLists(); 063 } 064 065 // stores the similarity lists 066 private HashMap<Integer,Collection<SimilarTuple>> similLists; 067 068 @SuppressWarnings("unchecked") 069 private void computeSimilarityLists() 070 { 071 similLists = new HashMap<Integer,Collection<SimilarTuple>>(); 072 for(Integer key: similarities.keySet()) 073 { 074 ArrayList<SimilarTuple> list = new ArrayList<SimilarTuple>(); 075 HashMap<Integer,Double> similMap = similarities.get(key); 076 for(Entry<Integer,Double> entry : similMap.entrySet()) 077 list.add(new SimilarTuple(entry.getKey(), entry.getValue())); 078 java.util.Collections.sort(list); 079 similLists.put(key, list); 080 } 081 } 082 083 @Override 084 /** 085 * Returns a list of similar users to a given one in decreasing order 086 */ 087 public Collection<SimilarTuple> getSimilar(Integer id) 088 { 089 return similLists.get(id); 090 } 091 092 093 @Override 094 /** 095 * Returns the similarity between two users 096 */ 097 public double getSimil(Integer id1, Integer id2) 098 { 099 return similarities.get(id1).get(id2); 100 } 101 102 103 // table to store the similarities 104 private HashMap<Integer,HashMap<Integer,Double>> similarities; 105 106 107 108 @SuppressWarnings("unchecked") 109 /** 110 * Computes the Pearson Correlation between users in a smart and efficient way. 111 * This code is an adaptation of the one developed by Jerome Kelleher and Derek Bridge 112 * for the Collaborative Movie Recommender project at University College Cork (Ireland). 113 */ 114 private void computeSimilarityByDescriptionId() 115 { 116 org.apache.commons.logging.LogFactory.getLog(this.getClass()).info("Computing similarities"); 117 similarities = new HashMap<Integer, HashMap<Integer,Double>>(); 118 HashSet<Integer> keyCopy = new HashSet<Integer>(byDescriptionId.keySet()); 119 for(Integer me : byDescriptionId.keySet()) 120 { 121 keyCopy.remove(me); 122 for(Integer you : keyCopy) 123 { 124 125 Iterator ratings = new CommonRatingsIterator(me, you, byDescriptionId.get(me), byDescriptionId.get(you)); 126 double sumX = 0.0; 127 double sumXSquared = 0.0; 128 double sumY = 0.0; 129 double sumYSquared = 0.0; 130 double sumXY = 0.0; 131 double numDataPoints = 0; 132 // X corresponds to active, Y to predictor. 133 while (ratings.hasNext()) 134 { 135 CommonRatingTuple rt = (CommonRatingTuple) ratings.next(); 136 double x = rt.getRating1(); 137 double y = rt.getRating2(); 138 numDataPoints++; 139 sumX += x; 140 sumY += y; 141 sumXSquared += square(x); 142 sumYSquared += square(y); 143 sumXY += (x * y); 144 } 145 // update AbstractMovieRecommender by the correct comparison count. 146 // Modified to remove comparisons required for search 147 // AbstractMovieRecommender.addToComparisonCount(numDataPoints); 148 149 double correlation = 0.0; 150 if (numDataPoints != 0 ) 151 { double numerator = sumXY - ((sumX * sumY) / numDataPoints); 152 double sqrt = 153 (sumXSquared - (square(sumX) / numDataPoints)) * 154 (sumYSquared - (square(sumY) / numDataPoints)); 155 double denominator = Math.sqrt(sqrt); 156 157 // output 0 here according to Herlocker's recommendations, 158 // also watch for negative square roots (extremely rare) 159 correlation = denominator == 0.0 || sqrt < 0.0 ? 0.0 : 160 numerator / denominator; 161 correlation = correlation * numDataPoints / minCorrelateItemsFactor; 162 } 163 164 165 166 HashMap<Integer,Double> mySimilList = similarities.get(me); 167 if(mySimilList == null) 168 { 169 mySimilList = new HashMap<Integer, Double>(); 170 similarities.put(me, mySimilList); 171 } 172 mySimilList.put(you, correlation); 173 174 HashMap<Integer,Double> yourSimilList = similarities.get(you); 175 if(yourSimilList == null) 176 { 177 yourSimilList = new HashMap<Integer, Double>(); 178 similarities.put(you, yourSimilList); 179 } 180 yourSimilList.put(me, correlation); 181 182 } 183 } 184 } 185 186 // stores the averages 187 private HashMap<Integer,Double> averages; 188 189 // computes the averages 190 private void computeAverages() 191 { 192 org.apache.commons.logging.LogFactory.getLog(this.getClass()).info("Computing Averages"); 193 averages = new HashMap<Integer, Double>(); 194 for(Integer i : byDescriptionId.keySet()) 195 { 196 ArrayList<RatingTuple> list = byDescriptionId.get(i); 197 double acum = 0; 198 for(RatingTuple rt : list) 199 acum += rt.getRating(); 200 double size = list.size(); 201 averages.put(i, acum/size); 202 } 203 } 204 205 /** 206 * returns the ratings average for a given user 207 * @param id is the user 208 * @return the ratings average 209 */ 210 public double getAverage(int id) 211 { 212 return averages.get(id); 213 } 214 215 216 217 /** 218 * Return the square of the parameter number. 219 * 220 * @param n the number to be squared. 221 * @return n*n. 222 */ 223 private static double square(double n) 224 { return n * n; 225 } 226 227 }