001 /** 002 * MatrixCaseBase.java 003 * jCOLIBRI2 framework. 004 * @author Juan A. Recio-García. 005 * GAIA - Group for Artificial Intelligence Applications 006 * http://gaia.fdi.ucm.es 007 * 11/11/2007 008 */ 009 package jcolibri.extensions.recommendation.collaborative; 010 011 import java.util.ArrayList; 012 import java.util.Collection; 013 import java.util.HashMap; 014 import java.util.Iterator; 015 import java.util.NoSuchElementException; 016 import java.util.Set; 017 018 import jcolibri.cbrcore.Attribute; 019 import jcolibri.cbrcore.CBRCase; 020 import jcolibri.cbrcore.CBRCaseBase; 021 import jcolibri.cbrcore.CaseBaseFilter; 022 import jcolibri.cbrcore.CaseComponent; 023 import jcolibri.cbrcore.Connector; 024 import jcolibri.exception.AttributeAccessException; 025 import jcolibri.util.ProgressController; 026 027 /** 028 * Specific implementation of CBRCaseBase to allow collaborative recommendations. 029 * These kind of recommendations are based in the past experieces of other users.<br> 030 * This case base manages cases composed by a description, a solution and a result. 031 * The description usually contains the information about the user that made the recommendation, 032 * the solution contains the information about the recommended item, and the 033 * result stores the rating value for the item (the value that the user assigns to an item after evaluating it).<br> 034 * This way, this case base implementation stores cases as a table:<br> 035 * <table border="1" cellspacing="0" cellpadding="2" width="90"> 036 * <tr><td></td><td>Item1</td><td>Item2</td><td>Item3</td><td>Item4</td><td>Item5</td><td>...</td><td>ItemM</td></tr> 037 * <tr><td>User1</td><td></td><td>rating12</td><td></td><td>rating14</td><td></td><td></td><td></td></tr> 038 * <tr><td>User2</td><td>rating21</td><td></td><td>rating23</td><td></td><td></td><td></td><td>rating2N</td></tr> 039 * <tr><td>User3</td><td></td><td></td><td>rating33</td><td>rating34</td><td></td><td></td><td>rating3N</td></tr> 040 * <tr><td>...</td><td></td><td></td><td></td><td></td><td></td><td></td><td></td></tr> 041 * <tr><td>UserN</td><td></td><td>ratingN2</td><td></td><td></td><td>ratingN5</td><td></td><td>ratingNN</td></tr> 042 * </table> 043 * The values of the users column and the items row are the ids of the description and solution parts of the case. 044 * These ids must be integer values. 045 * The ratings are obtained from an attribute of the result part. 046 * <br> 047 * Note that these cases base allows having different cases with the same description (because each user can make several recommendations). 048 * <p> 049 * Collaborative recommenders can be separated into three steps: 050 * <ol> 051 * <li>Weight all users with respect to similarity with the active user. 052 * <li>Select a subset of users as a set of predictors. 053 * <li>Normalize ratings and compute a prediction from a weighted combination of selected neighbors' ratings. 054 * </ol> 055 * There are several ways to compute first step. And it must be implemented by subclassing this class as in PearsonMatrixCaseBase. 056 * The subclasses must implement the abstract methods defined here that return the similarity among neighbors. 057 * The similarity among users is stored through SimilarityTuple objects. 058 * <p> 059 * To allow an efficient comparison, ratings must be sorted by neighbors. In the above table this means that cases 060 * are organized by rows. And that is done internally in this class through the 061 * organizeByDescriptionId() method. Then, the ratings of each user can be accessed through the getRatingTuples() 062 * method. A RatingTuple object stores the id of the item and its rating (id of the solution and id of the result). 063 * <p> 064 * There are also 2 internal classes (CommonRatingTuple and CommonRatingsIterator) that allow to obtain efficiently the common 065 * ratings of two users. This will be used by subclasses when computing the neighbors similarity. The code of these classes 066 * is an adaptation of the one developed by Jerome Kelleher and Derek Bridge for the Collaborative Movie Recommender project at 067 * University College Cork (Ireland). 068 * <p> 069 * Recommender 12 shows how to use these classes. 070 * 071 * @author Juan A. Recio-Garcia 072 * @author Developed at University College Cork (Ireland) in collaboration with Derek Bridge. 073 * @version 1.0 074 * @see jcolibri.extensions.recommendation.collaborative.PearsonMatrixCaseBase 075 * @see jcolibri.test.recommenders.rec12.MoviesRecommender 076 */ 077 public abstract class MatrixCaseBase implements CBRCaseBase 078 { 079 080 ///////////////// ABSTRACT METHODS /////////////////////////////// 081 082 /** 083 * Computes the similarity of neighbors (each neighbor is defined by the description) 084 */ 085 protected abstract void computeSimilarities(); 086 087 /** 088 * Returns the similarity of two neighbors. 089 * @param id1 of the first neighbor 090 * @param id2 of the second neighbor. 091 * @return a double between 0 and 1 092 */ 093 public abstract double getSimil(Integer id1, Integer id2); 094 095 /** 096 * Returns the list of similar neighbors for a given one 097 * @param id of the neighbor 098 * @return a list of SimilarTuple (other neighbor id + similarity value). 099 */ 100 public abstract Collection<SimilarTuple> getSimilar(Integer id); 101 102 ////////////////////////////////////////////////////////////////// 103 104 105 // Connector 106 private jcolibri.cbrcore.Connector connector; 107 // Cases list 108 private java.util.Collection<CBRCase> cases; 109 110 // Attribute of the result that stores the ratings 111 private Attribute value; 112 113 /** 114 * Constructor. The value parameter defines the attribute of the result that stores the ratings. 115 * @param value 116 */ 117 public MatrixCaseBase(Attribute value) 118 { 119 this.value = value; 120 } 121 122 /** 123 * Initializes the case base. 124 * Organizes ratings by the description id of the cases and calls the computeSimilarities() abstract method. 125 */ 126 public void init(Connector connector) { 127 this.connector = connector; 128 ProgressController.init(MatrixCaseBase.class,"Organizing Collaborative Case Base", ProgressController.UNKNOWN_STEPS); 129 org.apache.commons.logging.LogFactory.getLog(this.getClass()).info("Loading cases from connector"); 130 cases = this.connector.retrieveAllCases(); 131 organizeByDescriptionId(); 132 computeSimilarities(); 133 ProgressController.finish(MatrixCaseBase.class); 134 } 135 136 /** 137 * Utility method to obtain the description object given its id. 138 * @param id of the description object 139 * @return the description object 140 */ 141 public CaseComponent getDescription(Integer id) 142 { 143 return descriptions.get(id); 144 } 145 146 /** 147 * Utility method to obtain all the descriptions stored in this case base 148 * @return a set of ids. 149 */ 150 public Set<Integer> getDescriptions() 151 { 152 return descriptions.keySet(); 153 } 154 155 /** 156 * Utility method to obtain the solution object given its id. 157 * @param id of the solution object 158 * @return the solution object 159 */ 160 public CaseComponent getSolution(Integer id) 161 { 162 return solutions.get(id); 163 } 164 165 /** 166 * Utility method to obtain all the solutions stored in this case base 167 * @return a set of ids. 168 */ 169 public Set<Integer> getSolutions() 170 { 171 return solutions.keySet(); 172 } 173 174 /** 175 * Returns the ratings that a user has done. 176 * @param descriptionId of the user 177 * @return collection of rating tuples (solution id + rating value) 178 */ 179 public Collection<RatingTuple> getRatingTuples(int descriptionId) 180 { 181 return byDescriptionId.get(descriptionId); 182 } 183 184 /** Table that stores the list of ratingTuples for each description id (user)*/ 185 HashMap<Integer,ArrayList<RatingTuple>> byDescriptionId; 186 /** Table that organizes description components by id */ 187 HashMap<Integer,CaseComponent> descriptions; 188 /** Table that organizes solution components by id */ 189 HashMap<Integer,CaseComponent> solutions; 190 191 @SuppressWarnings("unchecked") 192 /** 193 * Organizes cases by the description id. 194 */ 195 private void organizeByDescriptionId() 196 { 197 org.apache.commons.logging.LogFactory.getLog(this.getClass()).info("Organizing cases"); 198 try 199 { 200 byDescriptionId = new HashMap<Integer,ArrayList<RatingTuple>>(); 201 descriptions = new HashMap<Integer, CaseComponent>(); 202 solutions = new HashMap<Integer, CaseComponent>(); 203 for(CBRCase c: cases) 204 { 205 Integer descId = (Integer)c.getID(); 206 ArrayList<RatingTuple> list = byDescriptionId.get(descId); 207 if(list == null) 208 { 209 list = new ArrayList<RatingTuple>(); 210 byDescriptionId.put(descId, list); 211 descriptions.put(descId,c.getDescription()); 212 } 213 Integer solId = (Integer) c.getSolution().getIdAttribute().getValue(c.getSolution()); 214 solutions.put(solId, c.getSolution()); 215 Number resId = (Number) value.getValue(c.getResult()); 216 list.add(new RatingTuple(solId, resId.doubleValue())); 217 ProgressController.step(MatrixCaseBase.class); 218 219 } 220 for(ArrayList<RatingTuple> list: byDescriptionId.values()) 221 java.util.Collections.sort(list); 222 } catch (AttributeAccessException e) 223 { 224 org.apache.commons.logging.LogFactory.getLog(this.getClass()).error(e); 225 226 } 227 } 228 229 /** 230 * Closes the case base 231 */ 232 public void close() 233 { 234 connector.close(); 235 } 236 237 /** 238 * Forgets cases. It does nothing in this implementation 239 */ 240 public void forgetCases(Collection<CBRCase> cases) 241 { 242 } 243 244 /** 245 * Returns the stored cases 246 */ 247 public Collection<CBRCase> getCases() 248 { 249 return cases; 250 } 251 252 /** 253 * Returns selected cases. It does nothing in this implementation. 254 */ 255 public Collection<CBRCase> getCases(CaseBaseFilter filter) 256 { 257 return null; 258 } 259 260 /** 261 * Adds new cases to the case base, reorganizing the cases base and re-computing neighbors similarities. 262 */ 263 public void learnCases(Collection<CBRCase> cases) 264 { 265 this.cases.addAll(cases); 266 connector.storeCases(cases); 267 organizeByDescriptionId(); 268 computeSimilarities(); 269 } 270 271 272 ////////////////////////////////////////////////////////////////////////////////////// 273 //////////// Internal Classes ////////////////// 274 ////////////////////////////////////////////////////////////////////////////////////// 275 276 /** 277 * Stores a rating for an item. 278 * @author Juan A. Recio-Garcia 279 * @version 1.0 280 */ 281 public class RatingTuple implements Comparable 282 { 283 int solutionId; 284 double rating; 285 286 public RatingTuple(int solutionId, double rating) 287 { 288 super(); 289 this.solutionId = solutionId; 290 this.rating = rating; 291 } 292 293 public boolean equals(Object o) 294 { 295 return solutionId == ((RatingTuple)o).getSolutionId(); 296 } 297 298 public int compareTo(Object o) 299 { 300 return solutionId - ((RatingTuple)o).solutionId; 301 } 302 /** 303 * @return Returns the rating. 304 */ 305 public double getRating() 306 { 307 return rating; 308 } 309 /** 310 * @return Returns the solutionId. 311 */ 312 public int getSolutionId() 313 { 314 return solutionId; 315 } 316 317 public String toString() 318 { 319 return "("+solutionId+"->"+rating+")"; 320 } 321 } 322 323 /** 324 * Stores the similarity among neighbors. 325 * @author Juan A. Recio-Garcia 326 * @version 1.0 327 * 328 */ 329 public class SimilarTuple implements Comparable 330 { 331 int similarId; 332 double similarity; 333 334 public SimilarTuple(int similarId, double similarity) 335 { 336 super(); 337 this.similarId = similarId; 338 this.similarity = similarity; 339 } 340 341 /** 342 * @return Returns the similarId. 343 */ 344 public int getSimilarId() 345 { 346 return similarId; 347 } 348 349 /** 350 * @return Returns the similarity. 351 */ 352 public double getSimilarity() 353 { 354 return similarity; 355 } 356 357 public int compareTo(Object o) 358 { 359 return (int) (1000*(((SimilarTuple)o).similarity - similarity)); 360 } 361 362 public String toString() 363 { 364 return "["+similarId+","+similarity+"]"; 365 } 366 } 367 368 369 /////////////////////////////////////////////////////////////////////////////////// 370 // Following code is an adaptation of the Collaborative Movie Recommender // 371 // Project implemented by Jerome Kelleher at University College Cork (Ireland) // 372 /////////////////////////////////////////////////////////////////////////////////// 373 374 /** 375 * Stores the ratings of two different users for the same item. 376 * 377 */ 378 class CommonRatingTuple 379 { 380 int id1; 381 double rating1; 382 int id2; 383 double rating2; 384 int ratedId; 385 386 /** 387 * Constructor 388 * @param id1 of user1 389 * @param rating1 of user1 390 * @param id2 of user 2 391 * @param rating2 of user2 392 * @param ratedId is the id of the rated object. 393 */ 394 public CommonRatingTuple(int id1, double rating1, int id2, double rating2, int ratedId) 395 { 396 super(); 397 this.id1 = id1; 398 this.rating1 = rating1; 399 this.id2 = id2; 400 this.rating2 = rating2; 401 this.ratedId = ratedId; 402 } 403 public int getId1() 404 { 405 return id1; 406 } 407 public int getId2() 408 { 409 return id2; 410 } 411 public int getRatedId() 412 { 413 return ratedId; 414 } 415 public double getRating1() 416 { 417 return rating1; 418 } 419 public double getRating2() 420 { 421 return rating2; 422 } 423 } 424 425 /** 426 * Class that given two users returns an iterator over the common ratings. 427 * 428 * 429 */ 430 class CommonRatingsIterator implements Iterator 431 { 432 int idA; 433 int idB; 434 Collection<RatingTuple> rtsA; 435 Collection<RatingTuple> rtsB; 436 Iterator<RatingTuple> iterA; 437 Iterator<RatingTuple> iterB; 438 439 RatingTuple rka; 440 RatingTuple rkb; 441 boolean moreRatings; 442 CommonRatingTuple current; 443 444 public CommonRatingsIterator(int idA, int idB, Collection<RatingTuple> rtsA, Collection<RatingTuple> rtsB) 445 { 446 super(); 447 this.idA = idA; 448 this.idB = idB; 449 this.rtsA = rtsA; 450 this.rtsB = rtsB; 451 452 // initialise the iterators over the Correlatable's ratings 453 iterA = rtsA.iterator(); 454 iterB = rtsB.iterator(); 455 if (iterA.hasNext() && iterB.hasNext()) 456 { 457 rka = (RatingTuple) iterA.next(); 458 rkb = (RatingTuple) iterB.next(); 459 moreRatings = true; // MUST be before the first call to getNextMatch! 460 current = getNextMatch(); 461 } 462 else 463 { current = null; 464 moreRatings = false; 465 } 466 } 467 468 469 /** 470 * Return true iff there are more common ratings 471 * in this CommonRatingsIterator. 472 * 473 * @return true iff more matches exist; false otherwise. 474 */ 475 public boolean hasNext() 476 { return current != null; 477 } 478 479 /** 480 * Return the next match in this CommonRatingsIterator. 481 * This is in the form of a CommonRatingTuple. This allows both 482 * ratings concerned be returned to the client code, thus 483 * saving a scan through the entire ratings collection. 484 * 485 * @return a RatingTuple of the next two common ratings. 486 * @throws NoSuchElementException if no more common ratings 487 * exist. 488 */ 489 public Object next() 490 throws NoSuchElementException 491 { if (! hasNext()) 492 { throw new NoSuchElementException(); 493 } 494 CommonRatingTuple temp = current; 495 current = getNextMatch(); 496 return temp; 497 } 498 499 /** 500 * Throws UnsupportedOperationException if called. 501 */ 502 public void remove() 503 throws UnsupportedOperationException 504 { throw new UnsupportedOperationException("remove not supported in " + 505 "CommonRatingsIterator"); 506 } 507 508 509 /** 510 * Return the next common rating between the two users. 511 * Return null if no matches are left. 512 * 513 * @return a RatingTuple of the next match in the two user 514 * collections of ratings; null if no matches exist. 515 */ 516 private CommonRatingTuple getNextMatch() 517 { 518 CommonRatingTuple match = null; 519 while (moreRatings && match == null) 520 { 521 while (rka.compareTo(rkb) < 0 && moreRatings) 522 { 523 if (iterA.hasNext()) 524 { 525 rka = (RatingTuple) iterA.next(); 526 } 527 else 528 { 529 moreRatings = false; 530 } 531 } 532 if (rka.compareTo(rkb) == 0) 533 { 534 /* 535 * Match found. 536 */ 537 match = new CommonRatingTuple(idA, rka.getRating(), idB, rkb.getRating(), rka.getSolutionId()); 538 539 if (iterA.hasNext() && iterB.hasNext()) 540 { 541 rka = (RatingTuple) iterA.next(); 542 rkb = (RatingTuple) iterB.next(); 543 } 544 else 545 { 546 moreRatings = false; 547 } 548 } 549 while (rka.compareTo(rkb) > 0 && moreRatings) 550 { 551 if (iterB.hasNext()) 552 { 553 rkb = (RatingTuple) iterB.next(); 554 } 555 else 556 { 557 moreRatings = false; 558 } 559 } 560 } 561 return match; 562 } 563 } 564 565 566 ////////////////////////////////////////////////////////////////////////////////////////// 567 568 569 }