001 /** 002 * CreateMoviesDataBase.java 003 * jCOLIBRI2 framework. 004 * @author Juan A. Recio-García. 005 * GAIA - Group for Artificial Intelligence Applications 006 * http://gaia.fdi.ucm.es 007 * 11/11/2007 008 */ 009 package jcolibri.test.recommenders.rec12.moviesDB; 010 011 import java.io.BufferedReader; 012 import java.io.BufferedWriter; 013 import java.io.File; 014 import java.io.FileWriter; 015 import java.io.InputStreamReader; 016 import java.util.HashMap; 017 import java.util.StringTokenizer; 018 019 import jcolibri.test.recommenders.rec12.moviesDB.User.Gender; 020 import jcolibri.test.recommenders.rec12.moviesDB.User.Occupation; 021 import jcolibri.util.FileIO; 022 023 /** 024 * Class that generates the movies database in a proper format. 025 * @author Juan A. Recio-Garcia 026 * @author Developed at University College Cork (Ireland) in collaboration with Derek Bridge. 027 * @version 1.0 028 * 029 */ 030 public class CreateMoviesDataBase 031 { 032 private static HashMap<Integer,User> users = new HashMap<Integer, User>(); 033 private static HashMap<Integer,Movie> movies = new HashMap<Integer, Movie>(); 034 035 036 private static void parseUsers(String filename) throws Exception 037 { 038 BufferedReader br = null; 039 br = new BufferedReader( new InputStreamReader(FileIO.openFile(filename))); 040 if (br == null) 041 throw new Exception("Error opening file: " + filename); 042 043 String line = ""; 044 while ((line = br.readLine()) != null) 045 { 046 StringTokenizer st = new StringTokenizer(line,"|"); 047 User user = new User(); 048 user.setId(Integer.parseInt(st.nextToken())); 049 user.setAge(Integer.parseInt(st.nextToken())); 050 String gender = st.nextToken(); 051 if(gender.equals("M")) 052 user.setGender(Gender.Male); 053 else 054 user.setGender(Gender.Female); 055 user.setOccupation(Occupation.valueOf(st.nextToken())); 056 user.setZipCode(st.nextToken()); 057 058 users.put(user.getId(), user); 059 } 060 br.close(); 061 062 } 063 064 065 private static void parseMovies(String filename) throws Exception 066 { 067 BufferedReader br = null; 068 br = new BufferedReader( new InputStreamReader(FileIO.openFile(filename))); 069 if (br == null) 070 throw new Exception("Error opening file: " + filename); 071 072 String line = ""; 073 while ((line = br.readLine()) != null) 074 { 075 StringTokenizer st = new StringTokenizer(line,"|"); 076 Movie movie = new Movie(); 077 movie.setId(Integer.parseInt(st.nextToken())); 078 movie.setTitle(st.nextToken()); 079 movie.setReleaseDate(st.nextToken()); 080 movie.setVideoReleaseDate(st.nextToken()); 081 movie.setURL(st.nextToken()); 082 movie.setGenreUnknown(st.nextToken().equals("1")); 083 movie.setGenreAction(st.nextToken().equals("1")); 084 movie.setGenreAdventure(st.nextToken().equals("1")); 085 movie.setGenreAnimation(st.nextToken().equals("1")); 086 movie.setGenreChildren(st.nextToken().equals("1")); 087 movie.setGenreComedy(st.nextToken().equals("1")); 088 movie.setGenreCrime(st.nextToken().equals("1")); 089 movie.setGenreDocumentary(st.nextToken().equals("1")); 090 movie.setGenreDrama(st.nextToken().equals("1")); 091 movie.setGenreFantasy(st.nextToken().equals("1")); 092 movie.setGenreFilmNoir(st.nextToken().equals("1")); 093 movie.setGenreHorror(st.nextToken().equals("1")); 094 movie.setGenreMusical(st.nextToken().equals("1")); 095 movie.setGenreMystery(st.nextToken().equals("1")); 096 movie.setGenreRomance(st.nextToken().equals("1")); 097 movie.setGenreSciFi(st.nextToken().equals("1")); 098 movie.setGenreThriller(st.nextToken().equals("1")); 099 movie.setGenreWar(st.nextToken().equals("1")); 100 movie.setGenreWestern(st.nextToken().equals("1")); 101 102 movies.put(movie.getId(), movie); 103 } 104 br.close(); 105 } 106 107 private static String getUser(User user, String sep) 108 { 109 return user.getId()+sep+user.getAge()+sep+user.getGender()+sep+user.getOccupation()+sep+user.getZipCode(); 110 } 111 private static String getMovie(Movie movie,String sep) 112 { 113 return movie.getId()+sep+movie.getTitle()+sep+movie.getReleaseDate()+sep+movie.getVideoReleaseDate()+sep+movie.getURL()+sep+movie.getGenreUnknown()+sep+movie.getGenreAction()+sep+movie.getGenreAdventure()+sep+movie.getGenreAnimation()+sep+movie.getGenreChildren()+sep+movie.getGenreComedy()+sep+movie.getGenreCrime()+sep+movie.getGenreDocumentary()+sep+movie.getGenreDrama()+sep+movie.getGenreFantasy()+sep+movie.getGenreFilmNoir()+sep+movie.getGenreHorror()+sep+movie.getGenreMusical()+sep+movie.getGenreMystery()+sep+movie.getGenreRomance()+sep+movie.getGenreSciFi()+sep+movie.getGenreThriller()+sep+movie.getGenreWar()+sep+movie.getGenreWestern(); 114 } 115 116 117 private static void generateNewFile(String ratingsFile, String filename, String separator) throws Exception 118 { 119 BufferedWriter bw = null; 120 bw = new BufferedWriter(new FileWriter(new File(filename), false)); 121 if (bw == null) 122 throw new Exception("Error opening file for writing: "+filename); 123 124 BufferedReader br = null; 125 br = new BufferedReader( new InputStreamReader(FileIO.openFile(ratingsFile))); 126 if (br == null) 127 throw new Exception("Error opening file: " + filename); 128 129 int ratingId = 1; 130 String line = ""; 131 while ((line = br.readLine()) != null) 132 { 133 StringTokenizer st = new StringTokenizer(line,"\t"); 134 Integer userId = Integer.parseInt(st.nextToken()); 135 Integer movieId = Integer.parseInt(st.nextToken()); 136 Integer rat = Integer.parseInt(st.nextToken()); 137 138 User user = users.get(userId); 139 Movie movie = movies.get(movieId); 140 Rating rating = new Rating(); 141 rating.setId(ratingId++); 142 rating.setRating(rat); 143 144 145 bw.write(getUser(user,separator)+separator+getMovie(movie,separator)+separator+rating.getId()+separator+rating.getRating()); 146 bw.newLine(); 147 } 148 149 br.close(); 150 bw.close(); 151 } 152 153 /** 154 * @param args 155 */ 156 public static void main(String[] args) 157 { 158 try 159 { 160 parseUsers("jcolibri/test/recommenders/rec12/moviesDB/u.user"); 161 parseMovies("jcolibri/test/recommenders/rec12/moviesDB/u.item"); 162 163 generateNewFile("jcolibri/test/recommenders/rec12/moviesDB/u.data","src/jcolibri/test/recommenders/rec12/moviesDB/movies.txt","|"); 164 } catch (Exception e) 165 { 166 org.apache.commons.logging.LogFactory.getLog(CreateMoviesDataBase.class).error(e); 167 168 } 169 170 System.out.println("Finished"); 171 172 } 173 174 }