/**
 * TestLucene.java
 * jCOLIBRI2 framework.
 * @author Juan A. Recio-García.
 * GAIA - Group for Artificial Intelligence Applications
 * http://gaia.fdi.ucm.es
 * 10/04/2007
 */
package jcolibri.extensions.textual.lucene;


import java.io.File;
import java.io.IOException;
import java.util.Collection;
import javax.swing.JFileChooser;

import jcolibri.datatypes.Text;
import jcolibri.extensions.textual.wordnet.WordNetBridge;


/**
 * Small interactive program used to test and learn how to use Lucene.
 * It reads the files of a user-selected directory, turns each one into a
 * {@link LuceneDocument}, builds an index from them, asks for a query and
 * prints the matching documents with their scores.
 * <p>
 * To avoid memory problems use the -Xms -Xmx VM params. For example, to use a
 * maximum of 1Gb of memory use: -Xms256m -Xmx1024m
 * @author Juan A. Recio-García
 * @version 1.0
 */
public class TestLucene {

    /** Name of the single field stored in every document. */
    private static final String CONTENT_FIELD = "content";

    /**
     * Transforms the regular files of a directory into Lucene documents.
     * Sub-directories are skipped (the listing is not recursive). Each document
     * uses the canonical path of its file as identifier and stores the file
     * text in the {@link #CONTENT_FIELD} field.
     *
     * @param directory directory whose files are converted
     * @return the created documents; empty if {@code directory} is not a
     *         readable directory or cannot be listed
     * @throws IOException if a file cannot be read or its canonical path
     *         cannot be resolved
     */
    private static Collection<LuceneDocument> indexDocs(File directory) throws IOException {

        java.util.ArrayList<LuceneDocument> docs = new java.util.ArrayList<LuceneDocument>();

        if (!directory.canRead() || !directory.isDirectory())
            return docs;

        // listFiles() returns null when the directory cannot be listed (I/O error).
        File[] files = directory.listFiles();
        if (files == null)
            return docs;

        for (File f : files)
        {
            if (f.isDirectory())
                continue;

            String content = readContent(f);

            LuceneDocument doc = new LuceneDocument(f.getCanonicalPath());
            doc.addContentField(CONTENT_FIELD, new Text(content));

            docs.add(doc);
        }

        return docs;
    }

    /**
     * Reads a whole text file using the platform default charset.
     *
     * @param file file to read
     * @return the file text, every line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    private static String readContent(File file) throws IOException {
        java.io.BufferedReader reader = new java.io.BufferedReader(new java.io.FileReader(file));
        try {
            StringBuilder sb = new StringBuilder();
            String line;
            // readLine() returns null at end of stream; ready() is not an EOF test.
            while ((line = reader.readLine()) != null) {
                // readLine() strips the terminator: restore a separator so that the
                // last word of a line is not glued to the first word of the next one.
                sb.append(line).append('\n');
            }
            return sb.toString();
        } finally {
            // Release the file handle even if reading fails.
            reader.close();
        }
    }


    /**
     * Asks for a directory and a query through Swing dialogs, indexes the
     * directory and prints the search results to standard output.
     *
     * @param args command line arguments (not used)
     */
    public static void main(String[] args) {

        org.apache.commons.logging.Log log = org.apache.commons.logging.LogFactory.getLog(TestLucene.class);

        //Obtain the files

        JFileChooser jfc = new javax.swing.JFileChooser();
        jfc.setDialogType(JFileChooser.OPEN_DIALOG);
        jfc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);

        int choice = jfc.showOpenDialog(null);
        File docDir = jfc.getSelectedFile();

        // The user may cancel or close the dialog: nothing was selected then.
        if (choice != JFileChooser.APPROVE_OPTION || docDir == null) {
            System.out.println("No document directory selected.");
            return;
        }

        if (!docDir.exists() || !docDir.canRead()) {
            System.out.println("Document directory '" +docDir.getAbsolutePath()+ "' does not exist or is not readable, please check the path");
            System.exit(1);
        }

        //Transform the files to Lucene documents

        Collection<LuceneDocument> docs;
        try {
            docs = indexDocs(docDir);
        } catch (IOException e) {
            // Without documents there is nothing to index: report and stop.
            log.error("Error reading the documents of '" + docDir.getAbsolutePath() + "'", e);
            return;
        }

        try {

            // Create the Lucene Index
            LuceneIndex index = new LuceneIndex(docs);

            // Ask for a query
            String query = javax.swing.JOptionPane.showInputDialog("Query?");
            if (query == null) {
                // Input dialog cancelled: nothing to search for.
                return;
            }

            // Search
            LuceneSearchResult lsr = LuceneSearcher.search(index, query, CONTENT_FIELD);

            // Print results
            System.out.println("Results: "+lsr.getResultLength());
            for(int i=0; i<lsr.getResultLength(); i++)
            {
                System.out.println(lsr.getDocScore(i,true)+" -> "+lsr.getDocAt(i).getDocID());
            }
        } catch (OutOfMemoryError e) {
            log.error("Lucene requires more memory. Launch the JVM with these flags: java -Xms256m -Xmx512m ...");
        }
    }

}