001    /**
002     * TestLucene.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 10/04/2007
008     */
009    package jcolibri.extensions.textual.lucene;
010    
011    
012    import java.io.File;
013    import java.io.IOException;
014    import java.util.Collection;
015    import javax.swing.JFileChooser;
016    
017    import jcolibri.datatypes.Text;
018    import jcolibri.extensions.textual.wordnet.WordNetBridge;
019    
020    
021    /**
022     * Class used to test and learn how to use Lucene.
023     * It reads documents from a directory and indexes them. Then allows to ask for documents.
024     * <p>
025     * To avoid memory problems use the -Xms -Xmx VM params. For example to use a max of 1Gb of memory use: -Xms256m -Xmx1024m 
026     * @author Juan A. Recio-García
027     * @version 1.0
028     */
029    public class TestLucene {
030    
031            //Documents only have a field
032            private static String CONTENT_FIELD = "content";
033            
034            /** Transforms files to Lucene documents. */
035            private static Collection<LuceneDocument> indexDocs(File directory) throws IOException {
036                
037                    java.util.ArrayList<LuceneDocument> docs = new java.util.ArrayList<LuceneDocument>();
038                    
039                if (!directory.canRead())
040                    return docs;
041                
042                if (!directory.isDirectory())
043                    return docs;
044                
045                File[] files = directory.listFiles();
046                
047                for(File f: files)
048                {
049                    if(f.isDirectory())
050                            continue;
051                    java.io.BufferedReader fr = new java.io.BufferedReader(new java.io.FileReader(f));
052                    StringBuffer sb = new StringBuffer();
053                    while (fr.ready())
054                            sb.append(fr.readLine());
055                        fr.close();
056    
057                            
058                    LuceneDocument doc = new LuceneDocument(f.getCanonicalPath());
059                    doc.addContentField(CONTENT_FIELD, new Text(sb.toString()));
060                    
061                    docs.add(doc);  
062                }
063                
064                return docs;
065                   
066              }
067    
068            
069            /**
070             * @param args
071             */
072            public static void main(String[] args) {
073                    
074                    //Obtain the files
075                    
076                    JFileChooser jfc = new javax.swing.JFileChooser();
077                    jfc.setDialogType(JFileChooser.OPEN_DIALOG);
078                    jfc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
079                    
080                    jfc.showOpenDialog(null);
081                    File docDir = jfc.getSelectedFile();
082    
083                    if (!docDir.exists() || !docDir.canRead()) {
084                  System.out.println("Document directory '" +docDir.getAbsolutePath()+ "' does not exist or is not readable, please check the path");
085                  System.exit(1);
086                }
087                
088                    //Transform the files to Lucene documents
089                    
090                    Collection<LuceneDocument> docs = null;;
091                    try {
092                            docs = indexDocs(docDir);
093                    } catch (IOException e) {
094                            org.apache.commons.logging.LogFactory.getLog(WordNetBridge.class).error(e);
095                    }
096                    
097                    try {
098                            
099                            // Create the Lucene Index
100                            LuceneIndex index = new LuceneIndex(docs);
101                            
102                            // Ask for a query
103                            String query = javax.swing.JOptionPane.showInputDialog("Query?");
104                            
105                            // Search
106                            LuceneSearchResult lsr = LuceneSearcher.search(index, query, CONTENT_FIELD);
107                            
108                            // Print results
109                            System.out.println("Results: "+lsr.getResultLength());
110                            for(int i=0; i<lsr.getResultLength(); i++)
111                            {
112                                    System.out.println(lsr.getDocScore(i,true)+" -> "+lsr.getDocAt(i).getDocID());
113                            }
114                    } catch (OutOfMemoryError e) {
115                            org.apache.commons.logging.LogFactory.getLog(WordNetBridge.class).error("Lucene requires more memory. Launch the JVM with these flags: java -Xms256m -Xmx512m ...");
116                    }
117            }
118    
119    }