001 /** 002 * LuceneSearchResult.java 003 * jCOLIBRI2 framework. 004 * @author Juan A. Recio-García. 005 * GAIA - Group for Artificial Intelligence Applications 006 * http://gaia.fdi.ucm.es 007 * 10/04/2007 008 */ 009 package jcolibri.extensions.textual.lucene; 010 011 import java.io.IOException; 012 import java.util.HashMap; 013 import java.util.Iterator; 014 015 import org.apache.lucene.document.Document; 016 import org.apache.lucene.search.Hit; 017 import org.apache.lucene.search.Hits; 018 019 /** 020 * Stores the result of a search. 021 * It is a list of relevant documents with an assigned score. 022 * @author Juan A. Recio-García 023 * 024 */ 025 public class LuceneSearchResult { 026 027 //Table that maps between the position in the result and the ID of the document. 028 private String[] _pos2id; 029 //Table that maps between the ID of a document and its position in the result 030 private HashMap<String, Integer> _id2pos; 031 //Table that maps between the position and the score 032 private float[] _pos2score; 033 //Number of returned documents 034 private int resultLength; 035 //Index used to search 036 private LuceneIndex index; 037 //Max score obtained 038 private float maxscore; 039 040 041 /** 042 * Transforms from Lucene result format. 043 */ 044 protected LuceneSearchResult(Hits hits, LuceneIndex index) throws IOException 045 { 046 this.index = index; 047 resultLength = hits.length(); 048 _pos2id = new String[resultLength]; 049 _pos2score = new float[resultLength]; 050 _id2pos = new HashMap<String,Integer>(resultLength); 051 052 maxscore = 0; 053 int pos=0; 054 for(Iterator hi = hits.iterator(); hi.hasNext();pos++) 055 { 056 Hit hit = (Hit)hi.next(); 057 Document doc = hit.getDocument(); 058 String id = doc.get(LuceneDocument.ID_FIELD); 059 060 _pos2id[pos] = id; 061 _pos2score[pos] = hit.getScore(); 062 _id2pos.put(id, new Integer(pos)); 063 064 if(hit.getScore()>maxscore) 065 maxscore = hit.getScore(); 066 } 067 } 068 069 /** 070 * Gets the position of a document in the result given its ID. 071 * If that ID is not in the results set this methods returns -1 072 */ 073 public int getDocPosition(String docID) 074 { 075 Integer pos = _id2pos.get(docID); 076 if(pos == null) 077 return -1; 078 return pos; 079 } 080 081 /** 082 * Gets the document in a position inside the results list. 083 */ 084 public LuceneDocument getDocAt(int position) 085 { 086 if(position<resultLength) 087 return index.getDocument(_pos2id[position]); 088 return null; 089 } 090 091 /** 092 * Gets the score obtained by a document. 093 * It can be normalized to [0..1], that way, the document with max score will have a 1 and the document with min score a 0. 094 */ 095 public float getDocScore(String docID, boolean normalized) 096 { 097 int pos = getDocPosition(docID); 098 if(pos == -1) 099 return 0; 100 return getDocScore(pos, normalized); 101 } 102 103 /** 104 * Gets the score obtained by a document located in a position of the result list. 105 * It can be normalized to [0..1], that way, the document with max score will have a 1 and the document with min score a 0. 106 */ 107 public float getDocScore(int position, boolean normalized) 108 { 109 if(position>=resultLength) 110 return 0; 111 if(normalized) 112 return _pos2score[position]/maxscore; 113 else 114 return _pos2score[position]; 115 } 116 117 118 /** 119 * Returns the content of a field of the document located in the given position. 120 * @param position of the document in the result list 121 * @param fieldName that stores the text to return 122 */ 123 public String getContent(int position, String fieldName) 124 { 125 if(position>=resultLength) 126 return null; 127 String id = _pos2id[position]; 128 return index.getDocument(id).getContentField(fieldName); 129 } 130 131 /** 132 * Returns the number of results 133 */ 134 public int getResultLength() 135 { 136 return this.resultLength; 137 } 138 139 }