001 /** 002 * LuceneIndexCreator.java 003 * jCOLIBRI2 framework. 004 * @author Juan A. Recio-García. 005 * GAIA - Group for Artificial Intelligence Applications 006 * http://gaia.fdi.ucm.es 007 * 12/04/2007 008 */ 009 package jcolibri.method.precycle; 010 011 import java.util.ArrayList; 012 import java.util.Collection; 013 014 import jcolibri.cbrcore.Attribute; 015 import jcolibri.cbrcore.CBRCase; 016 import jcolibri.cbrcore.CBRCaseBase; 017 import jcolibri.datatypes.Text; 018 import jcolibri.extensions.textual.lucene.LuceneDocument; 019 import jcolibri.extensions.textual.lucene.LuceneIndex; 020 021 /** 022 * Creates a Lucene index with the text contained in some attributes of a case 023 * @author Juan A. Recio-Garcia 024 * @version 1.0 025 * @see jcolibri.extensions.textual.lucene.LuceneIndex 026 */ 027 public class LuceneIndexCreator { 028 029 /** 030 * Creates a Lucene Index with the text contained in some attributes. The type of that attributes must be "Text". 031 * This method creates a LuceneDocument for each case, and adds a new field for each attribute (recived as parameter). 032 * The name and content of the Lucene document field is the name and content of the attribute. 033 */ 034 public static LuceneIndex createLuceneIndex(CBRCaseBase casebase, Collection<Attribute> fields) 035 { 036 for(Attribute field: fields) 037 { 038 Class c = field.getType(); 039 if(!Text.class.isAssignableFrom(c)) 040 { 041 org.apache.commons.logging.LogFactory.getLog(LuceneIndexCreator.class).error("Field "+field+" is not an jcolibri.datatyps.Text. Aborting Lucene index creation"); 042 return null; 043 } 044 } 045 046 ArrayList<LuceneDocument> docs = new ArrayList<LuceneDocument>(); 047 for(CBRCase c: casebase.getCases()) 048 { 049 LuceneDocument ld = new LuceneDocument((String)c.getID()); 050 for(Attribute field: fields) 051 ld.addContentField(field.getName(), (Text)jcolibri.util.AttributeUtils.findValue(field, c)); 052 docs.add(ld); 053 } 054 return new LuceneIndex(docs); 055 056 } 057 058 059 /** 060 * Creates a Lucene Index with the text contained in some attributes. The type of that attributes must be "Text". 061 * This method creates a LuceneDocument for each case, and adds a new field for each attribute (recived as parameter). 062 * The name and content of the Lucene document field is the name and content of the attribute. 063 */ 064 public static LuceneIndex createLuceneIndex(CBRCaseBase casebase) 065 { 066 CBRCase _case = casebase.getCases().iterator().next(); 067 Collection<Attribute> attributes = new ArrayList<Attribute>(); 068 if(_case.getDescription() != null) 069 attributes.addAll(jcolibri.util.AttributeUtils.getAttributes(_case.getDescription(), Text.class)); 070 if(_case.getSolution() != null) 071 attributes.addAll(jcolibri.util.AttributeUtils.getAttributes(_case.getSolution(), Text.class)); 072 if(_case.getResult() != null) 073 attributes.addAll(jcolibri.util.AttributeUtils.getAttributes(_case.getResult(), Text.class)); 074 if(_case.getJustificationOfSolution() != null) 075 attributes.addAll(jcolibri.util.AttributeUtils.getAttributes(_case.getJustificationOfSolution(), Text.class)); 076 077 078 return createLuceneIndex(casebase, attributes); 079 080 } 081 082 }