001 /** 002 * BasicInformationExtractor.java 003 * jCOLIBRI2 framework. 004 * @author Juan A. Recio-García. 005 * GAIA - Group for Artificial Intelligence Applications 006 * http://gaia.fdi.ucm.es 007 * 21/06/2007 008 */ 009 package jcolibri.extensions.textual.IE.common; 010 011 import java.util.ArrayList; 012 import java.util.Collection; 013 import java.util.Iterator; 014 015 import jcolibri.cbrcore.Attribute; 016 import jcolibri.cbrcore.CBRCase; 017 import jcolibri.cbrcore.CBRQuery; 018 import jcolibri.cbrcore.CaseComponent; 019 import jcolibri.exception.AttributeAccessException; 020 import jcolibri.extensions.textual.IE.representation.IEText; 021 import jcolibri.extensions.textual.IE.representation.info.FeatureInfo; 022 import jcolibri.extensions.textual.IE.representation.info.PhraseInfo; 023 import jcolibri.util.ProgressController; 024 025 /** 026 * This class implements a basic information extractor.<br> 027 * For each Case Component of a case or query, this method obtains the 028 * features or phrases extracted in its textual attributes and copies the values 029 * into the other attributes of the component.<br> 030 * To copy the features, this method looks for attributes with the same name that 031 * the featues and typed as Strings. Then it copies the value of the feature. 032 * If there are many features it concatenates their values separated by a white space. 033 * <br> 034 * With the phrases it does something similar: finds attributes with the same name but 035 * typed as booleans. If so, it changes the boolean to true. 036 * <p> 037 * First version was developed at: Robert Gordon University - Aberdeen & Facultad Informática, 038 * Universidad Complutense de Madrid (GAIA) 039 * 040 * @author Juan A. Recio-Garcia 041 * @version 2.0 042 * @see jcolibri.cbrcore.CaseComponent 043 */ 044 public class BasicInformationExtractor 045 { 046 /** 047 * Performs the algorithm in a collection of cases. 048 */ 049 public static void extractInformation(Collection<CBRCase> cases) 050 { 051 org.apache.commons.logging.LogFactory.getLog(BasicInformationExtractor.class).info("Extracting cases information."); 052 ProgressController.init(BasicInformationExtractor.class, "Extracting cases information ...", cases.size()); 053 for (CBRCase c : cases) 054 { 055 extractInformation(c.getDescription()); 056 extractInformation(c.getSolution()); 057 extractInformation(c.getDescription()); 058 extractInformation(c.getDescription()); 059 ProgressController.step(BasicInformationExtractor.class); 060 } 061 ProgressController.finish(BasicInformationExtractor.class); 062 } 063 064 /** 065 * Performs the algorithm in a query. 066 */ 067 public static void extractInformation(CBRQuery query) 068 { 069 org.apache.commons.logging.LogFactory.getLog(BasicInformationExtractor.class).info("Extracting query information."); 070 extractInformation(query.getDescription()); 071 } 072 073 /** 074 * Extracts the information of a given CaseComponent 075 * @param cc 076 */ 077 private static void extractInformation(CaseComponent cc) 078 { 079 if(cc == null) 080 return; 081 try 082 { 083 Attribute[] attrs = jcolibri.util.AttributeUtils.getAttributes(cc.getClass()); 084 085 // Find the texts and other attributes 086 ArrayList<IEText> texts = new ArrayList<IEText>(); 087 ArrayList<Attribute> other = new ArrayList<Attribute>(); 088 for (int i = 0; i < attrs.length; i++) 089 { 090 Object o = attrs[i].getValue(cc); 091 if (o instanceof CaseComponent) 092 extractInformation((CaseComponent) o); 093 else if (o instanceof IEText) 094 texts.add((IEText) o); 095 else 096 other.add(attrs[i]); 097 098 } 099 100 // Obtain all features and phrases 101 ArrayList<PhraseInfo> phrases = new ArrayList<PhraseInfo>(); 102 ArrayList<FeatureInfo> features = new ArrayList<FeatureInfo>(); 103 for (IEText text : texts) 104 { 105 phrases.addAll(text.getPhrases()); 106 features.addAll(text.getFeatures()); 107 } 108 109 // find a proper value for each attribute. If its type is: 110 // String: find a feature 111 // Phrase: find a phrase 112 for (Attribute at : other) 113 { 114 String name = at.getName(); 115 if (at.getType().equals(String.class)) 116 { 117 String value = ""; 118 for (FeatureInfo feature : features) 119 if (feature.getFeature().equalsIgnoreCase(name)) 120 value += feature.getValue()+" "; 121 if (value.length() > 0) 122 { 123 at.setValue(cc, value); 124 org.apache.commons.logging.LogFactory.getLog(BasicInformationExtractor.class).debug("Adding features to attribute: "+at.getName()+" <- "+value); 125 } 126 127 } else if (at.getType().equals(Boolean.class)) 128 { 129 Boolean phrase = new Boolean(false); 130 for (Iterator<PhraseInfo> iter = phrases.iterator(); iter.hasNext() && !phrase.booleanValue();) 131 { 132 PhraseInfo p = iter.next(); 133 if (p.getPhrase().equalsIgnoreCase(name)) 134 { 135 phrase = Boolean.TRUE; 136 at.setValue(cc, phrase); 137 org.apache.commons.logging.LogFactory.getLog(BasicInformationExtractor.class).debug("Enabling attribute: "+at.getName()+". Source: "+p.getPhrase()); 138 139 } 140 } 141 } 142 } 143 } catch (AttributeAccessException e) 144 { 145 org.apache.commons.logging.LogFactory.getLog(BasicInformationExtractor.class).error(e); 146 147 } 148 149 } 150 }