001    /**
002     * BasicInformationExtractor.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 21/06/2007
008     */
009    package jcolibri.extensions.textual.IE.common;
010    
011    import java.util.ArrayList;
012    import java.util.Collection;
013    import java.util.Iterator;
014    
015    import jcolibri.cbrcore.Attribute;
016    import jcolibri.cbrcore.CBRCase;
017    import jcolibri.cbrcore.CBRQuery;
018    import jcolibri.cbrcore.CaseComponent;
019    import jcolibri.exception.AttributeAccessException;
020    import jcolibri.extensions.textual.IE.representation.IEText;
021    import jcolibri.extensions.textual.IE.representation.info.FeatureInfo;
022    import jcolibri.extensions.textual.IE.representation.info.PhraseInfo;
023    import jcolibri.util.ProgressController;
024    
025    /**
026     * This class implements a basic information extractor.<br>
027     * For each Case Component of a case or query, this method obtains the 
028     * features or phrases extracted in its textual attributes and copies the values
029     * into the other attributes of the component.<br>
030     * To copy the features, this method looks for attributes with the same name that
031     * the featues and typed as Strings. Then it copies the value of the feature.
032     * If there are many features it concatenates their values separated by a white space.
033     * <br>
034     * With the phrases it does something similar: finds attributes with the same name but
035     * typed as booleans. If so, it changes the boolean to true.
036     * <p>
037     * First version was developed at: Robert Gordon University - Aberdeen & Facultad Informática,
038     * Universidad Complutense de Madrid (GAIA)
039     * 
040     * @author Juan A. Recio-Garcia
041     * @version 2.0
042     * @see jcolibri.cbrcore.CaseComponent
043     */
044    public class BasicInformationExtractor
045    {
046        /**
047         * Performs the algorithm in a collection of cases.
048         */
049        public static void extractInformation(Collection<CBRCase> cases)
050        {
051            org.apache.commons.logging.LogFactory.getLog(BasicInformationExtractor.class).info("Extracting cases information.");
052            ProgressController.init(BasicInformationExtractor.class, "Extracting cases information ...", cases.size());
053            for (CBRCase c : cases)
054            {
055                extractInformation(c.getDescription());
056                extractInformation(c.getSolution());
057                extractInformation(c.getDescription());
058                extractInformation(c.getDescription());
059                ProgressController.step(BasicInformationExtractor.class);
060            }
061            ProgressController.finish(BasicInformationExtractor.class);
062        }
063    
064        /**
065         * Performs the algorithm in a query.
066         */
067        public static void extractInformation(CBRQuery query)
068        {
069            org.apache.commons.logging.LogFactory.getLog(BasicInformationExtractor.class).info("Extracting query information.");
070            extractInformation(query.getDescription());
071        }
072    
073        /**
074         * Extracts the information of a given CaseComponent
075         * @param cc
076         */
077        private static void extractInformation(CaseComponent cc)
078        {
079            if(cc == null)
080                return;
081            try
082            {
083                Attribute[] attrs = jcolibri.util.AttributeUtils.getAttributes(cc.getClass());
084    
085                // Find the texts and other attributes
086                ArrayList<IEText> texts = new ArrayList<IEText>();
087                ArrayList<Attribute> other = new ArrayList<Attribute>();
088                for (int i = 0; i < attrs.length; i++)
089                {
090                    Object o = attrs[i].getValue(cc);
091                    if (o instanceof CaseComponent)
092                        extractInformation((CaseComponent) o);
093                    else if (o instanceof IEText)
094                        texts.add((IEText) o);
095                    else
096                        other.add(attrs[i]);
097    
098                }
099    
100                // Obtain all features and phrases
101                ArrayList<PhraseInfo> phrases = new ArrayList<PhraseInfo>();
102                ArrayList<FeatureInfo> features = new ArrayList<FeatureInfo>();
103                for (IEText text : texts)
104                {
105                    phrases.addAll(text.getPhrases());
106                    features.addAll(text.getFeatures());
107                }
108    
109                // find a proper value for each attribute. If its type is:
110                // String: find a feature
111                // Phrase: find a phrase
112                for (Attribute at : other)
113                {
114                    String name = at.getName();
115                    if (at.getType().equals(String.class))
116                    {
117                        String value = "";
118                        for (FeatureInfo feature : features)
119                            if (feature.getFeature().equalsIgnoreCase(name))
120                                value += feature.getValue()+" ";
121                        if (value.length() > 0)
122                        {
123                            at.setValue(cc, value);
124                            org.apache.commons.logging.LogFactory.getLog(BasicInformationExtractor.class).debug("Adding features to attribute: "+at.getName()+" <- "+value);
125                        }
126    
127                    } else if (at.getType().equals(Boolean.class))
128                    {
129                        Boolean phrase = new Boolean(false);
130                        for (Iterator<PhraseInfo> iter = phrases.iterator(); iter.hasNext() && !phrase.booleanValue();)
131                        {
132                            PhraseInfo p = iter.next();
133                            if (p.getPhrase().equalsIgnoreCase(name))
134                            {
135                                phrase = Boolean.TRUE;
136                                at.setValue(cc, phrase);
137                                org.apache.commons.logging.LogFactory.getLog(BasicInformationExtractor.class).debug("Enabling attribute: "+at.getName()+". Source: "+p.getPhrase());
138    
139                            }
140                        }
141                    }
142                }
143            } catch (AttributeAccessException e)
144            {
145                org.apache.commons.logging.LogFactory.getLog(BasicInformationExtractor.class).error(e);
146    
147            }
148    
149        }
150    }