001    /**
002     * CRNCaseBase.java
003     * jCOLIBRI2 framework. 
004     * @author 
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 14/12/2007
008     */
009    package jcolibri.extensions.textual.IE.common.crn;
010    
011    import java.util.ArrayList;
012    import java.util.Collection;
013    import java.util.HashMap;
014    import java.util.Iterator;
015    import java.util.List;
016    import java.util.Map.Entry;
017    
018    import jcolibri.cbrcore.Attribute;
019    import jcolibri.cbrcore.CBRCase;
020    import jcolibri.cbrcore.CBRCaseBase;
021    import jcolibri.cbrcore.CaseBaseFilter;
022    import jcolibri.cbrcore.Connector;
023    import jcolibri.extensions.textual.IE.common.crn.matrix.FloatMatrix;
024    import jcolibri.extensions.textual.IE.common.crn.matrix.Matrix;
025    import jcolibri.extensions.textual.IE.representation.IEText;
026    import jcolibri.extensions.textual.IE.representation.Token;
027    import jcolibri.util.AttributeUtils;
028    
029    /**
030     * @author 
031     * @version 1.0
032     *
033     */
034    public class CRNCaseBase implements CBRCaseBase
035    {
036    
037            /**********************************************/
038            /***              Singleton                   */
039            /**********************************************/
040    
041            // This class is implemented as a singleton because
042            // must be accessed from the similarity measures
043        
044            private static CRNCaseBase _instance = null;
045        
046            public static CRNCaseBase getInstance()
047            {
048                if(_instance == null)
049                   _instance = new CRNCaseBase();
050                return _instance;
051            }
052            
053            private CRNCaseBase()
054            {
055                //nothing
056            }
057            
058            /**********************************************/
059            /***             Attributes                   */
060            /**********************************************/
061        
062            private jcolibri.cbrcore.Connector connector;
063            private java.util.Collection<CBRCase> cases;
064    
065            /*********************************************/
066            /***         CBRCaseBase implementation      */  
067            /*********************************************/
068    
069            
070            public void init(Connector connector) {
071                    this.connector = connector;
072                    cases = this.connector.retrieveAllCases();      
073            }
074            
075            /* (non-Javadoc)
076             * @see jcolibri.cbrcore.CBRCaseBase#deInit()
077             */
078            public void close() {
079                    this.connector.close();
080    
081            }
082    
083            /* (non-Javadoc)
084             * @see jcolibri.cbrcore.CBRCaseBase#forgetCases(java.util.Collection)
085             */
086            public void forgetCases(Collection<CBRCase> cases) {
087                    // TODO Auto-generated method stub
088    
089            }
090    
091            /* (non-Javadoc)
092             * @see jcolibri.cbrcore.CBRCaseBase#getCases()
093             */
094            public Collection<CBRCase> getCases() {
095                    return cases;
096            }
097    
098            /* (non-Javadoc)
099             * @see jcolibri.cbrcore.CBRCaseBase#getCases(jcolibri.cbrcore.CaseBaseFilter)
100             */
101            public Collection<CBRCase> getCases(CaseBaseFilter filter) {
102                    // TODO Auto-generated method stub
103                    return null;
104            }
105    
106    
107            /* (non-Javadoc)
108             * @see jcolibri.cbrcore.CBRCaseBase#learnCases(java.util.Collection)
109             */
110            public void learnCases(Collection<CBRCase> cases) {
111                    connector.storeCases(cases);
112                    this.cases.addAll(cases);
113    
114            }
115    
116            /*********************************************/
117            /***         CRN generation method           */  
118            /*********************************************/
119            
120            private HashMap<Attribute,CRNTable> crnTables;
121            
122            public void createCRN()
123            {
124                crnTables = new HashMap<Attribute,CRNTable>();
125                
126                // for each case
127                for(CBRCase c: cases)
128                {
129                    //Obtain textual attributes of the case
130                    ArrayList<Attribute> textualAttributes = new ArrayList<Attribute>();
131                    textualAttributes.addAll(AttributeUtils.getAttributes(c.getDescription(), IEText.class));
132                    if(c.getSolution()!=null)
133                        textualAttributes.addAll(AttributeUtils.getAttributes(c.getSolution(), IEText.class));
134                    if(c.getResult() != null)
135                        textualAttributes.addAll(AttributeUtils.getAttributes(c.getResult(), IEText.class));
136                    if(c.getJustificationOfSolution() != null)
137                        textualAttributes.addAll(AttributeUtils.getAttributes(c.getJustificationOfSolution(), IEText.class));
138                    
139                    // for each attribute
140                    for(Attribute textualAtt: textualAttributes)
141                    {
142                        //Obtain the corresponding CRNTable
143                        CRNTable crn = crnTables.get(textualAtt);
144                        // if null create the table and store it into the hash map
145                        if(crn==null)
146                        {   
147                            crn = new CRNTable();
148                            crnTables.put(textualAtt, crn);
149                        }
150                        
151                        //Obtain the value of the attribute in the case
152                        IEText text = (IEText) AttributeUtils.findValue(textualAtt, c);
153                        //Obtain the tokens
154                        List<Token> tokens = text.getAllTokens();
155                        
156                        // Do something with the tokens and the crn ...
157                        
158                        ArrayList<String> indexedTokens= new ArrayList<String>();
159                        Iterator<Token> i= tokens.iterator();
160                        while (i.hasNext())
161                        {
162                            short count=0;
163                            Token t= i.next();
164                            if(t.isStopWord())
165                                continue;
166                            String stem = t.getStem();
167                            if (!indexedTokens.contains(stem))
168                            {
169                                    Iterator<Token> j= tokens.iterator();
170                                    while (j.hasNext())
171                                    {
172                                            Token t1= j.next();
173                                            if(t1.isStopWord())
174                                                continue;
175                                            if (stem.equals(t1.getStem()))
176                                                    count++;
177                                    }
178                                    indexedTokens.add(stem);
179                                    CRNIndexEntry e= new CRNIndexEntry(stem,c.getID(),count);
180                                    crn.addEntry(e);
181                            }
182                            
183                        }
184                    }
185                }
186                for(Entry<Attribute,CRNTable> entry: crnTables.entrySet())
187                {
188                    org.apache.commons.logging.LogFactory.getLog(this.getClass()).info("Generating matrices for attribute: "+entry.getKey().getName());
189                    entry.getValue().computeMatrix();
190                }
191                
192            }
193            
194            public double getSimilarity(IEText queryText, Attribute textualAttribute, Object caseId)
195            {
196                //Obtain the corresponding CRN table
197                CRNTable table = crnTables.get(textualAttribute);
198                ArrayList<String> cbTokens= table.getUnits();
199                
200                //Obtain the similarity from the table using the queryText and the caseId
201                List<Token> queryTokens= queryText.getAllTokens();
202                float[] queryVector= new float[cbTokens.size()];
203                
204                
205                for(Token t: queryTokens)
206                {
207                    if(t.isStopWord())
208                        continue;
209                    String stem = t.getStem();
210                    for (int p=0;p<cbTokens.size();p++)
211                    {
212                            if (stem.equals(cbTokens.get(p)))
213                                    queryVector[p]+=1;
214                    }
215                }
216                
217            float sum=0.000001f;
218            
219            for (int a=0;a<queryVector.length;a++)
220                sum+= queryVector[a];
221            
222            float[][] query2DVector= new float[1][cbTokens.size()];
223            for (int a=0;a<queryVector.length;a++)  //Normalisation
224                query2DVector[0][a]= (queryVector[a]/ (float)Math.sqrt(sum));
225    
226            Matrix queryMatrix= new FloatMatrix(query2DVector);
227            
228            Matrix caseMatrix= table.getNormalisedUnitsCasesMatrix();
229                                        
230            int column = table.getCaseIds().indexOf(caseId);
231            
232            return queryMatrix.multiply(caseMatrix, 0, column); //weights[0][table.getCaseIds().indexOf(caseId)];
233    
234            }
235            
236            
237    }