/**
 * CRNCaseBase.java
 * jCOLIBRI2 framework.
 * @author
 * GAIA - Group for Artificial Intelligence Applications
 * http://gaia.fdi.ucm.es
 * 14/12/2007
 */
package jcolibri.extensions.textual.IE.common.crn;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import jcolibri.cbrcore.Attribute;
import jcolibri.cbrcore.CBRCase;
import jcolibri.cbrcore.CBRCaseBase;
import jcolibri.cbrcore.CaseBaseFilter;
import jcolibri.cbrcore.Connector;
import jcolibri.extensions.textual.IE.common.crn.matrix.FloatMatrix;
import jcolibri.extensions.textual.IE.common.crn.matrix.Matrix;
import jcolibri.extensions.textual.IE.representation.IEText;
import jcolibri.extensions.textual.IE.representation.Token;
import jcolibri.util.AttributeUtils;

/**
 * Case base that keeps every case returned by the connector in memory and
 * builds one {@link CRNTable} per textual ({@link IEText}) attribute, from
 * which query/case similarities are computed.
 * <p>
 * The class is a singleton because it must be reachable from the similarity
 * measures. The lazy initialisation in {@link #getInstance()} uses no
 * synchronisation.
 *
 * @author
 * @version 1.0
 */
public class CRNCaseBase implements CBRCaseBase {

    /**********************************************/
    /***              Singleton                  ***/
    /**********************************************/

    // This class is implemented as a singleton because it
    // must be accessed from the similarity measures.

    private static CRNCaseBase _instance = null;

    /**
     * Returns the unique instance, creating it on first use.
     *
     * @return the singleton instance
     */
    public static CRNCaseBase getInstance() {
        if (_instance == null) {
            _instance = new CRNCaseBase();
        }
        return _instance;
    }

    private CRNCaseBase() {
        // nothing to initialise
    }

    /**********************************************/
    /***              Attributes                 ***/
    /**********************************************/

    private jcolibri.cbrcore.Connector connector;
    private java.util.Collection<CBRCase> cases;

    /*********************************************/
    /***      CBRCaseBase implementation        ***/
    /*********************************************/

    /**
     * Stores the connector and loads every case it provides into memory.
     *
     * @param connector connector used to retrieve and store cases
     */
    public void init(Connector connector) {
        this.connector = connector;
        cases = this.connector.retrieveAllCases();
    }

    /**
     * Closes the underlying connector.
     *
     * @see jcolibri.cbrcore.CBRCaseBase#close()
     */
    public void close() {
        this.connector.close();
    }

    /**
     * Not implemented: currently does nothing.
     *
     * @see jcolibri.cbrcore.CBRCaseBase#forgetCases(java.util.Collection)
     */
    public void forgetCases(Collection<CBRCase> cases) {
        // TODO Auto-generated method stub
    }

    /**
     * Returns the in-memory collection of cases (the internal collection
     * itself, not a copy).
     *
     * @see jcolibri.cbrcore.CBRCaseBase#getCases()
     */
    public Collection<CBRCase> getCases() {
        return cases;
    }

    /**
     * Not implemented: always returns {@code null}.
     *
     * @see jcolibri.cbrcore.CBRCaseBase#getCases(jcolibri.cbrcore.CaseBaseFilter)
     */
    public Collection<CBRCase> getCases(CaseBaseFilter filter) {
        // TODO Auto-generated method stub
        return null;
    }

    /**
     * Stores the given cases through the connector and adds them to the
     * in-memory collection. The CRN tables are not updated here; call
     * {@link #createCRN()} again to include the new cases.
     *
     * @see jcolibri.cbrcore.CBRCaseBase#learnCases(java.util.Collection)
     */
    public void learnCases(Collection<CBRCase> cases) {
        connector.storeCases(cases);
        this.cases.addAll(cases);
    }

    /*********************************************/
    /***        CRN generation method           ***/
    /*********************************************/

    private HashMap<Attribute, CRNTable> crnTables;

    /**
     * (Re)builds the CRN tables from the cases currently held in memory.
     * <p>
     * For every case and every textual attribute of that case, one
     * {@link CRNIndexEntry} is added per distinct non-stop-word stem, carrying
     * the stem, the case id and the number of occurrences of the stem in the
     * attribute text. Once all cases are indexed, each table computes its
     * matrix.
     */
    public void createCRN() {
        crnTables = new HashMap<Attribute, CRNTable>();

        for (CBRCase c : cases) {
            for (Attribute textualAtt : collectTextualAttributes(c)) {
                // Obtain the table of this attribute, creating it on first use.
                CRNTable crn = crnTables.get(textualAtt);
                if (crn == null) {
                    crn = new CRNTable();
                    crnTables.put(textualAtt, crn);
                }

                // Value of the attribute in this case and its tokens.
                IEText text = (IEText) AttributeUtils.findValue(textualAtt, c);
                List<Token> tokens = text.getAllTokens();

                for (Entry<String, Integer> stemCount : countStems(tokens).entrySet()) {
                    // The narrowing cast wraps exactly like incrementing a short counter would.
                    short count = (short) stemCount.getValue().intValue();
                    crn.addEntry(new CRNIndexEntry(stemCount.getKey(), c.getID(), count));
                }
            }
        }

        // Look the logger up once instead of once per table.
        org.apache.commons.logging.Log log =
                org.apache.commons.logging.LogFactory.getLog(this.getClass());
        for (Entry<Attribute, CRNTable> entry : crnTables.entrySet()) {
            log.info("Generating matrices for attribute: " + entry.getKey().getName());
            entry.getValue().computeMatrix();
        }
    }

    /**
     * Gathers the {@link IEText} attributes of the description and, when
     * present, of the solution, result and justification of solution.
     */
    private List<Attribute> collectTextualAttributes(CBRCase c) {
        List<Attribute> textualAttributes = new ArrayList<Attribute>();
        textualAttributes.addAll(AttributeUtils.getAttributes(c.getDescription(), IEText.class));
        if (c.getSolution() != null) {
            textualAttributes.addAll(AttributeUtils.getAttributes(c.getSolution(), IEText.class));
        }
        if (c.getResult() != null) {
            textualAttributes.addAll(AttributeUtils.getAttributes(c.getResult(), IEText.class));
        }
        if (c.getJustificationOfSolution() != null) {
            textualAttributes.addAll(
                    AttributeUtils.getAttributes(c.getJustificationOfSolution(), IEText.class));
        }
        return textualAttributes;
    }

    /**
     * Counts, in a single pass, the occurrences of each stem among the
     * non-stop-word tokens. Iteration order of the result is the order in
     * which each stem first appears.
     */
    private Map<String, Integer> countStems(List<Token> tokens) {
        Map<String, Integer> counts = new LinkedHashMap<String, Integer>();
        for (Token t : tokens) {
            if (t.isStopWord()) {
                continue;
            }
            String stem = t.getStem();
            Integer previous = counts.get(stem);
            counts.put(stem, Integer.valueOf(previous == null ? 1 : previous.intValue() + 1));
        }
        return counts;
    }

    /**
     * Computes the similarity between a query text and a stored case for one
     * textual attribute.
     * <p>
     * A vector with one position per unit of the attribute's CRN table is
     * filled with the number of non-stop-word query tokens whose stem equals
     * that unit. The vector is then scaled and multiplied against the column of
     * the table's normalised units/cases matrix that corresponds to the case.
     *
     * @param queryText        text of the query
     * @param textualAttribute attribute whose CRN table is used
     * @param caseId           id of the case to compare with
     * @return the value returned by {@code Matrix.multiply} for row 0 of the
     *         query matrix and the column of the case
     * @throws IllegalStateException    if {@link #createCRN()} has not been called
     * @throws IllegalArgumentException if the attribute has no CRN table or the
     *                                  case id is not indexed in that table
     */
    public double getSimilarity(IEText queryText, Attribute textualAttribute, Object caseId) {
        if (crnTables == null) {
            throw new IllegalStateException("createCRN() must be called before getSimilarity()");
        }

        // Obtain the corresponding CRN table.
        CRNTable table = crnTables.get(textualAttribute);
        if (table == null) {
            throw new IllegalArgumentException("No CRN table for attribute: " + textualAttribute);
        }

        // Fail early instead of handing a negative column to the matrix product.
        int column = table.getCaseIds().indexOf(caseId);
        if (column < 0) {
            throw new IllegalArgumentException("Case not indexed in the CRN table: " + caseId);
        }

        List<String> cbTokens = table.getUnits();
        int units = cbTokens.size();

        // Single-row matrix holding the query term frequencies.
        float[][] query2DVector = new float[1][units];
        float[] queryVector = query2DVector[0];

        for (Token t : queryText.getAllTokens()) {
            if (t.isStopWord()) {
                continue;
            }
            String stem = t.getStem();
            for (int p = 0; p < units; p++) {
                if (stem.equals(cbTokens.get(p))) {
                    queryVector[p] += 1;
                }
            }
        }

        // Starting from a small epsilon avoids a division by zero for queries
        // that share no unit with the table.
        float sum = 0.000001f;
        for (int a = 0; a < queryVector.length; a++) {
            sum += queryVector[a];
        }

        // Normalisation.
        // NOTE(review): the divisor is the square root of the sum of the counts,
        // not of the sum of squared counts -- confirm this matches the way the
        // case matrix is normalised.
        float norm = (float) Math.sqrt(sum);
        for (int a = 0; a < queryVector.length; a++) {
            queryVector[a] = queryVector[a] / norm;
        }

        Matrix queryMatrix = new FloatMatrix(query2DVector);
        Matrix caseMatrix = table.getNormalisedUnitsCasesMatrix();

        return queryMatrix.multiply(caseMatrix, 0, column);
    }

}