/**
 * CarrotClusteringResult.java
 * jCOLIBRI2 framework.
 * @author Juan A. Recio-García.
 * GAIA - Group for Artificial Intelligence Applications
 * http://gaia.fdi.ucm.es
 * 17/05/2007
 */
package jcolibri.extensions.textual.carrot2;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import jcolibri.extensions.textual.lucene.LuceneDocument;
import jcolibri.extensions.textual.lucene.LuceneIndex;

import org.carrot2.core.clustering.RawCluster;
import org.carrot2.core.clustering.RawDocument;
import org.carrot2.core.impl.ArrayOutputComponent;

/**
 * Outcome of a clustering run, exposed as a list of {@link Cluster} objects.
 * Each cluster keeps the information required by clients:
 * <ul>
 * <li>The labels assigned to the cluster.
 * <li>The documents that belong to the cluster (LuceneDocuments).
 * </ul>
 *
 * @author Juan A. Recio-García
 * @version 1.0
 * @see jcolibri.extensions.textual.lucene.LuceneDocument
 */
public class CarrotClusteringResult {

    /** Clusters collected by the constructor, in the order Carrot2 reported them. */
    private final List<CarrotClusteringResult.Cluster> clusters =
            new ArrayList<CarrotClusteringResult.Cluster>();

    /**
     * Inner class pairing the labels of one cluster with its documents.
     * @author Juan A. Recio-García
     */
    public class Cluster
    {
        List<String> labels;
        List<LuceneDocument> docs;

        /**
         * Stores the given lists as-is (no defensive copy is made).
         *
         * @param labels labels describing the cluster
         * @param docs documents assigned to the cluster
         */
        protected Cluster(List<String> labels, List<LuceneDocument> docs)
        {
            this.labels = labels;
            this.docs = docs;
        }

        /**
         * @return the documents of the cluster
         */
        public List<LuceneDocument> getDocs()
        {
            return docs;
        }

        /**
         * @return the labels of the cluster
         */
        public List<String> getLabels()
        {
            return labels;
        }
    }

    /**
     * Builds a CarrotClusteringResult from the Carrot2 output.
     * One {@link Cluster} is created for every entry of {@code result.clusters}.
     *
     * @param result Carrot2 output whose {@code clusters} list is traversed
     * @param index Lucene index used to map each Carrot2 document to a LuceneDocument
     */
    @SuppressWarnings("unchecked")
    protected CarrotClusteringResult(ArrayOutputComponent.Result result, LuceneIndex index)
    {
        final List rawClusters = result.clusters;
        for (Object entry : rawClusters)
        {
            RawCluster rawCluster = (RawCluster) entry;
            // The cluster description is used directly as the list of labels.
            List<String> description = rawCluster.getClusterDescription();
            List<LuceneDocument> members = lookupDocuments(rawCluster, index);
            clusters.add(new Cluster(description, members));
        }
    }

    /**
     * Resolves every document of a raw cluster through the index, using the
     * raw document's title as the lookup key.
     * NOTE(review): whatever {@code index.getDocument} returns is added unchecked;
     * confirm how it behaves for titles that are not in the index.
     *
     * @param rawCluster Carrot2 cluster whose documents are resolved
     * @param index Lucene index queried by document title
     * @return a new list with one entry per document of the raw cluster
     */
    private static List<LuceneDocument> lookupDocuments(RawCluster rawCluster, LuceneIndex index)
    {
        List<LuceneDocument> found = new ArrayList<LuceneDocument>();
        Iterator cursor = rawCluster.getDocuments().iterator();
        while (cursor.hasNext())
        {
            RawDocument rawDocument = (RawDocument) cursor.next();
            found.add(index.getDocument(rawDocument.getTitle()));
        }
        return found;
    }

    /**
     * Returns the list of clusters.
     * The internal list itself is returned, not a copy.
     *
     * @return the clusters built from the Carrot2 output
     */
    public List<Cluster> getClusters()
    {
        return clusters;
    }

}