001    /**
002     * CarrotClusteringResult.java
003     * jCOLIBRI2 framework. 
004     * @author Juan A. Recio-García.
005     * GAIA - Group for Artificial Intelligence Applications
006     * http://gaia.fdi.ucm.es
007     * 17/05/2007
008     */
009    package jcolibri.extensions.textual.carrot2;
010    
011    import java.util.ArrayList;
012    import java.util.Iterator;
013    import java.util.List;
014    
015    import jcolibri.extensions.textual.lucene.LuceneDocument;
016    import jcolibri.extensions.textual.lucene.LuceneIndex;
017    
018    import org.carrot2.core.clustering.RawCluster;
019    import org.carrot2.core.clustering.RawDocument;
020    import org.carrot2.core.impl.ArrayOutputComponent;
021    
022    /**
023     * Result of a clustering.
024     * Uses an internal class "Cluster" that stores the required information for each cluster:
025     * <ul>
026     * <li>The labels assigned to the cluster.
027     * <li>The documents that belong to the cluster (LuceneDocuments).
028     * </ul>
029     * 
030     * @author Juan A. Recio-García
031     * @version 1.0
032     * @see jcolibri.extensions.textual.lucene.LuceneDocument
033     */
034    public class CarrotClusteringResult {
035    
036            private ArrayList<CarrotClusteringResult.Cluster> clusters;
037            
038            /**
039             * Internal class that stores the labels and documents for a cluster.
040             * @author Juan A. Recio-García
041             */
042            public class Cluster
043            {
044                    List<String> labels;
045                    List<LuceneDocument> docs;
046                    protected Cluster(List<String> labels, List<LuceneDocument> docs)
047                    {
048                            this.labels = labels;
049                            this.docs   = docs;
050                    }
051                    /**
052                     * @return the documents of the cluster
053                     */
054                    public List<LuceneDocument> getDocs() {
055                            return docs;
056                    }
057                    /**
058                     * @return the labels of the cluster
059                     */
060                    public List<String> getLabels() {
061                            return labels;
062                    }
063                    
064                    
065            }
066            
067            /**
068             * Creates a CarrotClusteringResult object from the Carrot2 output.
069             */
070            @SuppressWarnings("unchecked")
071            protected CarrotClusteringResult(ArrayOutputComponent.Result result, LuceneIndex index)
072            {
073                    clusters = new ArrayList<CarrotClusteringResult.Cluster>();
074                    
075            final List carrotClusters = result.clusters;
076            for (Iterator i = carrotClusters.iterator(); i.hasNext(); )
077            {
078                RawCluster rawc = (RawCluster) i.next();
079                List<String> labels = rawc.getClusterDescription();
080                ArrayList<LuceneDocument> docs = new ArrayList<LuceneDocument>();
081                for (Iterator d = rawc.getDocuments().iterator(); d.hasNext(); ) 
082                {
083                    RawDocument document = (RawDocument) d.next();
084                    LuceneDocument ld = index.getDocument(document.getTitle());
085                    docs.add(ld);
086                }
087                
088                CarrotClusteringResult.Cluster c =  new Cluster(labels,docs);
089                clusters.add(c);
090            }
091    
092            }
093            
094            /**
095             * Returns the list of clusters.
096             */
097            public List<Cluster> getClusters()
098            {
099                    return this.clusters;
100            }
101            
102    }