Examples of org.carrot2.core.Cluster

org.carrot2.core.Cluster
A cluster (group) of {@link Document}s. Each cluster has a human-readable label consisting of one or more phrases, a list of documents it contains and a list of its subclusters. Optionally, additional attributes can be associated with a cluster, e.g. {@link #OTHER_TOPICS}. This class is not thread-safe.

        processingAttributes.put(AttributeUtils.getKey(ByFieldClusteringAlgorithm.class,
            "fieldName"), FIELD);
        final List<Cluster> clusters = cluster(documents).getClusters();
        if (unclustered.length > 0)
        {
            final Cluster otherTopics = new Cluster("Other Topics", unclustered);
            otherTopics.setOtherTopics(true);
            expectedClusters.add(otherTopics);
        }
        assertThatClusters(clusters).isEquivalentTo(expectedClusters);
    }

View Full Code Here

                }
            }


            for (int i = 0; i < rawClusters.size(); i++)
            {
                final Cluster cluster = new Cluster();


                final IntArrayList rawCluster = rawClusters.get(i);
                if (rawCluster.size() > 1)
                {
                    cluster.addPhrases(getLabels(rawCluster,
                        vsmContext.termDocumentMatrix, rowToStemIndex,
                        preprocessingContext.allStems.mostFrequentOriginalWordIndex,
                        preprocessingContext.allWords.image));
                    for (int j = 0; j < rawCluster.size(); j++)
                    {
                        cluster.addDocuments(documents.get(rawCluster.get(j)));
                    }
                    clusters.add(cluster);
                }
            }
        }

View Full Code Here

            final int [] clusterLabelIndex = lingoContext.clusterLabelFeatureIndex;
            final BitSet [] clusterDocuments = lingoContext.clusterDocuments;
            final double [] clusterLabelScore = lingoContext.clusterLabelScore;
            for (int i = 0; i < clusterLabelIndex.length; i++)
            {
                final Cluster cluster = new Cluster();


                final int labelFeature = clusterLabelIndex[i];
                if (labelFeature < 0)
                {
                    // Cluster removed during merging
                    continue;
                }


                // Add label and score
                cluster.addPhrases(labelFormatter.format(context, labelFeature));
                cluster.setAttribute(Cluster.SCORE, clusterLabelScore[i]);


                // Add documents
                final BitSet bs = clusterDocuments[i];
                for (int bit = bs.nextSetBit(0); bit >= 0; bit = bs.nextSetBit(bit + 1))
                {
                    cluster.addDocuments(documents.get(bit));
                }


                // Add cluster
                clusters.add(cluster);
            }

View Full Code Here

            final Set<Document> partitionDocuments = documentsByPartition.get(partition);
            final int partitionDocumentsCount = partitionDocuments.size();
            double partitionFMeasure = 0;
            double partitionPrecision = 0;
            double partitionRecall = 0;
            Cluster bestFMeasureCluster = null;


            for (Cluster cluster : clusters)
            {
                final List<Document> clusterDocuments = cluster.getAllDocuments();
                if (cluster.isOtherTopics() || clusterDocuments.size() == 0)
                {
                    continue;
                }


                final Set<Document> commonDocuments = Sets.newHashSet(partitionDocuments);
                commonDocuments.retainAll(clusterDocuments);


                final double precision = commonDocuments.size()
                    / (double) clusterDocuments.size();
                final double recall = commonDocuments.size()
                    / (double) partitionDocumentsCount;
                final double fMeasure = MathUtils.harmonicMean(precision, recall);


                if (fMeasure > partitionFMeasure)
                {
                    partitionFMeasure = fMeasure;
                    partitionPrecision = precision;
                    partitionRecall = recall;
                    bestFMeasureCluster = cluster;
                }
            }


            recallSum += partitionRecall * partitionDocumentsCount;
            precisionSum += partitionPrecision * partitionDocumentsCount;
            fMeasureSum += partitionFMeasure * partitionDocumentsCount;
            partitionDocumentsCountSum += partitionDocumentsCount;


            recallByPartition.put(partition, partitionRecall);
            precisionByPartition.put(partition, partitionPrecision);
            fMeasureByPartition.put(partition, partitionFMeasure);
            if (bestFMeasureCluster != null)
            {
                bestFMeasureCluster.setAttribute(BEST_F_MEASURE_PARTITION, partition);
            }
        }


        // Dividing by partitionDocumentsCountSum rather than by the number of documents
        // because partitionDocumentsCountSum can be larger than the number of documents

View Full Code Here

public class NormalizedMutualInformationMetricTest extends IdealPartitioningBasedMetricTest
{
    @Test
    public void testEmptyCluster()
    {
        check(null, new Cluster());
    }

View Full Code Here

    }


    @Test
    public void testTrivialCluster()
    {
        check(0.0, new Cluster("test", documentWithPartitions("test")));
    }

View Full Code Here

    }


    @Test
    public void testAllDocumentsInOtherTopics()
    {
        final Cluster otherTopics = clusterWithPartitions("t1", "t2", "t3");
        otherTopics.setOtherTopics(true);
        check(0.0, otherTopics);
    }

View Full Code Here


        Iterator<Cluster> expectedIt = expected.iterator();
        Iterator<Cluster> actualIt = actual.iterator();
        while (expectedIt.hasNext() && actualIt.hasNext())
        {
            final Cluster actualCluster = actualIt.next();
            final Cluster expectedCluster = expectedIt.next();
            for (ClusterPairCheck clusterPairAssert : clusterAsserts)
            {
                clusterPairAssert.check(actualCluster, expectedCluster);
            }


            assertThatClusters(actualCluster.getSubclusters()).as(
                description() + ": subclusters of \"" + actualCluster.getLabel() + "\"")
                .passRecursively(expectedCluster.getSubclusters(), clusterAsserts);
        }


        return this;
    }

View Full Code Here

        final IAdapterManager mgr = Platform.getAdapterManager();


        final BitSet newSelection = new BitSet();
        for (Object selected : ss.toArray())
        {
            final Cluster c = (Cluster) mgr.getAdapter(selected, Cluster.class);             
            if (c == null) continue;
            newSelection.set(c.getId());
        }
        
        return newSelection;
    }

View Full Code Here

        final String label;
        final int documentCount;


        if (element instanceof Cluster)
        {
            final Cluster cluster = (Cluster) element;
            label = cluster.getLabel();
            documentCount = cluster.size();
        }
        else
        {
            return "<unknown node: " + element + ">";
        }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.carrot2.core.Cluster

com.carrotsearch.hppc.BitSet

com.carrotsearch.hppc.IntArrayList

com.carrotsearch.hppc.IntIntOpenHashMap

com.carrotsearch.hppc.IntStack

com.tamingtext.carrot2.Carrot2ExampleTest

org.apache.http.message.BasicNameValuePair

org.apache.lucene.search.IndexSearcher

org.apache.mahout.math.matrix.DoubleMatrix2D

org.carrot2.cli.batch.BatchApp

org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.