Package org.carrot2.examples.research

Source Code of org.carrot2.examples.research.ClusteringQualityBenchmark

/*
* Carrot2 project.
*
* Copyright (C) 2002-2014, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/

package org.carrot2.examples.research;

import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Map;

import org.carrot2.clustering.lingo.LingoClusteringAlgorithm;
import org.carrot2.clustering.stc.STCClusteringAlgorithm;
import org.carrot2.core.*;
import org.carrot2.output.metrics.ClusteringMetricsCalculator;
import org.carrot2.output.metrics.ContaminationMetricDescriptor;
import org.carrot2.output.metrics.NormalizedMutualInformationMetricDescriptor;
import org.carrot2.output.metrics.PrecisionRecallMetricDescriptor;
import org.carrot2.source.ambient.AmbientDocumentSource;
import org.carrot2.source.ambient.AmbientDocumentSource.AmbientTopic;
import org.carrot2.source.ambient.AmbientDocumentSourceDescriptor;
import org.carrot2.text.util.TabularOutput;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

/**
* Runs a clustering quality benchmark based on the data set embedded in
* {@link AmbientDocumentSource}.
*/
public class ClusteringQualityBenchmark
{
    public static void main(String [] args)
    {
        // Disable excessive logging
        final AmbientTopic [] topics = AmbientDocumentSource.AmbientTopic.values();
        final Controller controller = ControllerFactory.createSimple();

        // List of algorithms to test
        final ArrayList<Class<? extends IProcessingComponent>> algorithms = Lists
            .newArrayList();
        algorithms.add(LingoClusteringAlgorithm.class);
        algorithms.add(STCClusteringAlgorithm.class);

        TabularOutput t = new TabularOutput(new PrintWriter(System.out));
        t.columnSeparator(" | ");
        t.defaultFormat(Double.class).format("%.3f");
        t.addColumn("Topic").alignLeft().format("%-18s");
        t.addColumn("Algorithm").alignLeft().format("%-15s");

        for (AmbientTopic topic : topics)
        {
            for (Class<? extends IProcessingComponent> algorithm : algorithms)
            {
                final Map<String, Object> attributes = Maps.newHashMap();
                AmbientDocumentSourceDescriptor.attributeBuilder(attributes).topic(topic);

                ProcessingResult result = controller.process(
                    attributes, AmbientDocumentSource.class, algorithm, ClusteringMetricsCalculator.class);

                t.rowData("Topic", topic.name());
                t.rowData("Algorithm", algorithm.getSimpleName());

                Map<String, Object> attrs = result.getAttributes();

                t.rowData(
                    "Contamination",
                    attrs.get(ContaminationMetricDescriptor.Keys.WEIGHTED_AVERAGE_CONTAMINATION));

                t.rowData(
                    "F-Score",
                    attrs.get(PrecisionRecallMetricDescriptor.Keys.WEIGHTED_AVERAGE_F_MEASURE));

                t.rowData(
                    "Precision",
                    attrs.get(PrecisionRecallMetricDescriptor.Keys.WEIGHTED_AVERAGE_PRECISION));

                t.rowData(
                    "Recall",
                    attrs.get(PrecisionRecallMetricDescriptor.Keys.WEIGHTED_AVERAGE_RECALL));

                t.rowData(
                    "NMI",
                    attrs.get(NormalizedMutualInformationMetricDescriptor.Keys.NORMALIZED_MUTUAL_INFORMATION));

                t.nextRow();
            }
        }

    }
}
TOP

Related Classes of org.carrot2.examples.research.ClusteringQualityBenchmark

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.