Package org.carrot2.examples.clustering

Source Code of org.carrot2.examples.clustering.ClusteringDataFromDocumentSources

/*
* Carrot2 project.
*
* Copyright (C) 2002-2014, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/

package org.carrot2.examples.clustering;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.carrot2.clustering.lingo.LingoClusteringAlgorithm;
import org.carrot2.clustering.stc.STCClusteringAlgorithm;
import org.carrot2.core.Cluster;
import org.carrot2.core.Controller;
import org.carrot2.core.ControllerFactory;
import org.carrot2.core.Document;
import org.carrot2.core.IDocumentSource;
import org.carrot2.core.ProcessingResult;
import org.carrot2.core.attribute.CommonAttributesDescriptor;
import org.carrot2.examples.ConsoleFormatter;
import org.carrot2.source.etools.EToolsDocumentSource;
import org.carrot2.source.microsoft.Bing3WebDocumentSource;
import org.carrot2.source.microsoft.Bing3WebDocumentSourceDescriptor;

/**
* This example shows how to cluster {@link Document}s retrieved from
* {@link IDocumentSource}s. There are a number of implementations of this interface in the
* Carrot2 project, in this example we will cluster results from Microsoft Live (Web
* search).
*
* <p>
* It is assumed that you are familiar with {@link ClusteringDocumentList} example.
* </p>
* @see ClusteringDocumentList
* @see UsingCachingController
*/
public class ClusteringDataFromDocumentSources
{
    @SuppressWarnings("unused")
    public static void main(String [] args)
    {
        /* [[[start:clustering-data-from-document-sources-simple-intro]]]
         *
         * <div>
         * One common way to use Carrot2 Java API is to fetch a number of documents
         * from some {@link org.carrot2.core.IDocumentSource} and cluster them using some
         * {@link org.carrot2.core.IClusteringAlgorithm}. The simplest yet least flexible
         * way to do it is to use the {@link org.carrot2.core.Controller#process(String, Integer, Class...)}
         * method from the {@link org.carrot2.core.Controller}. The code shown below retrieves
         * 100 search results for query <em>data mining</em> from
         * {@link org.carrot2.source.etools.EToolsDocumentSource} and clusters them using
         * the {@link org.carrot2.clustering.lingo.LingoClusteringAlgorithm}.
         * </div>
         *
         * [[[end:clustering-data-from-document-sources-simple-intro]]]
         */
        {
            /// [[[start:clustering-data-from-document-sources-simple]]]
            /* A controller to manage the processing pipeline. */
            final Controller controller = ControllerFactory.createSimple();

            /* Perform processing */
            final ProcessingResult result = controller.process("data mining", 100,
                EToolsDocumentSource.class, LingoClusteringAlgorithm.class);
   
            /* Documents fetched from the document source, clusters created by Carrot2. */
            final List<Document> documents = result.getDocuments();
            final List<Cluster> clusters = result.getClusters();
            /// [[[end:clustering-data-from-document-sources-simple]]]
           
            ConsoleFormatter.displayResults(result);
        }
       
        /* [[[start:clustering-data-from-document-sources-advanced-intro]]]
         *
         * If your production code needs to fetch documents from popular search engines,
         * it is very important that you generate and use your own API key rather than Carrot2's
         * default one. You can pass the API key along with the query and the requested
         * number of results in an attribute map. Carrot2 manual lists all supported attributes
         * along with their keys, types and allowed values. The code shown below, fetches and clusters
         * 50 results from {@link org.carrot2.source.microsoft.Bing3WebDocumentSource}.
         *
         * [[[end:clustering-data-from-document-sources-advanced-intro]]]
         */
        {
            /// [[[start:clustering-data-from-document-sources-advanced]]]
            /* A controller to manage the processing pipeline. */
            final Controller controller = ControllerFactory.createSimple();
   
            /* Prepare attributes */
            final Map<String, Object> attributes = new HashMap<String, Object>();
           
            /* Put your own API key here! */
            Bing3WebDocumentSourceDescriptor.attributeBuilder(attributes)
                .appid(BingKeyAccess.getKey());

            /* Query an the required number of results */
            attributes.put(CommonAttributesDescriptor.Keys.QUERY, "clustering");
            attributes.put(CommonAttributesDescriptor.Keys.RESULTS, 50);
   
            /* Perform processing */
            final ProcessingResult result = controller.process(attributes,
                Bing3WebDocumentSource.class, STCClusteringAlgorithm.class);

            /* Documents fetched from the document source, clusters created by Carrot2. */
            final List<Document> documents = result.getDocuments();
            final List<Cluster> clusters = result.getClusters();
            /// [[[end:clustering-data-from-document-sources-advanced]]]
   
            ConsoleFormatter.displayResults(result);
        }
    }
}
TOP

Related Classes of org.carrot2.examples.clustering.ClusteringDataFromDocumentSources

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.