Package org.carrot2.core

Examples of org.carrot2.core.Controller$ProcessingStatistics


public class FetchAndSaveBingResponse
{
    public static void main(String [] args)
        throws Exception
    {
        final Controller controller = ControllerFactory.createSimple();
        try {
            String appid = System.getProperty(Bing3DocumentSource.SYSPROP_BING3_API);
            if (Strings.isNullOrEmpty(appid))
            {
                System.err.println("Provide Bing3 API key in property: "
                    + Bing3DocumentSource.SYSPROP_BING3_API);
            }

            final Map<String, Object> attributes = new HashMap<String, Object>();
            CommonAttributesDescriptor.attributeBuilder(attributes)
                .query("डाटा माइनिंग")
                .results(200);

            /* Put your own API key here or in a system property! */
            Bing3WebDocumentSourceDescriptor.attributeBuilder(attributes)
                .appid(appid)
                .market((MarketOption) null);

            ProcessingResult result = controller.process(attributes, Bing3WebDocumentSource.class);
            Persister p = new Persister();
            p.write(result, new File("result.xml"));
        } finally {
            controller.dispose();
        }       
    }
View Full Code Here


/**
 * Example: clusters the bundled "data mining" sample documents with
 * {@link LingoClusteringAlgorithm} and writes the processing result to standard
 * output using {@code serializeJson}.
 */
public class SavingResultsToJson
{
    public static void main(String [] args) throws Exception
    {
        // Cluster the bundled sample documents; they are supplied directly as an
        // attribute, so no document source component is passed to process().
        final Controller controller = ControllerFactory.createSimple();
        final Map<String, Object> attributes = Maps.newHashMap();
        CommonAttributesDescriptor.attributeBuilder(attributes)
            .documents(new ArrayList<Document>(SampleDocumentData.DOCUMENTS_DATA_MINING))
            .query("data mining");

        final ProcessingResult result = controller.process(attributes,
            LingoClusteringAlgorithm.class);

        // Now, we can serialize the entire result to JSON like this
        result.serializeJson(new PrintWriter(System.out));
        System.out.println();
View Full Code Here

            // Attribute values stored under the "faster-clustering" id. The
            // attributeValueSets object is created outside this excerpt.
            final Map<String, Object> fasterClusteringAttributes = attributeValueSets
                .getAttributeValueSet("faster-clustering").getAttributeValues();

            // Perform processing using the attribute values
            final Controller controller = ControllerFactory.createSimple();

            // Initialize the controller with one attribute set
            controller.init(fasterClusteringAttributes);

            // Perform clustering using the attribute set provided at initialization time
            Map<String, Object> requestAttributes = Maps.newHashMap();
            CommonAttributesDescriptor.attributeBuilder(requestAttributes)
                .documents(Lists.newArrayList(SampleDocumentData.DOCUMENTS_DATA_MINING))
                .query("data mining");
            ProcessingResult results = controller.process(requestAttributes, LingoClusteringAlgorithm.class);
            ConsoleFormatter.displayClusters(results.getClusters());

            // Perform clustering using some other attribute set, in this case the
            // one that is the default in the XML file.
            // The request map starts as a copy of defaultAttributes (declared outside
            // this excerpt). The builder's "map" member is read back after the chain;
            // presumably it is the map the builder was given -- confirm.
            requestAttributes =
                CommonAttributesDescriptor.attributeBuilder(Maps.newHashMap(defaultAttributes))
                    .documents(Lists.newArrayList(SampleDocumentData.DOCUMENTS_DATA_MINING))
                    .query("data mining").map;

            results = controller.process(requestAttributes, LingoClusteringAlgorithm.class);
            ConsoleFormatter.displayClusters(results.getClusters());
        }
        finally
        {
            // xmlStream is opened outside this excerpt; it is closed on every exit path.
            CloseableUtils.close(xmlStream);
View Full Code Here

/**
 * Example: clusters the bundled "data mining" sample documents with
 * {@link LingoClusteringAlgorithm} and serializes the processing result to
 * standard output.
 */
public class SavingResultsToXml
{
    public static void main(String [] args) throws Exception
    {
        // Cluster the bundled sample documents; they are supplied directly as an
        // attribute, so no document source component is passed to process().
        final Controller controller = ControllerFactory.createSimple();
        final Map<String, Object> attributes = Maps.newHashMap();
        CommonAttributesDescriptor.attributeBuilder(attributes)
            .documents(new ArrayList<Document>(SampleDocumentData.DOCUMENTS_DATA_MINING))
            .query("data mining");

        final ProcessingResult result = controller.process(attributes,
            LingoClusteringAlgorithm.class);

        // Now, we can serialize the entire result to XML like this
        result.serialize(System.out);
        System.out.println();
View Full Code Here

         * Create a caching controller that will reuse processing component instances, but
         * will not perform any caching of results produced by components. We will leave
         * caching of documents from Lucene index to Lucene and the operating system
         * caches.
         */
        final Controller controller = ControllerFactory.createPooling();

        /*
         * Prepare a map with component-specific attributes. Here, this map will contain
         * the index location and names of fields to be used to fetch document title and
         * summary.
         */
        final Map<String, Object> luceneGlobalAttributes = new HashMap<String, Object>();

        // The placeholder path is used unless exactly one command-line argument is given.
        String indexPath = "put your index path here or pass as the first argument";
        if (args.length == 1)
        {
            indexPath = args[0];
        }

        LuceneDocumentSourceDescriptor
            .attributeBuilder(luceneGlobalAttributes)
            .directory(FSDirectory.open(new File(indexPath)));

        /*
         * Specify fields providing data inside your Lucene index.
         *
         * NOTE(review): the search field names below ("titleField", "fullContent")
         * differ from the title/content field names ("title", "snippet") -- confirm
         * that these fields exist in your index.
         */
        SimpleFieldMapperDescriptor
            .attributeBuilder(luceneGlobalAttributes)
            .titleField("title")
            .contentField("snippet")
            .searchFields(Arrays.asList(new String [] {"titleField", "fullContent"}));

        /*
         * Initialize the controller passing the above attributes as component-specific
         * for Lucene. The global attributes map will be empty. Note that we've provided
         * an identifier for our specially-configured Lucene component, we'll need to use
         * this identifier when performing processing.
         */
        controller.init(new HashMap<String, Object>(),
            new ProcessingComponentConfiguration(LuceneDocumentSource.class, "lucene",
                luceneGlobalAttributes));

        /*
         * Perform processing.
         */
        String query = "mining";
        final Map<String, Object> processingAttributes = Maps.newHashMap();
        CommonAttributesDescriptor.attributeBuilder(processingAttributes)
            .query(query);

        /*
         * We need to refer to the Lucene component by the identifier we set during
         * initialization. As we've not assigned any identifier to the
         * LingoClusteringAlgorithm we want to use, we can use its fully qualified
         * class name.
         */
        ProcessingResult process = controller.process(processingAttributes, "lucene",
            LingoClusteringAlgorithm.class.getName());
       
        ConsoleFormatter.displayResults(process);
    }
View Full Code Here

         * Create the caching controller. You need only one caching controller instance
         * per application life cycle. This controller instance will cache the results
         * fetched from any document source and also clusters generated by the Lingo
         * algorithm.
         */
        final Controller controller = ControllerFactory.createCachingPooling(
            IDocumentSource.class, LingoClusteringAlgorithm.class);

        /*
         * Before using the caching controller, you must initialize it. On initialization,
         * you can set default values for some attributes. In this example, we'll set the
         * default results number to 50 and the API key.
         */
        final Map<String, Object> globalAttributes = new HashMap<String, Object>();
        CommonAttributesDescriptor
            .attributeBuilder(globalAttributes)
                .results(50);
        Bing3WebDocumentSourceDescriptor
            .attributeBuilder(globalAttributes)
                .appid(BingKeyAccess.getKey()); // use your own ID here
        controller.init(globalAttributes);

        /*
         * The controller is now ready to perform queries. To show that the documents from
         * the document input are cached, we will perform the same query twice and measure
         * the time for each query.
         */
        // Note: result is assigned by both runs but is not read within this excerpt;
        // only the elapsed wall-clock time of each call is printed.
        ProcessingResult result;
        long start, duration;

        final Map<String, Object> attributes;
        attributes = new HashMap<String, Object>();
        CommonAttributesDescriptor.attributeBuilder(attributes).query("data mining");

        // First run: same attributes, nothing cached yet.
        start = System.currentTimeMillis();
        result = controller.process(attributes, Bing3WebDocumentSource.class,
            LingoClusteringAlgorithm.class);
        duration = System.currentTimeMillis() - start;
        System.out.println(duration + " ms (empty cache)");

        // Second run: identical attribute map and components as the first run.
        start = System.currentTimeMillis();
        result = controller.process(attributes, Bing3WebDocumentSource.class,
            LingoClusteringAlgorithm.class);
        duration = System.currentTimeMillis() - start;
        System.out.println(duration + " ms (documents and clusters from cache)");
        // [[[end:using-caching-controller]]]
    }
View Full Code Here

    public static void main(String [] args)
    {
        /*
         * Create a pooling controller (reuses components).
         */
        final Controller controller = ControllerFactory.createPooling();
        try {
            final Map<String, Object> params = new HashMap<String, Object>();
   
            /*
             * Add attributes relevant to the source and algorithm we will be
             * using. Note the builder classes are generated by annotation
             * processor (which must be in the compiler's classpath!).
             */
            /*
             * An alternative strategy is to put relevant attribute keys in the
             * map directly but it can be tedious.
             */
            ModuloDocumentSourceDescriptor.attributeBuilder(params)
                .query("dummy")
                .results(10)
                .documents(SampleDocumentData.DOCUMENTS_DATA_MINING)
                .modulo(2)
                .analyzer(new WhitespaceAnalyzer(Version.LUCENE_CURRENT));

            ByFirstTitleLetterClusteringAlgorithmDescriptor.attributeBuilder(params)
                .caseSensitive(false);

            /*
             * Invoke processing on the controller and display the result.
             */
            final ProcessingResult result = controller.process(params,
                ModuloDocumentSource.class,
                ByFirstTitleLetterClusteringAlgorithm.class);

            ConsoleFormatter.displayResults(result);
        } finally {
            controller.dispose();
        }
    }
View Full Code Here

*/
public class UsingCustomLanguageModel
{
    public static void main(String [] args)
    {
        // NOTE(review): the suppression presumably covers the generic varargs call
        // to createCachingPooling -- confirm.
        @SuppressWarnings("unchecked")
        final Controller controller = ControllerFactory
            .createCachingPooling(IDocumentSource.class);

        // We will pass our custom language model element factory classes as
        // initialization-time attributes. This is preferred over passing them as
        // processing-time attributes because the instances created at initialization
        // time will be reused for all further requests.
        Map<String, Object> attrs = Maps.newHashMap();
        BasicPreprocessingPipelineDescriptor.attributeBuilder(attrs)
            .stemmerFactory(CustomStemmerFactory.class)
            .tokenizerFactory(CustomTokenizerFactory.class)
            .lexicalDataFactory(CustomLexicalDataFactory.class);
        controller.init(attrs);

        // Cluster some data with Lingo and STC. Notice how the cluster quality degrades
        // when the stop word list is empty (especially for STC).
        clusterAndDisplayClusters(controller, LingoClusteringAlgorithm.class);
        clusterAndDisplayClusters(controller, STCClusteringAlgorithm.class);
View Full Code Here

        // Lexical data obtained through two separately created pooling controllers.
        final ILexicalData lexicalData1;
        final ILexicalData lexicalData2;

        // First controller: process TestComponent and read the "english" attribute
        {
            final Controller ctrl = ControllerFactory.createPooling();
            final ProcessingResult result = ctrl.process(
                Collections.<String, Object> emptyMap(), TestComponent.class);

            lexicalData1 = result.getAttribute("english");
        }

        // Second controller: a new instance, processed the same way as the first
        {
            final Controller ctrl = ControllerFactory.createPooling();
            final ProcessingResult result = ctrl.process(
                Collections.<String, Object> emptyMap(), TestComponent.class);

            lexicalData2 = result.getAttribute("english");
        }
View Full Code Here

            DefaultLexicalDataFactory.class, "reloadResources");

        final IResourceLocator classpathLocator = Location.CONTEXT_CLASS_LOADER.locator;

        // Create pooling controller, use tempDir1
        final Controller ctrl1 = ControllerFactory.createPooling();
        final ILexicalData data1;
        {
            ctrl1.init(ImmutableMap.<String, Object> of(
                resourceLookupKey,
                new ResourceLookup(new DirLocator(tempDir1), classpathLocator)));

            final ProcessingResult result = ctrl1.process(
                Collections.<String, Object> emptyMap(), TestComponent.class);

            data1 = result.getAttribute("english");
            assertTrue(data1.isCommonWord(new MutableCharArray("uniquea")));
        }

        // Create another pooling controller for the same folder, but with a separately
        // constructed ResourceLookup instance.
        final Controller ctrl2 = ControllerFactory.createPooling();
        final ILexicalData data2;
        {
            ctrl2.init(ImmutableMap.<String, Object> of(
                resourceLookupKey,
                new ResourceLookup(new DirLocator(tempDir1), classpathLocator)));

            final ProcessingResult result = ctrl2.process(
                Collections.<String, Object> emptyMap(), TestComponent.class);

            data2 = result.getAttribute("english");
            assertTrue(data2.isCommonWord(new MutableCharArray("uniquea")));

            // Both controllers must hand out the very same lexical data instance.
            assertSame(data1, data2);
        }

        /*
         * Now force reloading of resources from that path on ctrl1. The new stop word resource
         * should contain 'uniqueb'.
         */
        FileUtils.writeStringToFile(new File(tempDir1, "stopwords.en"), "uniqueb");

        final ILexicalData data3 = ctrl1.process(
            ImmutableMap.<String, Object> of(reloadResourcesKey, true), TestComponent.class)
                .getAttribute("english");

        // A new instance is expected after the reload, reflecting the rewritten file.
        assertNotSame(data1, data3);
        assertFalse(data3.isCommonWord(new MutableCharArray("uniquea")));
        assertTrue(data3.isCommonWord(new MutableCharArray("uniqueb")));

        /*
         * But since it's the same location, all other controllers should now see updated resources
         * (and share the same lexical data).
         */
        final ILexicalData data4 = ctrl2.process(
            Collections.<String, Object> emptyMap(), TestComponent.class).getAttribute("english");

        assertSame(data3, data4);
    }
View Full Code Here

TOP

Related Classes of org.carrot2.core.Controller$ProcessingStatistics

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.