Package org.carrot2.core

Examples of org.carrot2.core.ProcessingResult


    for (int i = 0; i < titles.length; i++) {
      Document doc = new Document(titles[i], snippets[i],
              "file://foo_" + i + ".txt");
      documents.add(doc);
    }
    final ProcessingResult result = controller.process(documents,
            "red fox",
            LingoClusteringAlgorithm.class);//<co id="crt2.process"/>
    displayResults(result);//<co id="crt2.print"/>

    /*
 
View Full Code Here


    requestParams.put(NutchInputComponent.NUTCH_INPUT_SUMMARIES_ARRAY,
      descriptions);

    try {
      // The input component takes Nutch's results so we don't need the query argument.
      final ProcessingResult result =
        controller.query(PROCESS_ID, "no-query", requestParams);

      final ArrayOutputComponent.Result output =
        (ArrayOutputComponent.Result) result.getQueryResult();

      final List outputClusters = output.clusters;
      final HitsCluster [] clusters = new HitsCluster[ outputClusters.size() ];

      int j = 0;
View Full Code Here

            logger.debug("Reloading XML rescheduled: browser not ready.");
            new ReloadXMLJob("delaying").reschedule(BROWSER_REFRESH_DELAY);
            return Status.OK_STATUS;
        }

        ProcessingResult pr = getProcessingResult();
        if (pr == lastProcessingResult)
        {
            logger.debug("Reloading XML aborted: identical processing result.");
            return Status.OK_STATUS;
        }

        try
        {
            StringWriter sw = new StringWriter();
            pr.serializeJson(sw, "updateDataJson", true, false, true, false);

            String json = sw.toString();
            logger.info("Updating view XML: " +
                StringUtils.abbreviate(json, 180));
View Full Code Here

     */
    private ProcessingResult getProcessingResult()
    {
        assert Display.getCurrent() != null;

        final ProcessingResult pr = editor.getSearchResult().getProcessingResult();
        if (pr == null || pr.getClusters() == null)
            return null;
        return pr;
    }
View Full Code Here

        final int results = 2;
        CommonAttributesDescriptor.attributeBuilder(processingAttributes)
            .query("\"data mining\"").results(results);
        LuceneDocumentSourceDescriptor.attributeBuilder(processingAttributes)
            .keepLuceneDocuments(true);
        final ProcessingResult result = getSimpleController(initAttributes).process(
            processingAttributes, LuceneDocumentSource.class);
        assertThat(result.getDocuments().size()).as("Number of results").isEqualTo(
            results);

        final StringWriter json = new StringWriter();
        result.serializeJson(json);
        assertThat(json.toString()).doesNotContain("\"luceneDocument\"");

        final ByteArrayOutputStream xml = new ByteArrayOutputStream();
        result.serialize(xml);
        assertThat(xml.toString("UTF-8")).doesNotContain(
            "org.apache.lucene.document.Document");
    }
View Full Code Here

        CommonAttributesDescriptor.attributeBuilder(processingAttributes)
            .documents(Lists.newArrayList(SampleDocumentData.DOCUMENTS_DATA_MINING))
            .query("data mining");

        final ProcessingResult result = controller.process(processingAttributes,
            clusteringAlgorithm);
        ConsoleFormatter.displayClusters(result.getClusters(), 0);
    }
View Full Code Here

            /* Put your own API key here! */
            attributes.put(Bing3WebDocumentSourceDescriptor.Keys.APPID, BingKeyAccess.getKey());
           
            /* Perform processing */
            final ProcessingResult result = controller.process(attributes,
                Bing3WebDocumentSource.class, LingoClusteringAlgorithm.class);
   
            /* Documents fetched from the document source, clusters created by Carrot2. */
            final List<Document> documents = result.getDocuments();
            final List<Cluster> clusters = result.getClusters();
            // [[[end:using-attributes-raw-map]]]
           
            ConsoleFormatter.displayResults(result);
        }
       
        /* [[[start:using-attributes-builders-intro]]]
         *
         * <div>
         * <p>
         * As an alternative to the raw attribute map used in the previous example, you
         * can use attribute map builders. Attribute map builders have a number of advantages:
         * </p>
         *
         * <ul>
         * <li>Type-safety: the correct type of the value will be enforced at compile time</li>
         * <li>Error prevention: unexpected results caused by typos in attribute name strings are avoided</li>
         * <li>Early error detection: in case an attribute's key changes, your compiler will detect that</li>
         * <li>IDE support: your IDE will suggest the right method names and parameters</li>
         * </ul>
         *
         * <p>
         * A possible disadvantage of attribute builders is that one algorithm's attributes can
         * be divided into a number of builders and hence not readily available in your IDE's auto
         * complete window. Please consult attribute documentation in Carrot2 manual for pointers to
         * the appropriate builder classes and methods.
         * </p>
         *
         * <p>
         * The code shown below fetches 100 results for query <em>data mining</em> from
         * {@link org.carrot2.source.microsoft.Bing3WebDocumentSource} and clusters them using
         * the {@link org.carrot2.clustering.lingo.LingoClusteringAlgorithm} tuned to create slightly
         * fewer clusters than by default. Please note how the API key is passed and use your own
         * key in production deployments.
         * </p>
         * </div>
         *
         * [[[end:using-attributes-builders-intro]]]
         */
        {
            /// [[[start:using-attributes-builders]]]
            /* A controller to manage the processing pipeline. */
            final Controller controller = ControllerFactory.createSimple();
           
            /* Prepare attribute map */
            final Map<String, Object> attributes = new HashMap<String, Object>();

            /* Put values using attribute builders */
            CommonAttributesDescriptor
                .attributeBuilder(attributes)
                    .query("data mining")
                    .results(100);
            LingoClusteringAlgorithmDescriptor
                .attributeBuilder(attributes)
                    .desiredClusterCountBase(15)
                    .matrixReducer()
                        .factorizationQuality(FactorizationQuality.HIGH);
                       
            Bing3WebDocumentSourceDescriptor
                .attributeBuilder(attributes)
                    .appid(BingKeyAccess.getKey()); // use your own key here
           
            /* Perform processing */
            final ProcessingResult result = controller.process(attributes,
                Bing3WebDocumentSource.class, LingoClusteringAlgorithm.class);
   
            /* Documents fetched from the document source, clusters created by Carrot2. */
            final List<Document> documents = result.getDocuments();
            final List<Cluster> clusters = result.getClusters();
            /// [[[end:using-attributes-builders]]]
           
            ConsoleFormatter.displayResults(result);
        }
       
        /* [[[start:using-attributes-output-intro]]]
         * <div>
         * <p>
         * Some algorithms apart from clusters can produce additional, usually
         * diagnostic, output. The output is present in the attributes map contained
         * in the {@link org.carrot2.core.ProcessingResult}. You can read the contents
         * of that map directly or through the attribute map builders. Carrot2 manual
         * lists and describes in detail the output attributes of each component.
         * </p>
         * <p>
         * The code shown below clusters an example collection of
         * {@link org.carrot2.core.Document}s using the Lingo algorithm. Lingo can
         * optionally use native platform-specific matrix computation libraries. The
         * example code reads an attribute to find out whether such libraries were
         * successfully loaded and used.
         * </p>
         * </div>
         * [[[end:using-attributes-output-intro]]]
         */
        {
            /// [[[start:using-attributes-output]]]
            /* A controller to manage the processing pipeline. */
            final Controller controller = ControllerFactory.createSimple();
           
            /* Prepare attribute map */
            final Map<String, Object> attributes = new HashMap<String, Object>();
            CommonAttributesDescriptor
                .attributeBuilder(attributes)
                    .documents(SampleDocumentData.DOCUMENTS_DATA_MINING);
            LingoClusteringAlgorithmDescriptor
                .attributeBuilder(attributes)
                    .desiredClusterCountBase(15)
                    .matrixReducer()
                        .factorizationQuality(FactorizationQuality.HIGH);

            /* Perform processing */
            final ProcessingResult result = controller.process(attributes,
                LingoClusteringAlgorithm.class);
           
            /* Clusters created by Carrot2, read processing time */
            final List<Cluster> clusters = result.getClusters();
            final Long clusteringTime = CommonAttributesDescriptor.attributeBuilder(
                result.getAttributes()).processingTimeAlgorithm();
            /// [[[end:using-attributes-output]]]
           
            ConsoleFormatter.displayResults(result);
        }
    }
View Full Code Here

            /// [[[start:clustering-data-from-document-sources-simple]]]
            /* A controller to manage the processing pipeline. */
            final Controller controller = ControllerFactory.createSimple();

            /* Perform processing */
            final ProcessingResult result = controller.process("data mining", 100,
                EToolsDocumentSource.class, LingoClusteringAlgorithm.class);
   
            /* Documents fetched from the document source, clusters created by Carrot2. */
            final List<Document> documents = result.getDocuments();
            final List<Cluster> clusters = result.getClusters();
            /// [[[end:clustering-data-from-document-sources-simple]]]
           
            ConsoleFormatter.displayResults(result);
        }
       
        /* [[[start:clustering-data-from-document-sources-advanced-intro]]]
         *
         * If your production code needs to fetch documents from popular search engines,
         * it is very important that you generate and use your own API key rather than Carrot2's
         * default one. You can pass the API key along with the query and the requested
         * number of results in an attribute map. Carrot2 manual lists all supported attributes
         * along with their keys, types and allowed values. The code shown below fetches and clusters
         * 50 results from {@link org.carrot2.source.microsoft.Bing3WebDocumentSource}.
         *
         * [[[end:clustering-data-from-document-sources-advanced-intro]]]
         */
        {
            /// [[[start:clustering-data-from-document-sources-advanced]]]
            /* A controller to manage the processing pipeline. */
            final Controller controller = ControllerFactory.createSimple();
   
            /* Prepare attributes */
            final Map<String, Object> attributes = new HashMap<String, Object>();
           
            /* Put your own API key here! */
            Bing3WebDocumentSourceDescriptor.attributeBuilder(attributes)
                .appid(BingKeyAccess.getKey());

            /* Query and the required number of results */
            attributes.put(CommonAttributesDescriptor.Keys.QUERY, "clustering");
            attributes.put(CommonAttributesDescriptor.Keys.RESULTS, 50);
   
            /* Perform processing */
            final ProcessingResult result = controller.process(attributes,
                Bing3WebDocumentSource.class, STCClusteringAlgorithm.class);

            /* Documents fetched from the document source, clusters created by Carrot2. */
            final List<Document> documents = result.getDocuments();
            final List<Cluster> clusters = result.getClusters();
            /// [[[end:clustering-data-from-document-sources-advanced]]]
   
            ConsoleFormatter.displayResults(result);
        }
    }
View Full Code Here

        }

        final Map<String, Object> attributes = Maps.newHashMap();
        CommonAttributesDescriptor.attributeBuilder(attributes)
            .documents(documents);
        final ProcessingResult englishResult = controller.process(
            attributes, LingoClusteringAlgorithm.class);
        ConsoleFormatter.displayResults(englishResult);

        /*
         * In the second call, we will fetch results for a Chinese query from Bing,
         * explicitly setting Bing's specific language attribute. Based on that
         * attribute, the document source will set the appropriate language for each
         * document.
         */
        attributes.clear();
       
        CommonAttributesDescriptor.attributeBuilder(attributes)
            .query("聚类" /* clustering? */)
            .results(100);

        Bing3WebDocumentSourceDescriptor.attributeBuilder(attributes)
            .market(MarketOption.CHINESE_CHINA);
        Bing3WebDocumentSourceDescriptor
            .attributeBuilder(attributes)
                .appid(BingKeyAccess.getKey()); // use your own ID here!

        final ProcessingResult chineseResult = controller.process(attributes,
            Bing3WebDocumentSource.class, LingoClusteringAlgorithm.class);
        ConsoleFormatter.displayResults(chineseResult);

        /*
         * In the third call, we will fetch results for the same Chinese query from
         * Google. As Google document source does not have its specific attribute for
         * setting the language, it will not set the documents' language for us. To make
         * sure the right lexical resources are used, we will need to set the
         * MultilingualClustering.defaultLanguage attribute to Chinese on our own.
         */
        attributes.clear();
       
        CommonAttributesDescriptor.attributeBuilder(attributes)
            .query("聚类" /* clustering? */)
            .results(100);

        MultilingualClusteringDescriptor.attributeBuilder(attributes)
            .defaultLanguage(LanguageCode.CHINESE_SIMPLIFIED);

        final ProcessingResult chineseResult2 = controller.process(attributes,
            GoogleDocumentSource.class, LingoClusteringAlgorithm.class);
        ConsoleFormatter.displayResults(chineseResult2);
        // [[[end:clustering-non-english-content]]]
    }
View Full Code Here

            sendBadRequest("dcs.c2stream only supported in POST requests.", response, null);
            return;
        }

        // Check for c2stream in a POST/www-url-encoded and decode it... or try to.
        ProcessingResult input = null;
        if (request.getMethod().equalsIgnoreCase("POST") &&
            request.getParameter(DCS_C2STREAM) != null)
        {
            // Deserialize documents from the stream
            try
View Full Code Here

TOP

Related Classes of org.carrot2.core.ProcessingResult

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.