Package org.carrot2.core

Examples of org.carrot2.core.ProcessingComponentSuite

        // We'll read the component suite definition from an XML stream.
        // IResource is an abstraction layer over resources in Carrot2.
        IResource suiteXml = resourceLookup.getFirst("suite-examples.xml");

        // Deserialize the component suite definition.
        final ProcessingComponentSuite suite =
            ProcessingComponentSuite.deserialize(suiteXml, resourceLookup);

        // Initialize the controller with the suite. All components from the suite
        // will be available for processing within this controller.
        controller.init(initAttributes, suite.getComponentConfigurations());

        // From the suite definition, you can get the document sources and clustering
        // algorithm descriptors.
        final List<DocumentSourceDescriptor> sources = suite.getSources();
        final List<String> sourceIds = Lists.transform(sources,
        System.out.println("Found " + sourceIds.size() + " document sources: "
            + sourceIds);

        final List<ProcessingComponentDescriptor> algorithms = suite.getAlgorithms();
        final List<String> algorithmIds = Lists.transform(algorithms,
        System.out.println("Found " + algorithmIds.size() + " clutering algorithms: "
            + algorithmIds + "\n\n");
View Full Code Here

                 * We use a custom resource locator that searches the contributing
                 * plugin for resources matching the included resource.
                    final ProcessingComponentSuite suite = ProcessingComponentSuite
                        .deserialize(suiteResource, resourceLookup);

                     * Remove invalid descriptors, cache icons.
                    for (ProcessingComponentDescriptor d : suite.getComponents())
                        final String iconPath = d.getIconPath();
                        if (StringUtils.isEmpty(iconPath))

                            imageDescriptorFromPlugin(bundleId, iconPath));

                catch (Exception e)
                    // Skip errors, logging them.
                    Utils.logError("Failed to load suite extension.", e, false);

        // Merge all available suites
        final ArrayList<DocumentSourceDescriptor> sources = Lists.newArrayList();
        final ArrayList<ProcessingComponentDescriptor> algorithms = Lists.newArrayList();

        for (ProcessingComponentSuite s : suites)

        this.componentSuite = new ProcessingComponentSuite(sources, algorithms);

        // Extract and cache bindableDescriptors.
        for (ProcessingComponentDescriptor pcd : componentSuite.getComponents())
View Full Code Here

                throw new ElasticsearchException(
                        "Could not find algorithm suite: " + suiteResourceName);

            final List<String> failed = Lists.newArrayList();
            final ProcessingComponentSuite suite = LoggerUtils.quietCall(new Callable<ProcessingComponentSuite>() {
                public ProcessingComponentSuite call() throws Exception {
                    ProcessingComponentSuite suite = ProcessingComponentSuite.deserialize(
                            suiteResource, resourceLookup);
                    for (ProcessingComponentDescriptor desc : suite.removeUnavailableComponents()) {
                        if (isNoClassDefFound(desc.getInitializationFailure())) {
                            logger.debug("Algorithm not available on classpath: {}", desc.getId());
                        } else {
                            logger.debug("Algorithm initialization failed: {}", desc.getInitializationFailure(), desc.getId());
                    return suite;
            algorithms = Lists.newArrayList();
            for (ProcessingComponentDescriptor descriptor : suite.getAlgorithms()) {
            algorithms = Collections.unmodifiableList(algorithms);

            if (!algorithms.isEmpty()) {
      "Available clustering components: {}", Joiner.on(", ").join(algorithms));
            if (!failed.isEmpty()) {
      "Unavailable clustering components: {}", Joiner.on(", ").join(failed));

            // Change the default resource lookup to include the configured location.
            Map<String, Object> c2SettingsAsMap = Maps.newHashMap();

            // Create component pool.
            Integer poolSize = c2Settings.getAsInt(DEFAULT_COMPONENT_SIZE_PROPERTY_NAME, 0);
            if (poolSize > 0) {
                controller = ControllerFactory.createPooling(poolSize);
            } else {
                controller = ControllerFactory.createPooling();
            controller.init(c2SettingsAsMap, suite.getComponentConfigurations());
        } catch (Exception e) {
            throw new ElasticsearchException(
                    "Could not start Carrot2 controller.", e);
View Full Code Here

    final Map params = context.getRequestParameters();
    final HitDetails [] details = (HitDetails[]) params.get(NUTCH_INPUT_HIT_DETAILS_ARRAY);
    final String [] summaries = (String[]) params.get(NUTCH_INPUT_SUMMARIES_ARRAY);

    if (details == null)
      throw new ProcessingException("Details array must not be null.");

    if (summaries == null)
      throw new ProcessingException("Summaries array must not be null.");

    if (summaries.length != details.length)
      throw new ProcessingException("Summaries and details must be of the same length.");
    // produce 'documents' for successor components.
    final RawDocumentsConsumer consumer = (RawDocumentsConsumer) next;
    for (int i = 0; i < summaries.length; i++) {
      consumer.addDocument(new NutchDocument(i, details[i], summaries[i], defaultLanguage));
View Full Code Here

    for (int i = 0; i < titles.length; i++) {
      Document doc = new Document(titles[i], snippets[i],
              "file://foo_" + i + ".txt");
    final ProcessingResult result = controller.process(documents,
            "red fox",
            LingoClusteringAlgorithm.class);//<co id="crt2.process"/>
    displayResults(result);//<co id="crt2.print"/>

View Full Code Here


    try {
      // The input component takes Nutch's results so we don't need the query argument.
      final ProcessingResult result =
        controller.query(PROCESS_ID, "no-query", requestParams);

      final ArrayOutputComponent.Result output =
        (ArrayOutputComponent.Result) result.getQueryResult();

      final List outputClusters = output.clusters;
      final HitsCluster [] clusters = new HitsCluster[ outputClusters.size() ];

      int j = 0;
View Full Code Here

            logger.debug("Reloading XML rescheduled: browser not ready.");
            new ReloadXMLJob("delaying").reschedule(BROWSER_REFRESH_DELAY);
            return Status.OK_STATUS;

        ProcessingResult pr = getProcessingResult();
        if (pr == lastProcessingResult)
            logger.debug("Reloading XML aborted: identical processing result.");
            return Status.OK_STATUS;

            StringWriter sw = new StringWriter();
            pr.serializeJson(sw, "updateDataJson", true, false, true, false);

            String json = sw.toString();
  "Updating view XML: " +
                StringUtils.abbreviate(json, 180));
View Full Code Here

    private ProcessingResult getProcessingResult()
        assert Display.getCurrent() != null;

        final ProcessingResult pr = editor.getSearchResult().getProcessingResult();
        if (pr == null || pr.getClusters() == null)
            return null;
        return pr;
View Full Code Here

      final List outputClusters = output.clusters;
      final HitsCluster [] clusters = new HitsCluster[ outputClusters.size() ];

      int j = 0;
      for (Iterator i = outputClusters.iterator(); i.hasNext(); j++) {
        RawCluster rcluster = (RawCluster);
        clusters[j] = new HitsClusterAdapter(rcluster, hitDetails);

      // invoke Carrot2 process here.
      return clusters;
View Full Code Here

        subclusters = null;
      } else {
        subclusters = new HitsCluster[rawSubclusters.size()];
        int j = 0;
        for (Iterator i = rawSubclusters.iterator(); i.hasNext(); j++) {
          RawCluster c = (RawCluster);
          subclusters[j] = new HitsClusterAdapter(c, hits);
View Full Code Here


Related Classes of org.carrot2.core.ProcessingComponentSuite

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact