Package org.carrot2.core.clustering

Examples of org.carrot2.core.clustering.RawDocument


    final Map params = context.getRequestParameters();
    final HitDetails [] details = (HitDetails[]) params.get(NUTCH_INPUT_HIT_DETAILS_ARRAY);
    final String [] summaries = (String[]) params.get(NUTCH_INPUT_SUMMARIES_ARRAY);

    if (details == null)
      throw new ProcessingException("Details array must not be null.");

    if (summaries == null)
      throw new ProcessingException("Summaries array must not be null.");

    if (summaries.length != details.length)
      throw new ProcessingException("Summaries and details must be of the same length.");
   
    // produce 'documents' for successor components.
    final RawDocumentsConsumer consumer = (RawDocumentsConsumer) next;
    for (int i = 0; i < summaries.length; i++) {
      consumer.addDocument(new NutchDocument(i, details[i], summaries[i], defaultLanguage));
View Full Code Here


        final boolean hasStartIndex = URLResourceWithParams.containsAttributePlaceholder(
            feedUrlTemplate, START_INDEX_VARIABLE_NAME);

        if (!(hasStartPage ^ hasStartIndex))
        {
            throw new ProcessingException(
                "The feedUrlTemplate must contain either "
                    + URLResourceWithParams
                        .formatAttributePlaceholder(START_INDEX_VARIABLE_NAME)
                    + " or "
                    + URLResourceWithParams
                        .formatAttributePlaceholder(START_PAGE_VARIABLE_NAME)
                    + " variable");
        }

        if (!URLResourceWithParams.containsAttributePlaceholder(feedUrlTemplate,
            SEARCH_TERMS_VARIABLE_NAME))
        {
            throw new ProcessingException(
                "The feedUrlTemplate must contain "
                    + URLResourceWithParams
                        .formatAttributePlaceholder(SEARCH_TERMS_VARIABLE_NAME)
                    + " variable");
        }

        if (resultsPerPage == 0)
        {
            throw new ProcessingException("resultsPerPage must be set");
        }

        this.metadata = new MultipageSearchEngineMetadata(resultsPerPage, maximumResults,
            hasStartPage);
        this.feedFetcher = new HttpURLFeedFetcher();
View Full Code Here

    for (int i = 0; i < titles.length; i++) {
      Document doc = new Document(titles[i], snippets[i],
              "file://foo_" + i + ".txt");
      documents.add(doc);
    }
    final ProcessingResult result = controller.process(documents,
            "red fox",
            LingoClusteringAlgorithm.class);//<co id="crt2.process"/>
    displayResults(result);//<co id="crt2.print"/>

    /*
 
View Full Code Here

    requestParams.put(NutchInputComponent.NUTCH_INPUT_SUMMARIES_ARRAY,
      descriptions);

    try {
      // The input component takes Nutch's results so we don't need the query argument.
      final ProcessingResult result =
        controller.query(PROCESS_ID, "no-query", requestParams);

      final ArrayOutputComponent.Result output =
        (ArrayOutputComponent.Result) result.getQueryResult();

      final List outputClusters = output.clusters;
      final HitsCluster [] clusters = new HitsCluster[ outputClusters.size() ];

      int j = 0;
View Full Code Here

            logger.debug("Reloading XML rescheduled: browser not ready.");
            new ReloadXMLJob("delaying").reschedule(BROWSER_REFRESH_DELAY);
            return Status.OK_STATUS;
        }

        ProcessingResult pr = getProcessingResult();
        if (pr == lastProcessingResult)
        {
            logger.debug("Reloading XML aborted: identical processing result.");
            return Status.OK_STATUS;
        }

        try
        {
            StringWriter sw = new StringWriter();
            pr.serializeJson(sw, "updateDataJson", true, false, true, false);

            String json = sw.toString();
            logger.info("Updating view XML: " +
                StringUtils.abbreviate(json, 180));
View Full Code Here

     */
    private ProcessingResult getProcessingResult()
    {
        assert Display.getCurrent() != null;

        final ProcessingResult pr = editor.getSearchResult().getProcessingResult();
        if (pr == null || pr.getClusters() == null)
            return null;
        return pr;
    }
View Full Code Here

      final List outputClusters = output.clusters;
      final HitsCluster [] clusters = new HitsCluster[ outputClusters.size() ];

      int j = 0;
      for (Iterator i = outputClusters.iterator(); i.hasNext(); j++) {
        RawCluster rcluster = (RawCluster) i.next();
        clusters[j] = new HitsClusterAdapter(rcluster, hitDetails);
      }

      // invoke Carrot2 process here.
      return clusters;
View Full Code Here

        subclusters = null;
      } else {
        subclusters = new HitsCluster[rawSubclusters.size()];
        int j = 0;
        for (Iterator i = rawSubclusters.iterator(); i.hasNext(); j++) {
          RawCluster c = (RawCluster) i.next();
          subclusters[j] = new HitsClusterAdapter(c, hits);
        }
      }
    }
View Full Code Here

      List rawDocuments = this.rawCluster.getDocuments();
      documents = new HitDetails[ rawDocuments.size() ];
     
      int j = 0;
      for (Iterator i = rawDocuments.iterator(); i.hasNext(); j++) {
        RawDocument doc = (RawDocument) i.next();
        Integer offset = (Integer) doc.getId();
        documents[j] = this.hits[offset.intValue()];
      }
    }

    return documents;
View Full Code Here

    if (summaries.length != details.length)
      throw new ProcessingException("Summaries and details must be of the same length.");
   
    // produce 'documents' for successor components.
    final RawDocumentsConsumer consumer = (RawDocumentsConsumer) next;
    for (int i = 0; i < summaries.length; i++) {
      consumer.addDocument(new NutchDocument(i, details[i], summaries[i], defaultLanguage));
    }
  }
View Full Code Here

TOP

Related Classes of org.carrot2.core.clustering.RawDocument

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.