Package org.carrot2.text.preprocessing

Examples of org.carrot2.text.preprocessing.PreprocessingContext$AllTokens


    for (int i = 0; i < titles.length; i++) {
      Document doc = new Document(titles[i], snippets[i],
              "file://foo_" + i + ".txt");
      documents.add(doc);
    }
    final ProcessingResult result = controller.process(documents,
            "red fox",
            LingoClusteringAlgorithm.class);//<co id="crt2.process"/>
    displayResults(result);//<co id="crt2.print"/>

    /*
 
View Full Code Here


    requestParams.put(NutchInputComponent.NUTCH_INPUT_SUMMARIES_ARRAY,
      descriptions);

    try {
      // The input component takes Nutch's results so we don't need the query argument.
      final ProcessingResult result =
        controller.query(PROCESS_ID, "no-query", requestParams);

      final ArrayOutputComponent.Result output =
        (ArrayOutputComponent.Result) result.getQueryResult();

      final List outputClusters = output.clusters;
      final HitsCluster [] clusters = new HitsCluster[ outputClusters.size() ];

      int j = 0;
View Full Code Here

      final List outputClusters = output.clusters;
      final HitsCluster [] clusters = new HitsCluster[ outputClusters.size() ];

      int j = 0;
      for (Iterator i = outputClusters.iterator(); i.hasNext(); j++) {
        RawCluster rcluster = (RawCluster) i.next();
        clusters[j] = new HitsClusterAdapter(rcluster, hitDetails);
      }

      // invoke Carrot2 process here.
      return clusters;
View Full Code Here

        subclusters = null;
      } else {
        subclusters = new HitsCluster[rawSubclusters.size()];
        int j = 0;
        for (Iterator i = rawSubclusters.iterator(); i.hasNext(); j++) {
          RawCluster c = (RawCluster) i.next();
          subclusters[j] = new HitsClusterAdapter(c, hits);
        }
      }
    }
View Full Code Here

      List rawDocuments = this.rawCluster.getDocuments();
      documents = new HitDetails[ rawDocuments.size() ];
     
      int j = 0;
      for (Iterator i = rawDocuments.iterator(); i.hasNext(); j++) {
        RawDocument doc = (RawDocument) i.next();
        Integer offset = (Integer) doc.getId();
        documents[j] = this.hits[offset.intValue()];
      }
    }

    return documents;
View Full Code Here

    if (summaries.length != details.length)
      throw new ProcessingException("Summaries and details must be of the same length.");
   
    // produce 'documents' for successor components.
    final RawDocumentsConsumer consumer = (RawDocumentsConsumer) next;
    for (int i = 0; i < summaries.length; i++) {
      consumer.addDocument(new NutchDocument(i, details[i], summaries[i], defaultLanguage));
    }
  }
View Full Code Here

      addDefaultProcess();
    } else {
      logger.info("Using custom clustering process: " + processResource);
      controller.setComponentAutoload(true);
     
      final ControllerHelper helper = new ControllerHelper();
      final InputStream is = Thread.currentThread()
        .getContextClassLoader().getResourceAsStream(processResource);
      if (is != null) {
        try {
          final LocalComponentFactory nutchInputFactory = new LocalComponentFactory() {
            public LocalComponent getInstance() {
              return new NutchInputComponent(defaultLanguage);
            }
          };
          controller.addLocalComponentFactory("input-nutch", nutchInputFactory);
         
          final LocalProcess process = helper.loadProcess(
              helper.getExtension(processResource), is).getProcess();
          controller.addProcess(PROCESS_ID, process);
          is.close();
        } catch (IOException e) {
          logger.error("Could not load process resource: " + processResource, e);
        } catch (LoaderExtensionUnknownException e) {
View Full Code Here

    controller.addLocalComponentFactory("filter-lingo", lingoFactory);

    // *   <output component-key="output-clustersConsumer" />
    // Factory for the output component: each getInstance() call hands back a
    // brand-new ArrayOutputComponent rather than a shared instance.
    LocalComponentFactory clusterConsumerOutputFactory = new LocalComponentFactory() {
      public LocalComponent getInstance() {
        return new ArrayOutputComponent();
      }
    };
    // Register the factory with the controller under the key "output-array".
    // NOTE(review): presumably the process definition refers to the output
    // component by this same key -- confirm against the process descriptor.
    controller.addLocalComponentFactory("output-array",
      clusterConsumerOutputFactory);
  }
View Full Code Here

        // Initialize a new Lingo clustering component.
        ArrayList languageList = new ArrayList(languages.length);
        for (int i = 0; i < languages.length; i++) {
          final String lcode = languages[i];
          try {
            final Language lang = AllKnownLanguages.getLanguageForIsoCode(lcode);
            if (lang == null) {
              logger.warn("Language not supported in Carrot2: " + lcode);
            } else {
              languageList.add(lang);
              logger.debug("Language loaded: " + lcode);
View Full Code Here

            }
          } catch (Throwable t) {
              logger.warn("Language could not be loaded: " + lcode, t);
          }
        }
        return new LingoLocalFilterComponent(
          (Language []) languageList.toArray(new Language [languageList.size()]), defaults);
      }
    };
    controller.addLocalComponentFactory("filter-lingo", lingoFactory);
View Full Code Here

TOP

Related Classes of org.carrot2.text.preprocessing.PreprocessingContext$AllTokens

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.