Package org.carrot2.text.clustering

Examples of org.carrot2.text.clustering.IMonolingualClusteringAlgorithm


        .getLexicalData(LanguageCode.MALTESE);

    for (String word : wordsToCheck.split(",")) {
      if (!lexicalData.isCommonWord(new MutableCharArray(word))
          && !lexicalData.isStopLabel(word)) {
        clusters.add(new Cluster(word));
      }
    }
  }
View Full Code Here


  @Test
  public void testSimple() throws Exception {
    //<start id="crt2.simple"/>
    //... setup some documents elsewhere
    final Controller controller =
            ControllerFactory.createSimple();//<co id="crt2.controller.creation"/>
    documents = new ArrayList<Document>();
    for (int i = 0; i < titles.length; i++) {
      Document doc = new Document(titles[i], snippets[i],
              "file://foo_" + i + ".txt");
      documents.add(doc);
    }
    final ProcessingResult result = controller.process(documents,
            "red fox",
            LingoClusteringAlgorithm.class);//<co id="crt2.process"/>
    displayResults(result);//<co id="crt2.print"/>

    /*
 
View Full Code Here

    //... setup some documents elsewhere
    final Controller controller =
            ControllerFactory.createSimple();//<co id="crt2.controller.creation"/>
    documents = new ArrayList<Document>();
    for (int i = 0; i < titles.length; i++) {
      Document doc = new Document(titles[i], snippets[i],
              "file://foo_" + i + ".txt");
      documents.add(doc);
    }
    final ProcessingResult result = controller.process(documents,
            "red fox",
View Full Code Here

          if (highlt != null && highlt.length == 1) {
            snippet = highlt[0];
          }
        }
      }
      Document carrotDocument = new Document(getValue(sdoc, titleField),
              snippet, (String)sdoc.getFieldValue(urlField));
      carrotDocument.setField(SOLR_DOCUMENT_ID, sdoc.getFieldValue(idFieldName));
      result.add(carrotDocument);
    }

    return result;
  }
View Full Code Here

      final ControllerHelper helper = new ControllerHelper();
      final InputStream is = Thread.currentThread()
        .getContextClassLoader().getResourceAsStream(processResource);
      if (is != null) {
        try {
          final LocalComponentFactory nutchInputFactory = new LocalComponentFactory() {
            public LocalComponent getInstance() {
              return new NutchInputComponent(defaultLanguage);
            }
          };
          controller.addLocalComponentFactory("input-nutch", nutchInputFactory);
View Full Code Here

  }

  /** Adds the required component factories to a local Carrot2 controller. */
  private void addComponentFactories() throws DuplicatedKeyException {
    //  *   <input  component-key="input-nutch" />
    LocalComponentFactory nutchInputFactory = new LocalComponentFactory() {
      /** Returns a new {@code NutchInputComponent} constructed with {@code defaultLanguage}. */
      public LocalComponent getInstance() {
        return new NutchInputComponent(defaultLanguage);
      }
    };
    controller.addLocalComponentFactory("input-nutch", nutchInputFactory);

    // *   <filter component-key="filter-lingo" />
    LocalComponentFactory lingoFactory = new LocalComponentFactory() {
      public LocalComponent getInstance() {
        final HashMap defaults = new HashMap();

        // These are adjustments settings for the clustering algorithm.
        // If you try the live WebStart demo of Carrot2 you can see how they affect
        // the final clustering: http://www.carrot2.org
        defaults.put("lsi.threshold.clusterAssignment", "0.150");
        defaults.put("lsi.threshold.candidateCluster""0.775");

        // Initialize a new Lingo clustering component.
        ArrayList languageList = new ArrayList(languages.length);
        for (int i = 0; i < languages.length; i++) {
          final String lcode = languages[i];
          try {
            final Language lang = AllKnownLanguages.getLanguageForIsoCode(lcode);
            if (lang == null) {
              logger.warn("Language not supported in Carrot2: " + lcode);
            } else {
              languageList.add(lang);
              logger.debug("Language loaded: " + lcode);
            }
          } catch (Throwable t) {
              logger.warn("Language could not be loaded: " + lcode, t);
          }
        }
        return new LingoLocalFilterComponent(
          (Language []) languageList.toArray(new Language [languageList.size()]), defaults);
      }
    };
    controller.addLocalComponentFactory("filter-lingo", lingoFactory);

    // *   <output component-key="output-clustersConsumer" />
    LocalComponentFactory clusterConsumerOutputFactory = new LocalComponentFactory() {
      /** Returns a new {@code ArrayOutputComponent} instance. */
      public LocalComponent getInstance() {
        return new ArrayOutputComponent();
      }
    };
    controller.addLocalComponentFactory("output-array",
View Full Code Here

    c2Logger.setLevel(Level.ERROR);
    AllKnownLanguages.getLanguageCodes();
    c2Logger.setLevel(original);

    // Initialize the controller.   
    controller = new LocalControllerBase();

    final Configuration nutchConf = getConf();
    final String processResource = nutchConf.get(
        "extension.clustering.carrot2.process-resource");
View Full Code Here

              return new NutchInputComponent(defaultLanguage);
            }
          };
          controller.addLocalComponentFactory("input-nutch", nutchInputFactory);
         
          final LocalProcess process = helper.loadProcess(
              helper.getExtension(processResource), is).getProcess();
          controller.addProcess(PROCESS_ID, process);
          is.close();
        } catch (IOException e) {
          logger.error("Could not load process resource: " + processResource, e);
View Full Code Here

  /**
   * Adds a hardcoded clustering process to the local controller.
   */ 
  private void addProcesses() {
    final LocalProcessBase process = new LocalProcessBase(
        "input-nutch",
        "output-array",
        new String [] {"filter-lingo"},
        "The Lingo clustering algorithm (www.carrot2.org).",
        "");
View Full Code Here

    final Map params = context.getRequestParameters();
    final HitDetails [] details = (HitDetails[]) params.get(NUTCH_INPUT_HIT_DETAILS_ARRAY);
    final String [] summaries = (String[]) params.get(NUTCH_INPUT_SUMMARIES_ARRAY);

    if (details == null)
      throw new ProcessingException("Details array must not be null.");

    if (summaries == null)
      throw new ProcessingException("Summaries array must not be null.");

    if (summaries.length != details.length)
      throw new ProcessingException("Summaries and details must be of the same length.");
   
    // produce 'documents' for successor components.
    final RawDocumentsConsumer consumer = (RawDocumentsConsumer) next;
    for (int i = 0; i < summaries.length; i++) {
      consumer.addDocument(new NutchDocument(i, details[i], summaries[i], defaultLanguage));
View Full Code Here

TOP

Related Classes of org.carrot2.text.clustering.IMonolingualClusteringAlgorithm

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.