Package org.carrot2.text.clustering

Examples of org.carrot2.text.clustering.IMonolingualClusteringAlgorithm


    /**
     * Creates a {@link SpriteBuilder} with the provided parameters and log.
     */
    public SpriteBuilder(SmartSpritesParameters parameters, MessageLog messageLog)
    {
        this(parameters, messageLog, new FileSystemResourceHandler(
            parameters.getDocumentRootDir(), parameters.getCssFileEncoding(), messageLog));
    }
View Full Code Here


    @Before
    public void prepare()
    {
        spriteDirectiveOccurrenceCollector = new SpriteDirectiveOccurrenceCollector(
            messageLog, new FileSystemResourceHandler(null,
                SmartSpritesParameters.DEFAULT_CSS_FILE_ENCODING, messageLog));
    }
View Full Code Here

       */
    case ARABIC:
      // Intentional fall-through.

    default:
      return new ExtendedWhitespaceTokenizer();
    }
  }
View Full Code Here

    static ITokenizer createTokenizer() {
      try {
        return new ChineseTokenizer();
      } catch (Throwable e) {
        return new ExtendedWhitespaceTokenizer();
      }
    }
View Full Code Here

        // IMonolingualClusteringAlgorithm implementation below. This is safe because
        // processing components are not thread-safe by definition and
        // IMonolingualClusteringAlgorithm forbids concurrent execution by contract.
        final List<Document> originalDocuments = documents;
        clusters = multilingualClustering.process(documents,
            new IMonolingualClusteringAlgorithm()
            {
                public List<Cluster> process(List<Document> documents,
                    LanguageCode language)
                {
                    STCClusteringAlgorithm.this.documents = documents;
View Full Code Here

    @Override
    public void process() throws ProcessingException
    {
        final List<Document> originalDocuments = documents;
        clusters = multilingualClustering.process(documents,
            new IMonolingualClusteringAlgorithm()
            {
                public List<Cluster> process(List<Document> documents,
                    LanguageCode language)
                {
                    BasicPreprocessing.this.documents = documents;
View Full Code Here

    @Override
    public void process() throws ProcessingException
    {
        final List<Document> originalDocuments = documents;
        clusters = multilingualClustering.process(documents,
            new IMonolingualClusteringAlgorithm()
            {
                public List<Cluster> process(List<Document> documents,
                    LanguageCode language)
                {
                    CompletePreprocessing.this.documents = documents;
View Full Code Here

        // IMonolingualClusteringAlgorithm implementation below. This is safe because
        // processing components are not thread-safe by definition and
        // IMonolingualClusteringAlgorithm forbids concurrent execution by contract.
        final List<Document> originalDocuments = documents;
        clusters = multilingualClustering.process(documents,
            new IMonolingualClusteringAlgorithm()
            {
                public List<Cluster> process(List<Document> documents, LanguageCode language)
                {
                    BisectingKMeansClusteringAlgorithm.this.documents = documents;
                    BisectingKMeansClusteringAlgorithm.this.cluster(language);
View Full Code Here

        // IMonolingualClusteringAlgorithm implementation below. This is safe because
        // processing components are not thread-safe by definition and
        // IMonolingualClusteringAlgorithm forbids concurrent execution by contract.
        final List<Document> originalDocuments = documents;
        clusters = multilingualClustering.process(documents,
            new IMonolingualClusteringAlgorithm()
            {
                public List<Cluster> process(List<Document> documents,
                    LanguageCode language)
                {
                    LingoClusteringAlgorithm.this.documents = documents;
View Full Code Here

    }
    return solrStopWords.get(fieldName);
  }

  public ILexicalData getLexicalData(LanguageCode languageCode) {
    final ILexicalData carrot2LexicalData = carrot2LexicalDataFactory
        .getLexicalData(languageCode);

    return new ILexicalData() {
      public boolean isStopLabel(CharSequence word) {
        // Nothing in Solr maps to the concept of a stop label,
        // so return Carrot2's default here.
        return carrot2LexicalData.isStopLabel(word);
      }

      public boolean isCommonWord(MutableCharArray word) {
        // Loop over the fields involved in clustering first
        for (String fieldName : fieldNames) {
          for (CharArraySet stopWords : getSolrStopWordsForField(fieldName)) {
            if (stopWords.contains(word)) {
              return true;
            }
          }
        }
        // Check default Carrot2 stop words too
        return carrot2LexicalData.isCommonWord(word);
      }
    };
  }
View Full Code Here

TOP

Related Classes of org.carrot2.text.clustering.IMonolingualClusteringAlgorithm

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.