Package org.apache.lucene.analysis.Analyzer

Examples of org.apache.lucene.analysis.Analyzer.TokenStreamComponents


   
      Analyzer a = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new KeywordTokenizer(reader);
          return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(TEST_VERSION_CURRENT, tokenizer, flags, protectedWords));
        }
      };
      // depending upon options, this thing may or may not preserve the empty term
      checkAnalysisConsistency(random, a, random.nextBoolean(), "");
    }
View Full Code Here


    for (final String lang : SNOWBALL_LANGS) {
      Analyzer a = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new KeywordTokenizer(reader);
          return new TokenStreamComponents(tokenizer, new SnowballFilter(tokenizer, lang));
        }
      };
      checkOneTerm(a, "", "");
    }
  }
View Full Code Here

  public void checkRandomStrings(final String snowballLanguage) throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer t = new MockTokenizer(reader);
        return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
     
    };
    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
  }
View Full Code Here

        @Override
        public TokenStreamComponents createComponents(String fieldName, Reader reader) {
          MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true, 100);
          tokenizer.setEnableChecks(true);
          MockTokenFilter filt = new MockTokenFilter(tokenizer, MockTokenFilter.EMPTY_STOPSET);
          return new TokenStreamComponents(tokenizer, filt);
        }
      };
    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, a);
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
View Full Code Here

    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
        return new TokenStreamComponents(source, new PortugueseLightStemFilter(sink));
      }
    };
    checkOneTerm(a, "quilométricas", "quilométricas");
  }
View Full Code Here

  public void testEmptyTerm() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new PortugueseLightStemFilter(tokenizer));
      }
    };
    checkOneTermReuse(a, "", "");
  }
View Full Code Here

    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
        return new TokenStreamComponents(source, new PortugueseMinimalStemFilter(sink));
      }
    };
    checkOneTerm(a, "quilométricas", "quilométricas");
  }
View Full Code Here

  public void testEmptyTerm() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new PortugueseMinimalStemFilter(tokenizer));
      }
    };
    checkOneTermReuse(a, "", "");
  }
View Full Code Here

    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_43, reader) : factory.create(reader);
        TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_43, tokenizer) : tokenizer;
        return new TokenStreamComponents(tokenizer, stream);
      }
    };

    try {
      if (format == null || format.equals("solr")) {
View Full Code Here

    for (final String lang : SNOWBALL_LANGS) {
      Analyzer a = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new KeywordTokenizer(reader);
          return new TokenStreamComponents(tokenizer, new SnowballFilter(tokenizer, lang));
        }
      };
      checkOneTermReuse(a, "", "");
    }
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.Analyzer.TokenStreamComponents

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.