Package org.apache.lucene.analysis.core

Examples of org.apache.lucene.analysis.core.StopFilter


    Analyzer a = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String field, Reader reader) {
          Tokenizer tokenizer = new MockTokenizer(reader);
          CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of");
          return new TokenStreamComponents(tokenizer, new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet));
        }
      };

    Iterable<Input> keys = shuffle(
        new Input("wizard of oz", 50)
View Full Code Here


    Analyzer a = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String field, Reader reader) {
          Tokenizer tokenizer = new MockTokenizer(reader);
          CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of");
          return new TokenStreamComponents(tokenizer, new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet));
        }
      };

    Iterable<Input> keys = shuffle(
        new Input("wizard of of oz", 50)
View Full Code Here

    final WhitespaceTokenizer source = new WhitespaceTokenizer(matchVersion, reader);
    TokenStream sink = new URIDecodingFilter(source, "UTF-8");
    sink = this.applyURINormalisation(sink);
    sink = new MailtoFilter(sink);
    sink = new LowerCaseFilter(matchVersion, sink );
    sink = new StopFilter(matchVersion, sink, stopSet);
    sink = new LengthFilter(true, sink, 2, 256);
    return new TokenStreamComponents(source, sink);
  }
View Full Code Here

  protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
    src.setMaxTokenLength(maxTokenLength);
    TokenStream tok = new StandardFilter(matchVersion, src);
    tok = new LowerCaseFilter(matchVersion, tok);
    tok = new StopFilter(matchVersion, tok, stopwords);
    return new TokenStreamComponents(src, tok) {
      @Override
      protected void setReader(final Reader reader) throws IOException {
        src.setMaxTokenLength(NoStopWordStandardAnalyzer.this.maxTokenLength);
        super.setReader(reader);
View Full Code Here

  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer tokenizer = new JapaneseTokenizer(reader, userDict, true, mode);
    TokenStream stream = new JapaneseBaseFormFilter(tokenizer);
    stream = new JapanesePartOfSpeechStopFilter(matchVersion, stream, stoptags);
    stream = new CJKWidthFilter(stream);
    stream = new StopFilter(matchVersion, stream, stopwords);
    stream = new JapaneseKatakanaStemFilter(stream);
    stream = new LowerCaseFilter(matchVersion, stream);
    return new TokenStreamComponents(tokenizer, stream);
  }
View Full Code Here

  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new FinnishStemmer());
    return new TokenStreamComponents(source, result);
  }
View Full Code Here

  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new NorwegianStemmer());
    return new TokenStreamComponents(source, result);
  }
View Full Code Here

    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    if (matchVersion.onOrAfter(Version.LUCENE_31))
      result = new LowerCaseFilter(matchVersion, result);
    result = new ThaiWordFilter(matchVersion, result);
    return new TokenStreamComponents(source, new StopFilter(matchVersion,
        result, stopwords));
  }
View Full Code Here

  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    if (matchVersion.onOrAfter(Version.LUCENE_36)) {
      result = new SpanishLightStemFilter(result);
    } else {
View Full Code Here

      Reader aReader) {
    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
      final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
      TokenStream result = new StandardFilter(matchVersion, source);
      result = new LowerCaseFilter(matchVersion, result);
      result = new StopFilter(matchVersion, result, stoptable);
      if (!excltable.isEmpty())
        result = new SetKeywordMarkerFilter(result, excltable);
      if (stemdict != null)
        result = new StemmerOverrideFilter(result, stemdict);
      result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
      return new TokenStreamComponents(source, result);
    } else {
      final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
      TokenStream result = new StandardFilter(matchVersion, source);
      result = new StopFilter(matchVersion, result, stoptable);
      if (!excltable.isEmpty())
        result = new SetKeywordMarkerFilter(result, excltable);
      result = new DutchStemFilter(result, origStemdict);
      return new TokenStreamComponents(source, result);
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.core.StopFilter

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.