Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.StopFilter


  public Set<?> getStopWords() {
    return stopWords;
  }

  public StopFilter create(TokenStream input) {
    StopFilter stopFilter = new StopFilter(luceneMatchVersion,input,stopWords,ignoreCase);
    stopFilter.setEnablePositionIncrements(enablePositionIncrements);
    return stopFilter;
  }
View Full Code Here


    else if (pattern == WHITESPACE_PATTERN) { // fast path
      stream = new FastStringTokenizer(text, false, toLowerCase, stopWords);
    }
    else {
      stream = new PatternTokenizer(text, pattern, toLowerCase);
      if (stopWords != null) stream = new StopFilter(false, stream, stopWords);
    }
   
    return stream;
  }
View Full Code Here

   *   and {@link DutchStemFilter}
   */
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(reader);
    result = new StandardFilter(result);
    result = new StopFilter(result, stoptable);
    result = new DutchStemFilter(result, excltable, stemdict);
    return result;
  }
View Full Code Here

    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new StandardTokenizer(reader);
      streams.result = new StandardFilter(streams.source);
      streams.result = new StopFilter(streams.result, stoptable);
      streams.result = new DutchStemFilter(streams.result, excltable, stemdict);
      setPreviousTokenStream(streams);
    } else {
      streams.source.reset(reader);
    }
View Full Code Here

    public StopWhitespaceAnalyzer(boolean enablePositionIncrements) {
      this.enablePositionIncrements = enablePositionIncrements;
    }
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream ts = a.tokenStream(fieldName,reader);
      return new StopFilter(enablePositionIncrements, ts, new String[]{"stop"});
    }
View Full Code Here

    // result = new LowerCaseFilter(result);
    // LowerCaseFilter is not needed, as SegTokenFilter lowercases Basic Latin text.
    // The porter stemming is too strict, this is not a bug, this is a feature:)
    result = new PorterStemFilter(result);
    if (stopWords != null) {
      result = new StopFilter(result, stopWords, false);
    }
    return result;
  }
View Full Code Here

      setPreviousTokenStream(streams);
      streams.tokenStream = new SentenceTokenizer(reader);
      streams.filteredTokenStream = new WordTokenFilter(streams.tokenStream);
      streams.filteredTokenStream = new PorterStemFilter(streams.filteredTokenStream);
      if (stopWords != null) {
        streams.filteredTokenStream = new StopFilter(streams.filteredTokenStream, stopWords, false);
      }
    } else {
      streams.tokenStream.reset(reader);
      streams.filteredTokenStream.reset(); // reset WordTokenFilter's state
    }
View Full Code Here

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new WikipediaTokenizer(reader);
    result = new StandardFilter(result);
    result = new LowerCaseFilter(result);
    result = new StopFilter(true, result, stopSet);
    return result;
  }
View Full Code Here

      expect(plainFileReaderFactory.createPlainFileReader(TEST_FILE_1)).andReturn(plainFileReader);
      expect(plainFileReader.readLines()).andReturn(lines);
        replay(plainFileReaderFactory);
      replay(plainFileReader);
     
      StopFilter stopFilter = (StopFilter) stopwordFilterFactory.createTokenFilter(tokenStream, properties);
      assertNotNull(stopFilter)
      verify(plainFileReaderFactory);
      verify(plainFileReader);
     
      reset(plainFileReaderFactory);
View Full Code Here

    } else if (pattern == WHITESPACE_PATTERN) { // fast path
      stream = new FastStringTokenizer(text, false, toLowerCase, stopWords);
    } else {
      stream = new PatternTokenizer(text, pattern, toLowerCase);
      if (stopWords != null)
        stream = new StopFilter(matchVersion, stream, stopWords);
    }

    return stream;
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.StopFilter

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.