Package org.apache.lucene.analysis.core

Examples of org.apache.lucene.analysis.core.LowerCaseFilter
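LowerCaseFilter is a TokenFilter that folds token text to lower case; each snippet below wires it into an Analyzer's createComponents() chain, directly after the tokenizer or another filter. As a minimal, self-contained sketch of that pattern (assuming a Lucene 4.x classpath, where LowerCaseFilter and the tokenizers still take a Version argument; the class name LowerCaseOnlyAnalyzer is illustrative and not taken from any of the projects below):

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

public final class LowerCaseOnlyAnalyzer extends Analyzer {

  private static final Version MATCH_VERSION = Version.LUCENE_43;

  @Override
  protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    // Split on whitespace, then normalize every token to lower case.
    final WhitespaceTokenizer source = new WhitespaceTokenizer(MATCH_VERSION, reader);
    final TokenStream sink = new LowerCaseFilter(MATCH_VERSION, source);
    return new TokenStreamComponents(source, sink);
  }
}

LowerCaseFilter only rewrites the term attribute, so it can sit almost anywhere in the chain, but it is usually placed before StopFilter (as in most of the chains below) so that stop words match regardless of case.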


  protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final WhitespaceTokenizer source = new WhitespaceTokenizer(matchVersion, reader);
    TokenStream sink = new URIDecodingFilter(source, "UTF-8");
    sink = this.applyURINormalisation(sink);
    sink = new MailtoFilter(sink);
    sink = new LowerCaseFilter(matchVersion, sink);
    sink = new StopFilter(matchVersion, sink, stopSet);
    sink = new LengthFilter(true, sink, 2, 256);
    return new TokenStreamComponents(source, sink);
  }


    final Analyzer dateAnalyser = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(final String fieldName,
                                                       final Reader reader) {
        final WhitespaceTokenizer t = new WhitespaceTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader);
        final TokenStream ts = new LowerCaseFilter(LuceneTestCase.TEST_VERSION_CURRENT, t);
        return new TokenStreamComponents(t, ts);
      }
    };
    datatypes.put("xsd:date", dateAnalyser);

      @Override
      protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
        final Version matchVersion = Version.LUCENE_40;
        final JsonTokenizer src = new JsonTokenizer(reader);
        TokenStream tok = new DatatypeAnalyzerFilter(matchVersion, src,
          new StandardAnalyzer(matchVersion),
          new StandardAnalyzer(matchVersion));
        tok = new LowerCaseFilter(matchVersion, tok);
        // The PositionAttributeFilter and SirenPayloadFilter are mandatory
        // and must always be the last filters in your token stream
        tok = new PositionAttributeFilter(tok);
        tok = new SirenPayloadFilter(tok);
        return new TokenStreamComponents(src, tok);
      }

  @Override
  protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
    src.setMaxTokenLength(maxTokenLength);
    TokenStream tok = new StandardFilter(matchVersion, src);
    tok = new LowerCaseFilter(matchVersion, tok);
    tok = new StopFilter(matchVersion, tok, stopwords);
    return new TokenStreamComponents(src, tok) {
      @Override
      protected void setReader(final Reader reader) throws IOException {
        src.setMaxTokenLength(NoStopWordStandardAnalyzer.this.maxTokenLength);
        super.setReader(reader);
      }
    };
  }

   
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_43, reader) : factory.create(reader);
        TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_43, tokenizer) : tokenizer;
        return new TokenStreamComponents(tokenizer, stream);
      }
    };


  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    // Kuromoji tokenizer at the head of the chain; userDict and mode are the
    // analyzer's configured fields, as in the stock JapaneseAnalyzer.
    Tokenizer tokenizer = new JapaneseTokenizer(reader, userDict, true, mode);
    TokenStream stream = new JapaneseBaseFormFilter(tokenizer);
    stream = new JapanesePartOfSpeechStopFilter(matchVersion, stream, stoptags);
    stream = new CJKWidthFilter(stream);
    stream = new StopFilter(matchVersion, stream, stopwords);
    stream = new JapaneseKatakanaStemFilter(stream);
    stream = new LowerCaseFilter(matchVersion, stream);
    return new TokenStreamComponents(tokenizer, stream);
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new FinnishStemmer());
    return new TokenStreamComponents(source, result);
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new NorwegianStemmer());
    return new TokenStreamComponents(source, result);
  }

  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    if (matchVersion.onOrAfter(Version.LUCENE_31))
      result = new LowerCaseFilter(matchVersion, result);
    result = new ThaiWordFilter(matchVersion, result);
    return new TokenStreamComponents(source, new StopFilter(matchVersion,
        result, stopwords));
  }
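
To observe the effect of LowerCaseFilter in any of these chains, the resulting TokenStream can be consumed by hand. A short usage sketch, again assuming the Lucene 4.x API and reusing the illustrative LowerCaseOnlyAnalyzer from the first example (reset(), end() and close() are required by the TokenStream contract):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public final class LowerCaseDemo {
  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new LowerCaseOnlyAnalyzer();
    // Prints "foo", "bar" and "baz": the tokenizer splits on whitespace,
    // then LowerCaseFilter folds each term to lower case.
    try (TokenStream ts = analyzer.tokenStream("field", new StringReader("Foo BAR baz"))) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        System.out.println(term.toString());
      }
      ts.end();
    }
  }
}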


