Examples of PorterStemFilter


Examples of org.apache.lucene.analysis.en.PorterStemFilter

    // the override filter will convert "booked" to "books",
    // but also mark it with KeywordAttribute so Porter will not change it.
    CharArrayMap<String> dictionary = new CharArrayMap<String>(TEST_VERSION_CURRENT, 1, false);
    dictionary.put("booked", "books");
    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
    TokenStream stream = new PorterStemFilter(
        new StemmerOverrideFilter(tokenizer, dictionary));
    assertTokenStreamContents(stream, new String[] { "books" });
  }
View Full Code Here

Examples of org.apache.lucene.analysis.en.PorterStemFilter

    args.put("protected", "protwords.txt");
    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    factory.init(args);
    factory.inform(loader);
   
    TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
    assertTokenStreamContents(ts, new String[] { "dog", "cats" });
  }
View Full Code Here

Examples of org.apache.lucene.analysis.en.PorterStemFilter

    args.put("ignoreCase", "true");
    factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
    factory.init(args);
    factory.inform(loader);
   
    TokenStream ts = new PorterStemFilter(factory.create(tokenizer));
    assertTokenStreamContents(ts, new String[] { "dog", "cats", "Cats" });
  }
View Full Code Here

Examples of org.apache.lucene.analysis.en.PorterStemFilter

      result = new WordTokenFilter(tokenizer);
    }
    // result = new LowerCaseFilter(result);
    // LowerCaseFilter is not needed, as SegTokenFilter lowercases Basic Latin text.
    // The porter stemming is too strict, this is not a bug, this is a feature:)
    result = new PorterStemFilter(result);
    if (!stopWords.isEmpty()) {
      result = new StopFilter(matchVersion, result, stopWords);
    }
    return new TokenStreamComponents(tokenizer, result);
  }
View Full Code Here

Examples of org.apache.lucene.analysis.en.PorterStemFilter

    TokenStream result = new StandardFilter(LUCENE_VERSION, tokenizer);
    result = new LowerCaseFilter(LUCENE_VERSION, result);
    result = new ASCIIFoldingFilter(result);
    result = new AlphaNumericMaxLengthFilter(result);
    result = new StopFilter(LUCENE_VERSION, result, STOP_SET);
    result = new PorterStemFilter(result);
    return new TokenStreamComponents(tokenizer, result);
  }
View Full Code Here

Examples of org.apache.lucene.analysis.en.PorterStemFilter

    // the override filter will convert "booked" to "books",
    // but also mark it with KeywordAttribute so Porter will not change it.
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder();
    builder.add("booked", "books");
    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
        tokenizer, builder.build()));
    assertTokenStreamContents(stream, new String[] {"books"});
  }
View Full Code Here

Examples of org.apache.lucene.analysis.en.PorterStemFilter

    // the override filter will convert "booked" to "books",
    // but also mark it with KeywordAttribute so Porter will not change it.
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(true);
    builder.add("boOkEd", "books");
    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("BooKeD"));
    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
        tokenizer, builder.build()));
    assertTokenStreamContents(stream, new String[] {"books"});
  }
View Full Code Here

Examples of org.apache.lucene.analysis.en.PorterStemFilter

  }

  public void testNoOverrides() throws IOException {
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(true);
    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("book"));
    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
        tokenizer, builder.build()));
    assertTokenStreamContents(stream, new String[] {"book"});
  }
View Full Code Here

Examples of org.apache.lucene.analysis.en.PorterStemFilter

        output.add(entry.getValue());
      }
    }
    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
        new StringReader(input.toString()));
    TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
        tokenizer, builder.build()));
    assertTokenStreamContents(stream, output.toArray(new String[0]));
  }
View Full Code Here

Examples of org.apache.lucene.analysis.en.PorterStemFilter

    StemmerOverrideMap build = builder.build();
    for (Entry<String,String> entry : entrySet) {
      if (random().nextBoolean()) {
        Tokenizer tokenizer = new KeywordTokenizer(new StringReader(
            entry.getKey()));
        TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(
            tokenizer, build));
        assertTokenStreamContents(stream, new String[] {entry.getValue()});
      }
    }
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.