Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.KeywordTokenizer


    // let's make "booked" stem to "books"
    // the override filter will convert "booked" to "books",
    // but also mark it with KeywordAttribute so Porter will not change it.
    CharArrayMap<String> dictionary = new CharArrayMap<String>(TEST_VERSION_CURRENT, 1, false);
    dictionary.put("booked", "books");
    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
    TokenStream stream = new PorterStemFilter(
        new StemmerOverrideFilter(TEST_VERSION_CURRENT, tokenizer, dictionary));
    assertTokenStreamContents(stream, new String[] { "books" });
  }
View Full Code Here


 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new HindiStemFilter(tokenizer));
      }
    };
    checkOneTermReuse(a, "", "");
  }
View Full Code Here

 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new GermanMinimalStemFilter(tokenizer));
      }
    };
    checkOneTermReuse(a, "", "");
  }
View Full Code Here

 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new BulgarianStemFilter(tokenizer));
      }
    };
    checkOneTermReuse(a, "", "");
  }
View Full Code Here

 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new GermanStemFilter(tokenizer));
      }
    };
    checkOneTermReuse(a, "", "");
  }
View Full Code Here

 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new BrazilianStemFilter(tokenizer));
      }
    };
    checkOneTermReuse(a, "", "");
  }
View Full Code Here

 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
      }
    };
    checkOneTermReuse(a, "", "");
  }
View Full Code Here

 
  public void testEmptyTerm() throws Exception {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer,
            new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.FRONT, 2, 15));
      }   
    };
    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
   
    Analyzer b = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer,
            new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.BACK, 2, 15));
      }   
    };
    checkAnalysisConsistency(random, b, random.nextBoolean(), "");
View Full Code Here

 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new SpanishLightStemFilter(tokenizer));
      }
    };
    checkOneTermReuse(a, "", "");
  }
View Full Code Here

 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new FinnishLightStemFilter(tokenizer));
      }
    };
    checkOneTermReuse(a, "", "");
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.KeywordTokenizer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.