Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.KeywordTokenizer
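KeywordTokenizer emits the entire contents of its Reader as a single token, which is why the snippets below wrap it with collation, transliteration, and folding filters that operate on a whole field value. A minimal, self-contained sketch (Lucene 3.x-era API to match the snippets; the demo class name and sample text are illustrative only):

import java.io.StringReader;

import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class KeywordTokenizerDemo {
  public static void main(String[] args) throws Exception {
    // The whole input becomes one token, so the loop prints "Foo Bar Baz" exactly once.
    Tokenizer source = new KeywordTokenizer(new StringReader("Foo Bar Baz"));
    CharTermAttribute term = source.addAttribute(CharTermAttribute.class);
    source.reset();
    while (source.incrementToken()) {
      System.out.println(term.toString());
    }
    source.end();
    source.close();
  }
}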


    TestAnalyzer(Collator collator) {
      this.collator = collator;
    }

    public TokenStream tokenStream(String fieldName, Reader reader) {
      // Emit the entire input as one token, then replace it with its collation key.
      TokenStream result = new KeywordTokenizer(reader);
      result = new CollationKeyFilter(result, collator);
      return result;
    }


  public ICUCollationKeyAnalyzer(Collator collator) {
    this.collator = collator;
  }

  public TokenStream tokenStream(String fieldName, Reader reader) {
    // Same single-token chain, but the term is replaced with an ICU collation key.
    TokenStream result = new KeywordTokenizer(reader);
    result = new ICUCollationKeyFilter(result, collator);
    return result;
  }

  // Reusable variant (the reusableTokenStream signature is assumed from the
  // Lucene 3.x SavedStreams pattern used below).
  public TokenStream reusableTokenStream(String fieldName, Reader reader)
      throws IOException {

    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      // First call on this thread: build the tokenizer/filter chain and cache it.
      streams = new SavedStreams();
      streams.source = new KeywordTokenizer(reader);
      streams.result = new ICUCollationKeyFilter(streams.source, collator);
      setPreviousTokenStream(streams);
    } else {
      // Later calls just point the cached KeywordTokenizer at the new reader.
      streams.source.reset(reader);
    }
    return streams.result;
  }
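For completeness, a hedged sketch of how an analyzer like the one above might be constructed and applied (the locale, strength, field name, and demo class are illustrative assumptions; it presumes the Lucene collation contrib, org.apache.lucene.collation, and ICU4J are on the classpath):

import java.io.StringReader;

import com.ibm.icu.text.Collator;
import com.ibm.icu.util.ULocale;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.collation.ICUCollationKeyAnalyzer;

public class CollationSortDemo {
  public static void main(String[] args) throws Exception {
    // German collator at PRIMARY strength: case and accent differences are ignored.
    Collator collator = Collator.getInstance(new ULocale("de", "DE"));
    collator.setStrength(Collator.PRIMARY);

    Analyzer analyzer = new ICUCollationKeyAnalyzer(collator);
    // KeywordTokenizer yields one token per value; the filter replaces it with the
    // binary collation key, which is what actually gets indexed for sorting.
    TokenStream keys = analyzer.tokenStream("sortField", new StringReader("Tübingen"));
    keys.close();
  }
}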

 
  public void testEmptyTerm() throws IOException {
    // KeywordTokenizer emits a single zero-length token for empty input;
    // the wrapped filter must handle that term without failing.
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new WordTokenFilter(tokenizer));
      }
    };
    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
  }

 
  public void testOptimizer() throws Exception {
    String rules = "a > b; b > c;"; // convert a's to b's and b's to c's
    Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
    assertTrue(custom.getFilter() == null);
    // Constructing the filter installs a UnicodeSet filter on the transliterator,
    // restricting it to the characters the rules can actually change.
    new ICUTransformFilter(new KeywordTokenizer(new StringReader("")), custom);
    assertTrue(custom.getFilter().equals(new UnicodeSet("[ab]")));
  }

 
  public void testOptimizerSurrogate() throws Exception {
    String rules = "\\U00020087 > x;"; // convert CJK UNIFIED IDEOGRAPH-20087 to an x
    Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
    assertTrue(custom.getFilter() == null);
    // The same optimization must handle rules over supplementary (surrogate-pair) characters.
    new ICUTransformFilter(new KeywordTokenizer(new StringReader("")), custom);
    assertTrue(custom.getFilter().equals(new UnicodeSet("[\\U00020087]")));
  }

  // Helper: run one value through KeywordTokenizer + ICUTransformFilter and verify
  // that the transliterated token matches the expected string.
  private void checkToken(Transliterator transform, String input, String expected) throws IOException {
    TokenStream ts = new ICUTransformFilter(new KeywordTokenizer(new StringReader(input)), transform);
    assertTokenStreamContents(ts, new String[] { expected });
  }

 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, Transliterator.getInstance("Any-Latin")));
      }
    };
    checkOneTermReuse(a, "", "");
  }

 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new ASCIIFoldingFilter(tokenizer));
      }
    };
    checkOneTermReuse(a, "", "");
  }

 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new JapaneseKatakanaStemFilter(tokenizer));
      }
    };
    checkOneTermReuse(a, "", "");
  }
