Examples of LetterTokenizer


Examples of org.apache.lucene.analysis.LetterTokenizer

* @author yonik
* @version $Id: LetterTokenizerFactory.java 472574 2006-11-08 18:25:52Z yonik $
*/
public class LetterTokenizerFactory extends BaseTokenizerFactory {
  public TokenStream create(Reader input) {
    return new LetterTokenizer(input);
  }
View Full Code Here

Examples of org.apache.lucene.analysis.LetterTokenizer

  }
 
  private TokenStream luceneTokenStream(String text, boolean letters, boolean toLowerCase, Set stopWords) {
    TokenStream stream;
    if (letters)
      stream = new LetterTokenizer(new StringReader(text));
    else
      stream = new WhitespaceTokenizer(new StringReader(text));
    if (toLowerCase) stream = new LowerCaseFilter(stream);
    if (stopWords != null) stream = new StopFilter(stream, stopWords);
    return stream;           
View Full Code Here

Examples of org.apache.lucene.analysis.LetterTokenizer

    int invocationCount = 0;
    public TokenStream tokenStream(String fieldName, Reader reader) {
      if (++invocationCount % 2 == 0)
        return new WhitespaceTokenizer(reader);
      else
        return new LetterTokenizer(reader);
    }
View Full Code Here

Examples of org.apache.lucene.analysis.LetterTokenizer

    int invocationCount = 0;
    public TokenStream tokenStream(String fieldName, Reader reader) {
      if (++invocationCount % 2 == 0)
        return new WhitespaceTokenizer(reader);
      else
        return new LetterTokenizer(reader);
    }
View Full Code Here

Examples of org.apache.lucene.analysis.LetterTokenizer

    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      if (++invocationCount % 2 == 0)
        return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
      else
        return new LetterTokenizer(TEST_VERSION_CURRENT, reader);
    }
View Full Code Here

Examples of org.apache.lucene.analysis.LetterTokenizer

    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      if (++invocationCount % 2 == 0)
        return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
      else
        return new LetterTokenizer(TEST_VERSION_CURRENT, reader);
    }
View Full Code Here

Examples of org.apache.lucene.analysis.core.LetterTokenizer

  // LUCENE-3642: normalize SMP->BMP and check that offsets are correct
  public void testCrossPlaneNormalization() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader) {
          @Override
          protected int normalize(int c) {
            if (c > 0xffff) {
              return 'δ';
            } else {
View Full Code Here

Examples of org.apache.lucene.analysis.core.LetterTokenizer

  // LUCENE-3642: normalize BMP->SMP and check that offsets are correct
  public void testCrossPlaneNormalization2() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new LetterTokenizer(TEST_VERSION_CURRENT, reader) {
          @Override
          protected int normalize(int c) {
            if (c <= 0xffff) {
              return 0x1043C;
            } else {
View Full Code Here

Examples of org.apache.lucene.analysis.core.LetterTokenizer

    };
    checkAnalysisConsistency(random, b, random.nextBoolean(), "");
  }

  public void testGraphs() throws IOException {
    TokenStream tk = new LetterTokenizer(TEST_VERSION_CURRENT, new StringReader("abc d efgh ij klmno p q"));
    tk = new ShingleFilter(tk);
    tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, 7, 10);
    assertTokenStreamContents(tk,
        new String[] { "efgh ij", "ij klmn", "ij klmno", "klmno p" },
        new int[]    { 6,11,11,14 },
View Full Code Here

Examples of org.apache.lucene.analysis.core.LetterTokenizer

    assureMatchVersion();
  }

  @Override
  public LetterTokenizer create(Reader input) {
    return new LetterTokenizer(luceneMatchVersion, input);
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.