
Examples of org.apache.lucene.analysis.WhitespaceTokenizer
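All of the excerpts below use the pre-3.1 API, in which a WhitespaceTokenizer is constructed directly from a Reader with no Version argument. As a minimal, self-contained sketch of that basic usage (the demo class name is mine; the Lucene types and calls are the 2.9/3.0-era API):

import java.io.StringReader;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class WhitespaceTokenizerDemo {
  public static void main(String[] args) throws Exception {
    // WhitespaceTokenizer splits purely on whitespace: no lowercasing, no stemming.
    WhitespaceTokenizer tokenizer =
        new WhitespaceTokenizer(new StringReader("hello brave new world"));
    TermAttribute term = tokenizer.addAttribute(TermAttribute.class);
    while (tokenizer.incrementToken()) {
      System.out.println(term.term()); // prints hello, brave, new, world in turn
    }
    tokenizer.end();
    tokenizer.close();
  }
}

The excerpts that follow all use WhitespaceTokenizer as the source stage of a larger filter chain.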


public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase {
  private TokenStream input;

  @Override
  public void setUp() throws Exception {
    super.setUp();
    // a single five-character token, "abcde", feeds the n-gram tests below
    input = new WhitespaceTokenizer(new StringReader("abcde"));
  }


  // Back-side edge n-grams of lengths 1 through 3 over "abcde".
  // (The enclosing method signature is reconstructed; the excerpt begins mid-method.)
  public void testBackRangeOfNgrams() throws Exception {
    EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.BACK, 1, 3);
    assertTokenStreamContents(tokenizer, new String[]{"e","de","cde"}, new int[]{4,3,2}, new int[]{5,5,5});
  }
 
  public void testSmallTokenInStream() throws Exception {
    // "de" is shorter than minGram (3) and produces no grams; note that this
    // version of the filter reports offsets relative to each input token.
    input = new WhitespaceTokenizer(new StringReader("abc de fgh"));
    EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
    assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,0}, new int[]{3,3});
  }

  // The same filter instance can be replayed after resetting the tokenizer's reader.
  public void testReset() throws Exception {
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(new StringReader("abcde"));
    EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
    assertTokenStreamContents(filter, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3});
    tokenizer.reset(new StringReader("abcde"));
    assertTokenStreamContents(filter, new String[]{"a","ab","abc"}, new int[]{0,0,0}, new int[]{1,2,3});
  }

  // PrefixAndSuffixAwareTokenFilter brackets a stream with single-token
  // prefix ("^") and suffix ("$") streams.
  public void test() throws IOException {

    PrefixAndSuffixAwareTokenFilter ts = new PrefixAndSuffixAwareTokenFilter(
        new SingleTokenTokenStream(createToken("^", 0, 0)),
        new WhitespaceTokenizer(new StringReader("hello world")),
        new SingleTokenTokenStream(createToken("$", 0, 0)));

    assertNext(ts, "^", 0, 0);
    assertNext(ts, "hello", 0, 5);
    assertNext(ts, "world", 6, 11);

    assertNext(ts, "b", 1, 2);
    assertFalse(ts.incrementToken());

    // prefix and suffix using 2x prefix

    ts = new PrefixAwareTokenFilter(
        new SingleTokenTokenStream(createToken("^", 0, 0)),
        new WhitespaceTokenizer(new StringReader("hello world")));
    ts = new PrefixAwareTokenFilter(ts, new SingleTokenTokenStream(createToken("$", 0, 0)));

    assertNext(ts, "^", 0, 0);
    assertNext(ts, "hello", 0, 5);
    assertNext(ts, "world", 6, 11);

      // Tail of an analyzer that expands synonyms; the constructor stores the expansion limit.
      this.maxSynonyms = maxSynonyms;
    }

    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      // whitespace-split, lowercase, then expand synonyms (at most maxSynonyms per token)
      TokenStream ts = new WhitespaceTokenizer(reader);
      ts = new LowerCaseFilter(ts);
      ts = new SynonymTokenFilter(ts, synonyms, maxSynonyms);
      return ts;
    }

    // Reuse a cached tokenizer/filter chain across documents rather than
    // rebuilding it on every call.
    public TokenStream reusableTokenStream(String fieldName, Reader reader)
        throws IOException {
      SavedStreams streams = (SavedStreams) getPreviousTokenStream();
      if (streams == null) {
        streams = new SavedStreams();
        streams.source = new WhitespaceTokenizer(reader);
        streams.result = new LowerCaseFilter(streams.source);
        streams.result = new SynonymTokenFilter(streams.result, synonyms, maxSynonyms);
        setPreviousTokenStream(streams);
      } else {
        streams.source.reset(reader);
      }
      return streams.result;
    }

      // From another test analyzer:
      super(Version.LUCENE_CURRENT, name);
    }

    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      // a bare analyzer for tests: split on whitespace, nothing else
      return new WhitespaceTokenizer(reader);
    }

    // The same setup pattern, here for the NGramTokenFilter tests that follow:
    private TokenStream input;
   
    @Override
    public void setUp() throws Exception {
        super.setUp();
        input = new WhitespaceTokenizer(new StringReader("abcde"));
    }

    // Requesting only 6- and 7-grams of the five-character token yields nothing.
    // (The enclosing method signature is reconstructed; the excerpt begins mid-method.)
    public void testOversizedNgrams() throws Exception {
      NGramTokenFilter filter = new NGramTokenFilter(input, 6, 7);
      assertTokenStreamContents(filter, new String[0], new int[0], new int[0]);
    }
   
    public void testSmallTokenInStream() throws Exception {
      // "de" is shorter than minGram (3) and is dropped; unlike the edge
      // n-gram test above, the offsets here are absolute within the input.
      input = new WhitespaceTokenizer(new StringReader("abc de fgh"));
      NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3);
      assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
    }
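Taken together, the two filters differ only in which grams they keep: NGramTokenFilter emits every substring of length minGram through maxGram, while EdgeNGramTokenFilter keeps only the grams anchored at the chosen edge. A sketch of the difference on a single token (the output lists follow from the filters' contracts, not from the excerpts above):

// input token: "abcde"
// new NGramTokenFilter(input, 2, 2)                  -> ab bc cd de
// new EdgeNGramTokenFilter(input, Side.FRONT, 2, 2)  -> ab
// new EdgeNGramTokenFilter(input, Side.BACK, 2, 2)   -> de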
