Examples of org.apache.lucene.analysis.WhitespaceTokenizer
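
Every snippet on this page uses WhitespaceTokenizer as the simplest possible token source: it splits the input on whitespace and emits each chunk unchanged, so any transformation visible in the assertions comes from the filter under test. As a baseline, here is a minimal sketch of the raw tokenizer output on its own (this test is not taken from the examples below; it assumes the same Version-taking constructor and DEFAULT_VERSION constant that they use):

  public void testWhitespaceSplitting() throws Exception {
    Reader reader = new StringReader("please divide this sentence");
    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    tokenizer.reset();
    // No lowercasing, stemming or stopword handling here: one token per whitespace-separated chunk.
    assertTrue(tokenizer.incrementToken());
    assertEquals("please", term.toString());
    assertTrue(tokenizer.incrementToken());
    assertEquals("divide", term.toString());
    assertTrue(tokenizer.incrementToken());
    assertEquals("this", term.toString());
    assertTrue(tokenizer.incrementToken());
    assertEquals("sentence", term.toString());
    assertFalse(tokenizer.incrementToken());
    tokenizer.end();
    tokenizer.close();
  }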


  /**
   * Ensure the filter actually stems text.
   */
  public void testStemming() throws Exception {
    Reader reader = new StringReader("angličtí");
    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
    CzechStemFilterFactory factory = new CzechStemFilterFactory();
    TokenStream stream = factory.create(tokenizer);
    assertTokenStreamContents(stream, new String[] { "anglick" });
  }
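
assertTokenStreamContents comes from the Lucene/Solr test base class these examples extend (BaseTokenTestCase, built on BaseTokenStreamTestCase): it consumes the whole stream, compares each CharTermAttribute value against the expected array, and checks that the stream is then exhausted. A hand-rolled sketch of roughly the same check as above (not part of the original test) would be:

    // Roughly what assertTokenStreamContents(stream, new String[] { "anglick" }) verifies:
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    assertTrue(stream.incrementToken());
    assertEquals("anglick", term.toString());
    assertFalse("stream should be exhausted", stream.incrementToken());
    stream.end();
    stream.close();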


  private static final String[] commonWords = { "s", "a", "b", "c", "d", "the",
      "of" };
 
  public void testReset() throws Exception {
    final String input = "How the s a brown s cow d like A B thing?";
    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
   
    CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
    assertTrue(cgf.incrementToken());
    assertEquals("How", term.toString());
    assertTrue(cgf.incrementToken());
    assertEquals("How_the", term.toString());
    assertTrue(cgf.incrementToken());
    assertEquals("the", term.toString());
    assertTrue(cgf.incrementToken());
    assertEquals("the_s", term.toString());
   
    // Reset the tokenizer with a fresh reader, then reset the filter chain and
    // verify that the stream replays from the first token.
    wt.reset(new StringReader(input));
    cgf.reset();
    assertTrue(cgf.incrementToken());
    assertEquals("How", term.toString());
  }

    assertEquals("How", term.toString());
  }
 
  public void testQueryReset() throws Exception {
    final String input = "How the s a brown s cow d like A B thing?";
    WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
    CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
    CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
   
    // The attribute is fetched from the tokenizer, but the whole chain shares one
    // AttributeSource, so it reflects the terms emitted by the query filter.
    CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
    assertTrue(nsf.incrementToken());
    assertEquals("How_the", term.toString());
    assertTrue(nsf.incrementToken());
    assertEquals("the_s", term.toString());
   
    wt.reset(new StringReader(input));
    nsf.reset();
    assertTrue(nsf.incrementToken());
    assertEquals("How_the", term.toString());
  }

public class TestPortugueseMinimalStemFilterFactory extends BaseTokenTestCase {
  public void testStemming() throws Exception {
    Reader reader = new StringReader("questões");
    PortugueseMinimalStemFilterFactory factory = new PortugueseMinimalStemFilterFactory();
    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
    assertTokenStreamContents(stream, new String[] { "questão" });
  }

    factory.init(args);
    factory.inform(loader);
    Set<?> words = factory.getCommonWords();
    assertTrue("words is null and it shouldn't be", words != null);
    assertTrue(words.contains("the"));
    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("testing the factory"));
    TokenStream stream = factory.create(tokenizer);
    assertTokenStreamContents(stream,
        new String[] { "testing", "testing_the", "the", "the_factory", "factory" });
  }
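
The fragment above begins after the factory, its argument map, and the resource loader have already been created. A plausible setup is sketched below; the stopwords file name is hypothetical, the point being that the resource handed to the "words" parameter must contain "the" for the assertions to hold:

    // Hypothetical setup for the fragment above (file name is illustrative only).
    ResourceLoader loader = new SolrResourceLoader(null, null);
    CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
    Map<String, String> args = new HashMap<String, String>();
    args.put("words", "common-words.txt");   // must list "the" among the common words
    args.put("ignoreCase", "true");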

    DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
    factory.init(args);
    ResourceLoader loader = new SolrResourceLoader(null, null);
    factory.inform(loader);

    TokenStream input = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("the|0.1 quick|0.1 red|0.1"));
    DelimitedPayloadTokenFilter tf = factory.create(input);
    while (tf.incrementToken()){
      PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
      assertTrue("payAttr is null and it shouldn't be", payAttr != null);
      byte[] payData = payAttr.getPayload().getData();

    DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
    factory.init(args);
    ResourceLoader loader = new SolrResourceLoader(null, null);
    factory.inform(loader);

    TokenStream input = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("the*0.1 quick*0.1 red*0.1"));
    DelimitedPayloadTokenFilter tf = factory.create(input);
    while (tf.incrementToken()){
      PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
      assertTrue("payAttr is null and it shouldn't be", payAttr != null);
      byte[] payData = payAttr.getPayload().getData();
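
Both payload fragments stop right after fetching the raw payload bytes. Assuming the factory was configured with the float encoder (the args map, not shown here, would select it), the bytes can be decoded back into the number that followed the delimiter with PayloadHelper from org.apache.lucene.analysis.payloads; a sketch of the rest of the loop body (not part of the original test) could read:

      // Decode the float payload written by DelimitedPayloadTokenFilter and check it.
      float payloadValue = PayloadHelper.decodeFloat(payData);
      assertEquals(0.1f, payloadValue, 0.0001f);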

public class TestKStemFilterFactory extends BaseTokenTestCase {
  public void testStemming() throws Exception {
    Reader reader = new StringReader("bricks");
    KStemFilterFactory factory = new KStemFilterFactory();
    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
    assertTokenStreamContents(stream, new String[] { "brick" });
  }

  /**
   * Ensure the filter actually stems text.
   */
  public void testStemming() throws Exception {
    Reader reader = new StringReader("lichamelijkheden");
    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
    DutchStemFilterFactory factory = new DutchStemFilterFactory();
    TokenStream stream = factory.create(tokenizer);
    assertTokenStreamContents(stream, new String[] { "licham" });
  }

public class TestGermanMinimalStemFilterFactory extends BaseTokenTestCase {
  public void testStemming() throws Exception {
    Reader reader = new StringReader("bilder");
    GermanMinimalStemFilterFactory factory = new GermanMinimalStemFilterFactory();
    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
    assertTokenStreamContents(stream, new String[] { "bild" });
  }
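
All of the stemming snippets above follow the same three-step pattern: wrap the input in a WhitespaceTokenizer, let the factory build the filter chain, and compare the resulting terms. A hypothetical helper capturing that pattern (the method name and signature are ours, not from the original tests; it assumes the Solr TokenFilterFactory interface these factories implement) might look like:

  private void assertSingleTermStem(TokenFilterFactory factory, String input, String expected)
      throws Exception {
    // Tokenize on whitespace, apply the factory-created filter, compare the output terms.
    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
    TokenStream stream = factory.create(tokenizer);
    assertTokenStreamContents(stream, new String[] { expected });
  }

  // Usage, mirroring the German example above:
  // assertSingleTermStem(new GermanMinimalStemFilterFactory(), "bilder", "bild");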
