Package org.apache.lucene.analysis.util

Examples of org.apache.lucene.analysis.util.ClasspathResourceLoader


    TokenStream stream = tokenizerFactory("UAX29URLEmail").create(reader);
    assertTokenStreamContents(stream,
        new String[] {"ざ"});
   
    reader = new StringReader("ざ");
    stream = tokenizerFactory("UAX29URLEmail", Version.LUCENE_31, new ClasspathResourceLoader(getClass())).create(reader);
    assertTokenStreamContents(stream,
        new String[] {"さ"}); // old broken behavior
  }
View Full Code Here


/** basic tests for {@link ICUTokenizerFactory} **/
public class TestICUTokenizerFactory extends BaseTokenStreamTestCase {
  public void testMixedText() throws Exception {
    Reader reader = new StringReader("การที่ได้ต้องแสดงว่างานดี  This is a test ກວ່າດອກ");
    ICUTokenizerFactory factory = new ICUTokenizerFactory(new HashMap<String,String>());
    factory.inform(new ClasspathResourceLoader(getClass()));
    TokenStream stream = factory.create(reader);
    assertTokenStreamContents(stream,
        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี",
        "This", "is", "a", "test", "ກວ່າ", "ດອກ"});
  }
View Full Code Here

    Reader reader = new StringReader
        ("  Don't,break.at?/(punct)!  \u201Cnice\u201D\r\n\r\n85_At:all; `really\" +2=3$5,&813 !@#%$^)(*@#$   ");
    final Map<String,String> args = new HashMap<String,String>();
    args.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-break-only-on-whitespace.rbbi");
    ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
    factory.inform(new ClasspathResourceLoader(this.getClass()));
    TokenStream stream = factory.create(reader);
    assertTokenStreamContents(stream,
        new String[] { "Don't,break.at?/(punct)!", "\u201Cnice\u201D", "85_At:all;", "`really\"", "+2=3$5,&813", "!@#%$^)(*@#$" },
        new String[] { "<ALPHANUM>",               "<ALPHANUM>",       "<ALPHANUM>", "<ALPHANUM>", "<NUM>",       "<OTHER>" });
  }
View Full Code Here

    Reader reader = new StringReader
        ("One-two punch.  Brang-, not brung-it.  This one--not that one--is the right one, -ish.");
    final Map<String,String> args = new HashMap<String,String>();
    args.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-dont-break-on-hyphens.rbbi");
    ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
    factory.inform(new ClasspathResourceLoader(getClass()));
    TokenStream stream = factory.create(reader);
    assertTokenStreamContents(stream,
        new String[] { "One-two", "punch",
            "Brang", "not", "brung-it",
            "This", "one", "not", "that", "one", "is", "the", "right", "one", "ish" });
View Full Code Here

    Reader reader = new StringReader
        ("Some English.  Немного русский.  ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  More English.");
    final Map<String,String> args = new HashMap<String,String>();
    args.put(ICUTokenizerFactory.RULEFILES, "Cyrl:KeywordTokenizer.rbbi,Thai:KeywordTokenizer.rbbi");
    ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
    factory.inform(new ClasspathResourceLoader(getClass()));
    TokenStream stream = factory.create(reader);
    assertTokenStreamContents(stream, new String[] { "Some", "English",
        "Немного русский.  ",
        "ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  ",
        "More", "English" });
View Full Code Here

  public void test() throws Exception {
    Reader reader = new StringReader("foo foobar super-duper-trooper");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = tokenFilterFactory("Length",
        Version.LUCENE_43, new ClasspathResourceLoader(getClass()),
        "min", "4",
        "max", "10",
        "enablePositionIncrements", "false").create(stream);
    assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 1 });
  }
View Full Code Here

    TokenStream stream = tokenizerFactory("UAX29URLEmail").create(reader);
    assertTokenStreamContents(stream,
        new String[] {"ざ"});
   
    reader = new StringReader("ざ");
    stream = tokenizerFactory("UAX29URLEmail", Version.LUCENE_31, new ClasspathResourceLoader(getClass())).create(reader);
    assertTokenStreamContents(stream,
        new String[] {"さ"}); // old broken behavior
  }
View Full Code Here

   
    PhoneticFilterFactory ff = new PhoneticFilterFactory();
   
    args.put( PhoneticFilterFactory.ENCODER, "Metaphone" );
    ff.init( args );
    ff.inform(new ClasspathResourceLoader(ff.getClass()));
    assertTrue( ff.getEncoder() instanceof Metaphone );
    assertTrue( ff.inject ); // default

    args.put( PhoneticFilterFactory.INJECT, "false" );
    ff.init( args );
    ff.inform(new ClasspathResourceLoader(ff.getClass()));
    assertFalse( ff.inject );

    args.put( PhoneticFilterFactory.MAX_CODE_LENGTH, "2");
    ff.init(args);
    ff.inform(new ClasspathResourceLoader(ff.getClass()));
    assertEquals(2, ((Metaphone) ff.getEncoder()).getMaxCodeLen());
  }
View Full Code Here

   */
  public void testFactoryCaseFailure() throws IOException {
    Map<String,String> args = new HashMap<String, String>();
   
    PhoneticFilterFactory ff = new PhoneticFilterFactory();
    ClasspathResourceLoader loader = new ClasspathResourceLoader(ff.getClass());

    try {
      ff.init( args );
      ff.inform( loader );
      fail( "missing encoder parameter" );
View Full Code Here

   */
  public void testFactoryCaseReflection() throws IOException {
    Map<String,String> args = new HashMap<String, String>();
   
    PhoneticFilterFactory ff = new PhoneticFilterFactory();
    ClasspathResourceLoader loader = new ClasspathResourceLoader(ff.getClass());

    args.put( PhoneticFilterFactory.ENCODER, "org.apache.commons.codec.language.Metaphone" );
    ff.init( args );
    ff.inform( loader );
    assertTrue( ff.getEncoder() instanceof Metaphone );
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.util.ClasspathResourceLoader

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.