Examples of org.apache.lucene.analysis.util.ClasspathResourceLoader

org.apache.lucene.analysis.util.ClasspathResourceLoader
Simple {@link ResourceLoader} that uses {@link ClassLoader#getResourceAsStream(String)} and {@link Class#forName(String,boolean,ClassLoader)} to open resources and classes, respectively.

        ("  Don't,break.at?/(punct)!  \u201Cnice\u201D\r\n\r\n85_At:all; `really\" +2=3$5,&813 !@#%$^)(*@#$   ");
    ICUTokenizerFactory factory = new ICUTokenizerFactory();
    final Map<String,String> args = new HashMap<String,String>();
    args.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-break-only-on-whitespace.rbbi");
    factory.init(args);
    factory.inform(new ClasspathResourceLoader(this.getClass()));
    TokenStream stream = factory.create(reader);
    assertTokenStreamContents(stream,
        new String[] { "Don't,break.at?/(punct)!", "\u201Cnice\u201D", "85_At:all;", "`really\"",  "+2=3$5,&813", "!@#%$^)(*@#$" },
        new String[] { "<ALPHANUM>",               "<ALPHANUM>",       "<ALPHANUM>", "<ALPHANUM>", "<NUM>",       "<OTHER>" });
  }

View Full Code Here

        ("One-two punch.  Brang-, not brung-it.  This one--not that one--is the right one, -ish.");
    ICUTokenizerFactory factory = new ICUTokenizerFactory();
    final Map<String,String> args = new HashMap<String,String>();
    args.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-dont-break-on-hyphens.rbbi");
    factory.init(args);
    factory.inform(new ClasspathResourceLoader(getClass()));
    TokenStream stream = factory.create(reader);
    assertTokenStreamContents(stream,
        new String[] { "One-two", "punch",
            "Brang", "not", "brung-it",
            "This", "one", "not", "that", "one", "is", "the", "right", "one", "ish" });

View Full Code Here

        ("Some English.  Немного русский.  ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  More English.");
    ICUTokenizerFactory factory = new ICUTokenizerFactory();
    final Map<String,String> args = new HashMap<String,String>();
    args.put(ICUTokenizerFactory.RULEFILES, "Cyrl:KeywordTokenizer.rbbi,Thai:KeywordTokenizer.rbbi");
    factory.init(args);
    factory.inform(new ClasspathResourceLoader(getClass()));
    TokenStream stream = factory.create(reader);
    assertTokenStreamContents(stream, new String[] { "Some", "English",
        "Немного русский.  ",
        "ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  ",
        "More", "English" });

View Full Code Here

/** basic tests for {@link ICUTokenizerFactory} **/
public class TestICUTokenizerFactory extends BaseTokenStreamTestCase {
  public void testMixedText() throws Exception {
    Reader reader = new StringReader("การที่ได้ต้องแสดงว่างานดี  This is a test ກວ່າດອກ");
    ICUTokenizerFactory factory = new ICUTokenizerFactory(new HashMap<String,String>());
    factory.inform(new ClasspathResourceLoader(getClass()));
    TokenStream stream = factory.create(newAttributeFactory(), reader);
    assertTokenStreamContents(stream,
        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี",
        "This", "is", "a", "test", "ກວ່າ", "ດອກ"});
  }

View Full Code Here

    Reader reader = new StringReader
        ("  Don't,break.at?/(punct)!  \u201Cnice\u201D\r\n\r\n85_At:all; `really\" +2=3$5,&813 !@#%$^)(*@#$   ");
    final Map<String,String> args = new HashMap<>();
    args.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-break-only-on-whitespace.rbbi");
    ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
    factory.inform(new ClasspathResourceLoader(this.getClass()));
    TokenStream stream = factory.create(newAttributeFactory(), reader);
    assertTokenStreamContents(stream,
        new String[] { "Don't,break.at?/(punct)!", "\u201Cnice\u201D", "85_At:all;", "`really\"",  "+2=3$5,&813", "!@#%$^)(*@#$" },
        new String[] { "<ALPHANUM>",               "<ALPHANUM>",       "<ALPHANUM>", "<ALPHANUM>", "<NUM>",       "<OTHER>" });
  }

View Full Code Here

    Reader reader = new StringReader
        ("One-two punch.  Brang-, not brung-it.  This one--not that one--is the right one, -ish.");
    final Map<String,String> args = new HashMap<>();
    args.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-dont-break-on-hyphens.rbbi");
    ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
    factory.inform(new ClasspathResourceLoader(getClass()));
    TokenStream stream = factory.create(newAttributeFactory(), reader);
    assertTokenStreamContents(stream,
        new String[] { "One-two", "punch",
            "Brang", "not", "brung-it",
            "This", "one", "not", "that", "one", "is", "the", "right", "one", "ish" });

View Full Code Here

    Reader reader = new StringReader
        ("Some English.  Немного русский.  ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  More English.");
    final Map<String,String> args = new HashMap<>();
    args.put(ICUTokenizerFactory.RULEFILES, "Cyrl:KeywordTokenizer.rbbi,Thai:KeywordTokenizer.rbbi");
    ICUTokenizerFactory factory = new ICUTokenizerFactory(args);
    factory.inform(new ClasspathResourceLoader(getClass()));
    TokenStream stream = factory.create(newAttributeFactory(), reader);
    assertTokenStreamContents(stream, new String[] { "Some", "English",
        "Немного русский.  ",
        "ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  ",
        "More", "English" });

View Full Code Here


  public void test() throws Exception {
    Reader reader = new StringReader("foo foobar super-duper-trooper");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = tokenFilterFactory("Length",
        Version.LUCENE_4_3, new ClasspathResourceLoader(getClass()),
        "min", "4",
        "max", "10",
        "enablePositionIncrements", "false").create(stream);
    assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 1 });
  }

View Full Code Here

   * @deprecated Remove this test in Lucene 5.0 */
  @Deprecated
  public void testSynonymsOld() throws Exception {
    Reader reader = new StringReader("GB");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = tokenFilterFactory("Synonym", Version.LUCENE_3_3, new ClasspathResourceLoader(getClass()),
        "synonyms", "synonyms.txt").create(stream);
    assertTrue(stream instanceof SlowSynonymFilter);
    assertTokenStreamContents(stream, 
        new String[] { "GB", "gib", "gigabyte", "gigabytes" },
        new int[] { 1, 0, 0, 0 });

View Full Code Here

    TokenStream stream = tokenizerFactory("UAX29URLEmail").create(reader);
    assertTokenStreamContents(stream, 
        new String[] {"ざ"});
    
    reader = new StringReader("ざ");
    stream = tokenizerFactory("UAX29URLEmail", Version.LUCENE_3_1, new ClasspathResourceLoader(getClass())).create(reader);
    assertTokenStreamContents(stream, 
        new String[] {"さ"}); // old broken behavior
  }

View Full Code Here

0 1 2 3 4 5 6 7

TOP

Related Classes of org.apache.lucene.analysis.util.ClasspathResourceLoader

org.apache.lucene.analysis.commongrams.TestCommonGramsFilterFactory

org.apache.lucene.analysis.commongrams.TestCommonGramsQueryFilterFactory

org.apache.lucene.analysis.compound.TestDictionaryCompoundWordTokenFilterFactory

org.apache.lucene.analysis.compound.TestHyphenationCompoundWordTokenFilterFactory

org.apache.lucene.analysis.core.TestFactories

org.apache.lucene.analysis.core.TestStopFilterFactory

org.apache.lucene.analysis.core.TestTypeTokenFilterFactory

org.apache.lucene.analysis.hunspell.TestHunspellStemFilterFactory

org.apache.lucene.analysis.icu.segmentation.TestICUTokenizerFactory

org.apache.lucene.analysis.miscellaneous.TestKeepFilterFactory

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.