/*
 * Tail of a WordDelimiterFilterFactory test. NOTE(review): the method header
 * and the declarations of `args`, `loader`, `factoryDefault`, and `testText`
 * are above this chunk — confirm their setup against the full file.
 */
/* default behavior */
// NOTE(review): testText presumably contains "I borrowed $5,400.00 at 25% interest rate"
// (inferred from the expected tokens asserted below) — confirm at its declaration.
factoryDefault.init(args);
factoryDefault.inform(loader);
TokenStream ts = factoryDefault.create(
new WhitespaceTokenizer(BaseTokenTestCase.DEFAULT_VERSION, new StringReader(testText)));
// With the default type table, '$', ',', '.' and '%' act as delimiters: the
// currency/percent tokens are split into their digit runs, and concatenated
// forms ("540000", "interestrate") are also emitted.
BaseTokenTestCase.assertTokenStreamContents(ts,
new String[] { "I", "borrowed", "5", "400", "00", "540000", "at", "25", "interest", "rate", "interestrate" });
ts = factoryDefault.create(
new WhitespaceTokenizer(BaseTokenTestCase.DEFAULT_VERSION, new StringReader("foo\u200Dbar")));
// U+200D (zero-width joiner) is likewise treated as a delimiter by default,
// so "foo\u200Dbar" splits (plus the concatenated "foobar").
BaseTokenTestCase.assertTokenStreamContents(ts,
new String[] { "foo", "bar", "foobar" });
/* custom behavior */
WordDelimiterFilterFactory factoryCustom = new WordDelimiterFilterFactory();
// use a custom type mapping
// NOTE(review): "wdftypes.txt" is a test resource resolved via `loader`; based
// on the assertions below it remaps '$', ',', '.', '%', and U+200D so they are
// kept inside tokens instead of splitting them — verify against that file.
args.put("types", "wdftypes.txt");
factoryCustom.init(args);
factoryCustom.inform(loader);
ts = factoryCustom.create(
new WhitespaceTokenizer(BaseTokenTestCase.DEFAULT_VERSION, new StringReader(testText)));
// Under the custom mapping "$5,400.00" and "25%" survive as single tokens.
BaseTokenTestCase.assertTokenStreamContents(ts,
new String[] { "I", "borrowed", "$5,400.00", "at", "25%", "interest", "rate", "interestrate" });
/* test custom behavior with a char > 0x7F, because we had to make a larger byte[] */
// U+200D exercises the non-ASCII path of the custom type table: the input is
// expected to pass through unsplit.
ts = factoryCustom.create(
new WhitespaceTokenizer(BaseTokenTestCase.DEFAULT_VERSION, new StringReader("foo\u200Dbar")));
BaseTokenTestCase.assertTokenStreamContents(ts,
new String[] { "foo\u200Dbar" });
}