assertEquals( "Big", new String(termBuffer, 0, termBuffer.length));
termBuffer = "BIG".toCharArray();
factory.processWord(termBuffer, 0, termBuffer.length, 0 );
assertEquals( "BIG", new String(termBuffer, 0, termBuffer.length));
Tokenizer tokenizer = new KeywordTokenizer(new StringReader("Hello thEre my Name is Ryan"));
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "Hello there my name is ryan" });
// now each token
factory.onlyFirstWord = false;
tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("Hello thEre my Name is Ryan"));
stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "Hello", "There", "My", "Name", "Is", "Ryan" });
// now only the long words
factory.minWordLength = 3;
tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("Hello thEre my Name is Ryan" ));
stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "Hello", "There", "my", "Name", "is", "Ryan" });
// without prefix
tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("McKinley" ));
stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "Mckinley" });
// Now try some prefixes
factory = new CapitalizationFilterFactory();
args.put( "okPrefix", "McK" ); // all words
factory.init( args );
tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("McKinley" ));
stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "McKinley" });
// now try some stuff with numbers
factory.forceFirstLetter = false;
factory.onlyFirstWord = false;
tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("1st 2nd third" ));
stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "1st", "2nd", "Third" });
factory.forceFirstLetter = true;
tokenizer = new KeywordTokenizer(new StringReader("the The the" ));
stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "The The the" });
}