/**
 * Creates an analyzer configured with the given stop-word set.
 *
 * <p>The set is stored by reference; no copy is made, so later changes to
 * the caller's set are visible through this analyzer's field.
 *
 * @param stopWords the stop-word set to keep on this analyzer; it is not
 *        validated here (NOTE(review): presumably consumed when building
 *        the token stream; confirm whether {@code null} is permitted)
 */
public SmartChineseAnalyzer(Set stopWords) {
  this.stopWords = stopWords;
}
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new SentenceTokenizer(reader);
result = new WordTokenFilter(result);
// result = new LowerCaseFilter(result);
// LowerCaseFilter is not needed, as SegTokenFilter lowercases Basic Latin text.
// Porter stemming may seem too strict, but this is intentional: it is a feature, not a bug.
result = new PorterStemFilter(result);