// Load the tokenizer model referenced by params.getTokenizerModel() and wrap it in a TokenizerME.
TokenizerModel tokenizerModel = new TokenizerModelLoader().load(params.getTokenizerModel());
Tokenizer tokenizer = new TokenizerME(tokenizerModel);

// Accepts only files whose lower-cased name ends with ".sgm".
FileFilter sgmFileFilter = new FileFilter() {
  @Override
  public boolean accept(File candidate) {
    return StringUtil.toLowerCase(candidate.getName()).endsWith(".sgm");
  }
};

// Stream over the files found under params.getData() that pass the filter.
// NOTE(review): the trailing 'false' is presumably the "descend into sub-directories" switch — confirm.
DirectorySampleStream sgmFileStream =
    new DirectorySampleStream(params.getData(), sgmFileFilter, false);

// Wrap the file stream in a FileToStringSampleStream, handing it the UTF-8 charset.
Charset utf8 = Charset.forName("UTF-8");
ObjectStream<String> mucDocStream = new FileToStringSampleStream(sgmFileStream, utf8);