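* @param examples the example set to process; its instances are replaced by the filtered instances
* @return the configured {@code WordFeaturesExtractor}, after it has been applied to {@code examples}
* @throws Exception if configuring the filter's input format or applying the filter fails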
*/
protected Filter getUnigramBigramFilter(ExampleSet examples) throws Exception {
    final WordFeaturesExtractor extractor = new WordFeaturesExtractor();

    // Configure the extractor before handing it the input format:
    // it reads the message attribute and lower-cases its tokens.
    extractor.setSelectedAttributeName(ExampleSet.MESSAGE_ATTR_NAME);
    extractor.setLowerCaseTokens(true);
    // Stem tokens and drop "nonsense" ones.
    extractor.setStemmer(new SimpleStringToWordVector.NoNonsenseStemmer(false));
    // Emit bigram features as well.
    extractor.setUseBigrams(true);

    // Bind the extractor to the current instances, then replace the example
    // set's instances with the filtered result (the argument is mutated).
    extractor.setInputFormat(examples.getInstances());
    examples.setInstances(Filter.useFilter(examples.getInstances(), extractor));

    // Hand back the configured extractor to the caller.
    return extractor;
}