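* @return the configured bag-of-words filter, after it has been applied to the instances of {@code examples}
* @throws Exception if configuring the filter or applying it to the instances fails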
*/
@Override
protected Filter getBagOfWordsFilter(ExampleSet examples) throws Exception {
    // Configure a word-vector filter, run it over the example set's instances,
    // store the converted instances back into the example set, and hand the
    // filter itself to the caller.
    SimpleStringToWordVector wordVectorizer = new SimpleStringToWordVector();

    // Prefix for the generated attributes and the string attribute to read from.
    wordVectorizer.setAttributeNamePrefix(BAG_OF_WORDS_FEATURE_PREFIX);
    wordVectorizer.setStringAttributeName(ExampleSet.MESSAGE_ATTR_NAME);

    // A minimum term frequency (e.g. setMinTermFreq(20)) is deliberately left
    // unset: a sensible threshold depends on how much data is used.

    // Do not operate on a per-class basis; limit the words to keep to 3000.
    wordVectorizer.setDoNotOperateOnPerClassBasis(true);
    wordVectorizer.setWordsToKeep(3000);

    // Lower-case the tokens, then stem them; the stemmer is also intended to
    // remove "nonsense" tokens.
    wordVectorizer.setLowerCaseTokens(true);
    wordVectorizer.setStemmer(new NoNonsenseStemmer(true));

    // Enable both the TF and the IDF transform.
    wordVectorizer.setTFTransform(true);
    wordVectorizer.setIDFTransform(true);

    // Request document-length normalization using the FILTER_NORMALIZE_ALL tag.
    SelectedTag normalizeAll =
            new SelectedTag(StringToWordVector.FILTER_NORMALIZE_ALL, StringToWordVector.TAGS_FILTER);
    wordVectorizer.setNormalizeDocLength(normalizeAll);

    // Output word counts instead of plain presence indicators.
    wordVectorizer.setOutputWordCounts(true);

    // All options are set: give the filter its input format, then replace the
    // example set's instances with the filtered ones.
    wordVectorizer.setInputFormat(examples.getInstances());
    examples.setInstances(Filter.useFilter(examples.getInstances(), wordVectorizer));

    return wordVectorizer;
}