public void testWithReuters() {
List<TextInstance<Integer>> training = DatasetUtils.getTrainingFolds(0, 10, Datasets.getReutersSamples());
List<TextInstance<Integer>> eval = DatasetUtils.getEvalFold(0, 10, Datasets.getReutersSamples());
// Use 500 max words per class to speed up test
KLDClassifier kldClassifier = new KLDClassifier(9, 500);
// Train
for (TextInstance<Integer> instance : training) {
kldClassifier.update(instance.label, instance.tokens);
}
// Eval
double evalSize = 0.0;
double errorCount = 0.0;
for (TextInstance<Integer> instance : eval) {
int actual = kldClassifier.classify(instance.tokens);
if (actual != instance.label) {
errorCount++;
}
evalSize++;
}