File base = new File(inputFile);
// modelFile contains the best model; deserialize it into the classifier used below
OnlineLogisticRegression classifier = ModelSerializer.readBinary(new FileInputStream(modelFile), OnlineLogisticRegression.class);
Dictionary newsGroups = new Dictionary();
Multiset<String> overallCounts = HashMultiset.create();
List<File> files = Lists.newArrayList();
for (File newsgroup : base.listFiles()) {
if (newsgroup.isDirectory()) {
newsGroups.intern(newsgroup.getName());
files.addAll(Arrays.asList(newsgroup.listFiles()));
}
}
System.out.printf("%d test files\n", files.size());
ResultAnalyzer ra = new ResultAnalyzer(newsGroups.values(), "DEFAULT");
for (File file : files) {
String ng = file.getParentFile().getName();
int actual = newsGroups.intern(ng);
NewsgroupHelper helper = new NewsgroupHelper();
Vector input = helper.encodeFeatureVector(file, actual, 0, overallCounts);//no leak type ensures this is a normal vector
Vector result = classifier.classifyFull(input);
int cat = result.maxValueIndex();
double score = result.maxValue();
double ll = classifier.logLikelihood(actual, input);
ClassifierResult cr = new ClassifierResult(newsGroups.values().get(cat), score, ll);
ra.addInstance(newsGroups.values().get(actual), cr);
}
output.printf("%s\n\n", ra.toString());
}