throw new IllegalArgumentException("Unrecognized dataSource type: " + params.get("dataSource"));
}
// Build the classifier from the algorithm and datastore selected above and initialize it
// before any document is classified.
ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
classifier.initialize();
// Collects expected-vs-classified outcomes; constructed from the classifier's labels and the
// "defaultCat" parameter.
ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
// Timing accumulated over every classification call across all files.
TimingStatistics totalStatistics = new TimingStatistics();
// Nothing is classified when subdirs is null.
if (subdirs != null) {
for (File file : subdirs) {
if (verbose) {
log.info("--------------");
log.info("Testing: {}", file);
}
// Timing for the current file only; logged after the file is processed when verbose.
TimingStatistics operationStats = new TimingStatistics();
// Zero-based index of the line being processed; used only in the verbose log message.
long lineNum = 0;
// Read the file line by line using the charset named by the "encoding" parameter.
// NOTE(review): the meaning of the trailing 'false' argument is not visible here - confirm
// against FileLineIterable.
for (String line : new FileLineIterable(new File(file.getPath()), Charset.forName(params
.get("encoding")), false)) {
// Turn the line into n-grams; the resulting map's keys are used below as the expected label
// and its values as the tokens handed to the classifier.
// NOTE(review): "gramSize" is re-parsed for every line although it does not change inside
// this loop.
Map<String,List<String>> document = new NGrams(line, Integer.parseInt(params.get("gramSize")))
.generateNGrams();
for (Map.Entry<String,List<String>> stringListEntry : document.entrySet()) {
String correctLabel = stringListEntry.getKey();
List<String> strings = stringListEntry.getValue();
// Start both timers immediately before the classification call and end them right after it,
// so that only classifyDocument is measured.
TimingStatistics.Call call = operationStats.newCall();
TimingStatistics.Call outercall = totalStatistics.newCall();
ClassifierResult classifiedLabel = classifier.classifyDocument(strings.toArray(new String[strings
.size()]), params.get("defaultCat"));
call.end();
outercall.end();
// Record the expected label against the classifier's result; the returned flag is only used
// for the verbose log line below.
boolean correct = resultAnalyzer.addInstance(correctLabel, classifiedLabel);
if (verbose) {
// We have one document per line
// Only the first 30 characters of the line are logged to keep the output short.
log.info("Line Number: {} Line(30): {} Expected Label: {} Classified Label: {} Correct: {}",
new Object[] {lineNum, line.length() > 30 ? line.substring(0, 30) : line, correctLabel,
classifiedLabel.getLabel(), correct,});
}
// log.info("{} {}", correctLabel, classifiedLabel);
}
// Advanced once per input line, regardless of how many map entries the line produced.
lineNum++;
}
/*
* log.info("{}\t{}\t{}/{}", new Object[] {correctLabel,
* resultAnalyzer.getConfusionMatrix().getAccuracy(correctLabel),
* resultAnalyzer.getConfusionMatrix().getCorrect(correctLabel),
* resultAnalyzer.getConfusionMatrix().getTotal(correctLabel)});
*/
log.info("Classified instances from {}", file.getName());
if (verbose) {
// Per-file timing summary.
log.info("Performance stats {}", operationStats.toString());
}
}
}
if (verbose) {