// ---- Model loading ---------------------------------------------------------
FileSystem fs = FileSystem.get(conf);
log.info("Loading model from: {}", modelPaths);

// The type option (compared case-insensitively) picks the model/classifier
// pair; any value other than "bayes" or "cbayes" is rejected.
String classifierType = (String) cmdLine.getValue(typeOpt);
Model model;
Classifier classifier;
if (classifierType.equalsIgnoreCase("cbayes")) {
  log.info("Testing Complementary Bayes Classifier");
  model = new CBayesModel();
  classifier = new CBayesClassifier();
} else if (classifierType.equalsIgnoreCase("bayes")) {
  log.info("Testing Bayes Classifier");
  model = new BayesModel();
  classifier = new BayesClassifier();
} else {
  throw new IllegalArgumentException("Unrecognized classifier type: " + classifierType);
}

SequenceFileModelReader.loadModel(model, fs, modelPaths, conf);
log.info("Done loading model: # labels: {}", model.getLabels().size());
log.info("Done generating Model");

// ---- Optional settings, each with a built-in default -----------------------
// Category handed to the classifier and the result analyzer as the default.
String defaultCat = cmdLine.hasOption(defaultCatOpt)
    ? (String) cmdLine.getValue(defaultCatOpt)
    : "unknown";

// Character encoding used when reading the test files.
String encoding = cmdLine.hasOption(encodingOpt)
    ? (String) cmdLine.getValue(encodingOpt)
    : "UTF-8";

// Note: a block that reflectively instantiated an Analyzer from analyzerOpt
// (falling back to StandardAnalyzer) is disabled at this point; no analyzer is
// created here.

// Gram size passed to n-gram generation for every input line.
int gramSize = cmdLine.hasOption(gramSizeOpt)
    ? Integer.parseInt((String) cmdLine.getValue(gramSizeOpt))
    : 1;

// ---- Test data location ----------------------------------------------------
String testDirPath = (String) cmdLine.getValue(dirOpt);
File dir = new File(testDirPath);
// listFiles() yields null when the path cannot be listed as a directory.
File[] subdirs = dir.listFiles();
ResultAnalyzer resultAnalyzer = new ResultAnalyzer(model.getLabels(), defaultCat);
// Walk the entries of the test directory (skipped entirely when listFiles()
// returned null) and classify the contents of each one.
if (subdirs != null) {
  for (File subdir : subdirs) {
    // The expected label is the entry name up to the first literal ".txt".
    // A regex split on ".txt" is deliberately avoided: there '.' matches any
    // character (so "mytxt.txt" would yield "m"), and split returns an empty
    // array when the whole name matches (e.g. "atxt"), which makes a [0]
    // access throw. A plain substring search has neither problem.
    String entryName = subdir.getName();
    int suffixAt = entryName.indexOf(".txt");
    String correctLabel = suffixAt >= 0 ? entryName.substring(0, suffixAt) : entryName;

    // Decode the entry's bytes with the configured encoding.
    BufferedReader fileReader = new BufferedReader(new InputStreamReader(
        new FileInputStream(subdir.getPath()), encoding));
    try {
      String line;
      while ((line = fileReader.readLine()) != null) {
        // Each line is expanded into n-grams; every entry of the returned map
        // is classified on its own and recorded against the expected label.
        Map<String, List<String>> document = Model.generateNGrams(line, gramSize);
        for (Map.Entry<String, List<String>> stringListEntry : document.entrySet()) {
          List<String> strings = stringListEntry.getValue();
          ClassifierResult classifiedLabel = classifier.classify(model,
              strings.toArray(new String[strings.size()]),
              defaultCat);
          resultAnalyzer.addInstance(correctLabel, classifiedLabel);
        }
      }