super.setUp();
ClassifierData.writeDataToFile("testdata/bayesinput", ClassifierData.DATA);
}
public void testSelfTestBayes() throws InvalidDatastoreException, IOException {
BayesParameters params = new BayesParameters(1);
params.set("alpha_i", "1.0");
params.set("dataSource", "hdfs");
TrainClassifier.trainNaiveBayes("testdata/bayesinput", "testdata/bayesmodel", params);
params.set("verbose", "true");
params.set("basePath", "testdata/bayesmodel");
params.set("classifierType", "bayes");
params.set("dataSource", "hdfs");
params.set("defaultCat", "unknown");
params.set("encoding", "UTF-8");
params.set("alpha_i", "1.0");
Algorithm algorithm = new BayesAlgorithm();
Datastore datastore = new InMemoryBayesDatastore(params);
ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
classifier.initialize();
ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
for (String[] entry : ClassifierData.DATA) {
List<String> document = new NGrams(entry[1], Integer.parseInt(params.get("gramSize")))
.generateNGramsWithoutLabel();
assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
params.get("defaultCat"), 100).length);
ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
.get("defaultCat"));
assertEquals(entry[0], result.getLabel());
resultAnalyzer.addInstance(entry[0], result);
}
int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
if (i == j)
assertEquals(4, matrix[i][j]);
else
assertEquals(0, matrix[i][j]);
}
}
params.set("testDirPath", "testdata/bayesinput");
TestClassifier.classifyParallel(params);
Configuration conf = new Configuration();
Path outputFiles = new Path("testdata/bayesinput-output/part*");
FileSystem fs = FileSystem.get(outputFiles.toUri(), conf);
matrix = BayesClassifierDriver.readResult(fs, outputFiles, conf, params).getConfusionMatrix();