fs.copyFromLocalFile(new Path(tempInputFile.getAbsolutePath()), input);
}
@Test
public void testSelfTestBayes() throws Exception {
BayesParameters params = new BayesParameters(1);
params.set("alpha_i", "1.0");
params.set("dataSource", "hdfs");
Path bayesInputPath = getTestTempFilePath("bayesinput");
Path bayesModelPath = getTestTempDirPath("bayesmodel");
TrainClassifier.trainNaiveBayes(bayesInputPath, bayesModelPath, params);
params.set("verbose", "true");
params.set("basePath", bayesModelPath.toString());
params.set("classifierType", "bayes");
params.set("dataSource", "hdfs");
params.set("defaultCat", "unknown");
params.set("encoding", "UTF-8");
params.set("alpha_i", "1.0");
Algorithm algorithm = new BayesAlgorithm();
Datastore datastore = new InMemoryBayesDatastore(params);
ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
classifier.initialize();
ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
for (String[] entry : ClassifierData.DATA) {
List<String> document = new NGrams(entry[1], Integer.parseInt(params.get("gramSize")))
.generateNGramsWithoutLabel();
assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
params.get("defaultCat"), 100).length);
ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
.get("defaultCat"));
assertEquals(entry[0], result.getLabel());
resultAnalyzer.addInstance(entry[0], result);
}
int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
assertEquals(i == j ? 4 : 0, matrix[i][j]);
}
}
params.set("testDirPath", bayesInputPath.toString());
TestClassifier.classifyParallel(params);
Configuration conf = new Configuration();
Path outputFiles = getTestTempFilePath("bayesinput-output/part*");
FileSystem fs = FileSystem.get(outputFiles.toUri(), conf);
matrix = BayesClassifierDriver.readResult(fs, outputFiles, conf, params).getConfusionMatrix();