logger.debug("Started TestFtypeClassifier");
//*-- read the serialized classification model from the configured model file
String modelFile = Constants.FTYPE_CLASS_MODEL;
FileInputStream modelStream = new FileInputStream(modelFile);
LMClassifier compiledClassifier;
try
{
//*-- NOTE(review): readObject performs Java deserialization; the model file is assumed to be trusted
ObjectInputStream oi = new ObjectInputStream(modelStream);
compiledClassifier = (LMClassifier) oi.readObject();
}
finally
{
//*-- always release the file handle, even if the stream header or the model object cannot be read
modelStream.close();
}
//*-- loop through the identical categories and test the classification of test documents
//*-- the confusion matrix is built over the CATEGORIES array
ConfusionMatrix confMatrix = new ConfusionMatrix(CATEGORIES);
//*-- number formatter limited to one integer digit and three fraction digits
NumberFormat nf = NumberFormat.getInstance();
nf.setMaximumIntegerDigits(1); nf.setMaximumFractionDigits(3);
for (int i=0; i < CATEGORIES.length; ++i)
{
File classDir = new File(TESTING_DIR, CATEGORIES[i]);
String[] testingFiles = classDir.list();
//*-- for each file, find the best category using the classifier and compare with the
//*-- designated category
for (int j=0; j < testingFiles.length; ++j)
{
String text = Files.readFromFile( new File(classDir, testingFiles[j]) );
//*-- limit the length of the text
if (text.length() > 500) text = text.substring(0, 500);
logger.debug("Testing on " + CATEGORIES[i] + File.separator + testingFiles[j]);
JointClassification jc = compiledClassifier.classifyJoint(text);
//*-- check if we have sufficient confidence in the decision
String bestCategory = (jc.score(0) > -2.5) ? jc.bestCategory(): "text";
confMatrix.increment(CATEGORIES[i], bestCategory);
logger.debug("Best Category: " + bestCategory );