* Test the classifier using the files provided in the test/category directories
*/
public static void main(String[] args) throws ClassNotFoundException, IOException
{
  //*-- Entry point: loads the serialized classifier named by Constants.FTYPE_CLASS_MODEL,
  //*-- classifies every file found under TESTING_DIR/<category> and logs a confusion
  //*-- matrix with the overall accuracy. The args array is not used.
  //*-- Throws IOException if the model or a test file/directory cannot be read, and
  //*-- ClassNotFoundException if the serialized model class cannot be resolved.
  PropertyConfigurator.configure (Constants.LOG4J_FILE);
  Logger logger = Logger.getLogger(TestFtypeClassifier.class.getName());
  logger.debug("Started TestFtypeClassifier");

  //*-- read the classification model; the file stream is closed even when
  //*-- opening the object stream or deserializing the model fails
  String modelFile = Constants.FTYPE_CLASS_MODEL;
  LMClassifier compiledClassifier;
  FileInputStream modelStream = new FileInputStream(modelFile);
  try
  {
    ObjectInputStream oi = new ObjectInputStream(modelStream);
    compiledClassifier = (LMClassifier) oi.readObject();
  }
  finally
  {
    modelStream.close();
  }

  //*-- loop through the categories and test the classification of test documents
  ConfusionMatrix confMatrix = new ConfusionMatrix(CATEGORIES);
  NumberFormat nf = NumberFormat.getInstance();
  //*-- only the fraction is capped; capping the integer digits would make the
  //*-- formatter drop the high-order digits of scores with magnitude >= 10
  nf.setMaximumFractionDigits(3);
  for (int i = 0; i < CATEGORIES.length; ++i)
  {
    File classDir = new File(TESTING_DIR, CATEGORIES[i]);
    String[] testingFiles = classDir.list();
    //*-- File.list() returns null when the path is not a readable directory
    if (testingFiles == null)
      throw new IOException("Cannot list test directory: " + classDir.getPath());

    //*-- for each file, find the best category using the classifier and compare with the
    //*-- designated category
    for (int j = 0; j < testingFiles.length; ++j)
    {
      String text = Files.readFromFile( new File(classDir, testingFiles[j]) );
      //*-- limit the length of the text
      if (text.length() > 500) text = text.substring(0, 500);
      logger.debug("Testing on " + CATEGORIES[i] + File.separator + testingFiles[j]);
      JointClassification jc = compiledClassifier.classifyJoint(text);

      //*-- check if we have sufficient confidence in the decision: when the top-ranked
      //*-- score is not above -2.5 the document is counted as "text"
      String bestCategory = (jc.score(0) > -2.5) ? jc.bestCategory(): "text";
      confMatrix.increment(CATEGORIES[i], bestCategory);
      logger.debug("Best Category: " + bestCategory );

      //*-- the score line is only assembled when it will actually be logged
      if (logger.isDebugEnabled())
      {
        StringBuffer scores = new StringBuffer();
        scores.append("Scores ");
        for (int k = 0; k < CATEGORIES.length; k++)
          scores.append(nf.format(jc.score(k))).append(" ");
        logger.debug(scores);
      }
    } //*-- end of inner for
  } //*-- end of outer for

  logger.info("--------------------------------------------");
  logger.info("- Results ");
  logger.info("--------------------------------------------");

  //*-- header row: a label column followed by one column per category
  int[][] imatrix = confMatrix.matrix();
  StringBuffer sb = new StringBuffer();
  sb.append(StringTools.fillin("CATEGORY", 10, true, ' ') );
  for (int i = 0; i < CATEGORIES.length; i++) sb.append(StringTools.fillin(CATEGORIES[i], 8, false, ' ') );
  logger.info(sb.toString());

  //*-- one row per category: the category name followed by the counts in imatrix[i]
  for (int i = 0; i < imatrix.length; i++)
  {
    sb = new StringBuffer();
    sb.append(StringTools.fillin(CATEGORIES[i], 10, true, ' ', 10 - CATEGORIES[i].length() ) );
    for (int j = 0; j < imatrix[i].length; j++)
    {
      String out = "" + imatrix[i][j];
      sb.append(StringTools.fillin(out, 8, false, ' ', 8 - out.length() ) );
    }
    logger.info(sb.toString());
  }
  logger.info("Total Accuracy: " + nf.format(confMatrix.totalAccuracy()) );
  logger.info("Total Correct : " + confMatrix.totalCorrect() + " out of " + confMatrix.totalCount() );
}