{
// train a classifier on the entire training set
logger.info("Training token classifier on entire data set (size=" + trainList.size() + ")...");
m_tokenClassifier = m_trainer.train(trainList);
Trial t = new Trial(m_tokenClassifier, trainList);
logger.info("Training set accuracy = " + t.getAccuracy());
if (m_numCV == 0)
return;
// train classifiers using cross validation
InstanceList.CrossValidationIterator cvIter = trainList.new CrossValidationIterator(m_numCV, m_randSeed);
int f = 1;
while (cvIter.hasNext()) {
f++;
InstanceList[] fold = cvIter.nextSplit();
logger.info("Training token classifier on cv fold " + f + " / " + m_numCV + " (size=" + fold[0].size() + ")...");
Classifier foldClassifier = m_trainer.train(fold[0]);
Trial t1 = new Trial(foldClassifier, fold[0]);
Trial t2 = new Trial(foldClassifier, fold[1]);
logger.info("Within-fold accuracy = " + t1.getAccuracy());
logger.info("Out-of-fold accuracy = " + t2.getAccuracy());
/*for (int x = 0; x < t2.size(); x++) {
logger.info("xxx pred:" + t2.getClassification(x).getLabeling().getBestLabel() + " true:" + t2.getClassification(x).getInstance().getLabeling());
}*/