CmdLineUtil.checkInputFile("Training Data", trainingDataInFile);
ObjectStream<POSSample> sampleStream = POSTaggerTrainerTool.openSampleData(
"Training Data", trainingDataInFile, params.getEncoding());
POSTaggerCrossValidator validator;
POSTaggerEvaluationMonitor missclassifiedListener = null;
if (params.getMisclassified()) {
missclassifiedListener = new POSEvaluationErrorListener();
}
if (mlParams == null) {
mlParams = new TrainingParameters();
mlParams.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT");
mlParams.put(TrainingParameters.ITERATIONS_PARAM,
Integer.toString(params.getIterations()));
mlParams.put(TrainingParameters.CUTOFF_PARAM,
Integer.toString(params.getCutoff()));
}
try {
// TODO: Move to util method ...
POSDictionary tagdict = null;
if (params.getDict() != null) {
tagdict = POSDictionary.create(new FileInputStream(params.getDict()));
}
validator = new POSTaggerCrossValidator(params.getLang(), mlParams,
tagdict, params.getNgram(), missclassifiedListener);
validator.evaluate(sampleStream, params.getFolds());
} catch (IOException e) {
CmdLineUtil.printTrainingIoError(e);
throw new TerminateToolException(-1);
} finally {
try {
sampleStream.close();
} catch (IOException e) {
// sorry that this can fail
}
}
System.out.println("done");
System.out.println();
System.out.println("Accuracy: " + validator.getWordAccuracy());
}