//*-- Trains an HMM part-of-speech model from the "txt" files in TRAINING_DIR and
//*-- writes the compiled model to <MUSTRU_HOME>/data/training/pos/pos_tagger.
//*-- Throws IllegalStateException if the training directory is missing or cannot be listed.

//*-- configure log4j first so that every later message is captured
PropertyConfigurator.configure(Constants.LOG4J_FILE);
Logger logger = Logger.getLogger(TrainPOSTagger.class.getName());
logger.debug("Started POS tagged model generation");

//*-- set up parser with estimator as handler
HmmCharLmEstimator estimator = new HmmCharLmEstimator(N_GRAM, NUM_CHARS, LAMBDA_FACTOR);
Parser parser = new BrownPosParser();
parser.setHandler(estimator);

//*-- train on files in data directory ending in "txt"
//*-- stop here when the directory is missing: File.listFiles() returns null for a
//*-- non-directory, so continuing would only fail later with a NullPointerException
if (!TRAINING_DIR.isDirectory())
{ String msg = "Could not find training directory=" + TRAINING_DIR;
  logger.fatal(msg);
  throw new IllegalStateException(msg); }
File[] files = TRAINING_DIR.listFiles(new FileExtensionFilter("txt"));
//*-- listFiles() may also return null when an I/O error occurs while reading the directory
if (files == null)
{ String msg = "Could not list training directory=" + TRAINING_DIR;
  logger.fatal(msg);
  throw new IllegalStateException(msg); }
for (int i = 0; i < files.length; ++i)
{ logger.debug("Training on file: " + files[i]); parser.parse(files[i]); }

//*-- write output to file
File modelFile = new File(MUSTRU_HOME + File.separator + "data" + File.separator + "training" + File.separator + "pos" + File.separator + "pos_tagger");
ObjectOutputStream objOut = new ObjectOutputStream(new FileOutputStream(modelFile));
//*-- close the stream in a finally block so the file handle is released even if compileTo fails
try
{ estimator.compileTo(objOut); }
finally
{ Streams.closeOutputStream(objOut); }
logger.debug("Finished POS tagger model generation");
}