{
// Timestamp (milliseconds) taken before any option parsing or file I/O.
long startTime = System.currentTimeMillis();

// Input readers and instance lists. All start out null and are assigned
// further down depending on which command-line options are set.
Reader trainingFile = null;
Reader testFile = null;
Reader constraintsFile = null;
InstanceList trainingData = null;
InstanceList testData = null;

// restArgs is used below as the index of the first data-file argument in args.
// If it equals args.length, no positional arguments were supplied at all.
int restArgs = commandOptions.processOptions(args);
if (args.length == restArgs)
{
    commandOptions.printUsage(true);
    throw new IllegalArgumentException("Missing data file(s)");
}
if (trainOption.value)
{
trainingFile = new FileReader(new File(args[restArgs]));
if (testOption.value != null) {
testFile = new FileReader(new File(args[restArgs+1]));
constraintsFile = new FileReader(new File(args[restArgs+2]));
}
else {
constraintsFile = new FileReader(new File(args[restArgs+1]));
}
} else
testFile = new FileReader(new File(args[restArgs]));
Pipe p = null;
CRF crf = null;
TransducerEvaluator eval = null;
if (continueTrainingOption.value || !trainOption.value)
{
    // Either continuing training or running without training: both paths
    // need a previously saved model, so the model option is mandatory here.
    if (modelOption.value == null)
    {
        commandOptions.printUsage(true);
        throw new IllegalArgumentException("Missing model file option");
    }

    // NOTE(review): this is Java-native deserialization of a user-supplied file;
    // only model files from trusted sources should be loaded this way.
    FileInputStream modelStream = new FileInputStream(modelOption.value);
    try
    {
        ObjectInputStream s = new ObjectInputStream(modelStream);
        crf = (CRF) s.readObject();
    }
    finally
    {
        // Always release the file handle, including when the stream header is
        // invalid or readObject() fails part-way through.
        modelStream.close();
    }

    // Reuse the pipe stored with the model for all further input processing.
    p = crf.getInputPipe();
}
else
{
    // Training from scratch: build a fresh pipe and look up the default label in
    // its target alphabet up front (presumably to register it before any data
    // is read -- confirm against Alphabet.lookupIndex).
    p = new SimpleTaggerSentence2FeatureVectorSequence();
    p.getTargetAlphabet().lookupIndex(defaultOption.value);
}
// Build the instance lists by feeding the opened readers through pipe p.
// The pattern "^\\s*$" matches empty or whitespace-only lines; it is handed to
// LineGroupIterator (presumably as the boundary between line groups -- confirm).
if (trainOption.value)
{
    Pattern blankLine = Pattern.compile("^\\s*$");

    p.setTargetProcessing(true);
    trainingData = new InstanceList(p);
    trainingData.addThruPipe(new LineGroupIterator(trainingFile, blankLine, true));
    logger.info("Number of features in training data: "+p.getDataAlphabet().size());

    if (testOption.value != null && testFile != null)
    {
        // A separate test file was opened: read it through the same pipe.
        testData = new InstanceList(p);
        testData.addThruPipe(new LineGroupIterator(testFile, blankLine, true));
    }
    else if (testOption.value != null)
    {
        // No test reader: carve the test set out of the training data with a
        // seeded random split.
        // NOTE(review): testFile looks to be always non-null at this point when
        // testOption is set during training -- confirm whether this path is reachable.
        Random splitRng = new Random(randomSeedOption.value);
        InstanceList[] parts = trainingData.split(
            splitRng,
            new double[] {trainingFractionOption.value,
                          1-trainingFractionOption.value});
        trainingData = parts[0];
        testData = parts[1];
    }
}
else
{
    // Not training: targets are processed only when a test option was given.
    p.setTargetProcessing(testOption.value != null);
    testData = new InstanceList(p);
    testData.addThruPipe(
        new LineGroupIterator(testFile, Pattern.compile("^\\s*$"), true));
}
logger.info("Number of predicates: "+p.getDataAlphabet().size());