if (testOption.value != null && restArgs < args.length - 1)
testFile = new FileReader(new File(args[restArgs+1]));
} else
testFile = new FileReader(new File(args[restArgs]));
// Pipe, model and evaluator used by the rest of this method; each stays null
// until one of the branches below (or later code) assigns it.
Pipe p = null;
CRF crf = null;
TransducerEvaluator eval = null;
if (continueTrainingOption.value || !trainOption.value) {
  // Continuing training, or not training at all: a previously serialized CRF
  // is required, and its input pipe is reused for all data read afterwards.
  if (modelOption.value == null) {
    commandOptions.printUsage(true);
    throw new IllegalArgumentException("Missing model file option");
  }
  // NOTE(review): Java-native deserialization can run code embedded in the
  // stream; only load model files that come from a trusted source.
  FileInputStream modelStream = new FileInputStream(modelOption.value);
  try {
    ObjectInputStream s = new ObjectInputStream(modelStream);
    crf = (CRF) s.readObject();
  } finally {
    // Closing the underlying stream here releases the file handle even when
    // reading the stream header or readObject() throws.
    modelStream.close();
  }
  p = crf.getInputPipe();
} else {
  // Training a new model: start from a fresh feature pipe and look up the
  // default label in its target alphabet before any data is read
  // (presumably so that label is registered first -- confirm against Alphabet).
  p = new SimpleTaggerSentence2FeatureVectorSequence();
  p.getTargetAlphabet().lookupIndex(defaultOption.value);
}
// Build the instance lists by pushing the input files through the pipe.
// Input is grouped with the pattern "^\s*$", which matches empty or
// whitespace-only lines (presumably the separator between sequences --
// confirm against LineGroupIterator).
if (trainOption.value) {
  // Training: targets are always processed.
  p.setTargetProcessing(true);
  Pattern blankLine = Pattern.compile("^\\s*$");
  trainingData = new InstanceList(p);
  trainingData.addThruPipe(new LineGroupIterator(trainingFile, blankLine, true));
  logger.info("Number of features in training data: " + p.getDataAlphabet().size());
  if (testOption.value != null) {
    if (testFile == null) {
      // No separate test file: split the training data by the configured
      // training fraction, using the configured random seed.
      Random rng = new Random(randomSeedOption.value);
      double[] proportions =
          new double[] {trainingFractionOption.value, 1 - trainingFractionOption.value};
      InstanceList[] parts = trainingData.split(rng, proportions);
      trainingData = parts[0];
      testData = parts[1];
    } else {
      // A test file was given: read it through the same pipe.
      testData = new InstanceList(p);
      testData.addThruPipe(new LineGroupIterator(testFile, blankLine, true));
    }
  }
} else {
  // Not training: the single input file becomes the test data. Targets are
  // processed only when a test option was supplied.
  p.setTargetProcessing(testOption.value != null);
  testData = new InstanceList(p);
  testData.addThruPipe(
      new LineGroupIterator(testFile, Pattern.compile("^\\s*$"), true));
}
logger.info("Number of predicates: " + p.getDataAlphabet().size());
// When weight printing is requested, print the model and exit immediately.
if (printWeights.value) {
  if (crf == null) {
    // crf is still null here when no serialized model was read; report that
    // as an argument error instead of failing with a NullPointerException.
    commandOptions.printUsage(true);
    throw new IllegalArgumentException("Cannot print weights: no model was loaded");
  }
  crf.print();
  System.exit(0);
}
if (testOption.value != null)
{
if (testOption.value.startsWith("lab"))
eval = new TokenAccuracyEvaluator(new InstanceList[] {trainingData, testData}, new String[] {"Training", "Testing"});
else if (testOption.value.startsWith("seg="))
{
String[] pairs = testOption.value.substring(4).split(",");
if (pairs.length < 1)
{
commandOptions.printUsage(true);
throw new IllegalArgumentException(
"Missing segment start/continue labels: " + testOption.value);
}
String startTags[] = new String[pairs.length];
String continueTags[] = new String[pairs.length];
for (int i = 0; i < pairs.length; i++)
{
String[] pair = pairs[i].split("\\.");
if (pair.length != 2)
{
commandOptions.printUsage(true);
throw new
IllegalArgumentException(
"Incorrectly-specified segment start and end labels: " +
pairs[i]);
}
startTags[i] = pair[0];
continueTags[i] = pair[1];
}
eval = new MultiSegmentationEvaluator(new InstanceList[] {trainingData, testData}, new String[] {"Training", "Testing"},
startTags, continueTags);
}
else
{
commandOptions.printUsage(true);
throw new IllegalArgumentException("Invalid test option: " +
testOption.value);
}
}
// When the pipe is processing targets, log every entry of the target
// alphabet on a single line, in index order, prefixed with "Labels:".
if (p.isTargetProcessing()) {
  Alphabet labelAlphabet = p.getTargetAlphabet();
  StringBuilder labelLine = new StringBuilder("Labels:");
  for (int idx = 0; idx < labelAlphabet.size(); idx++) {
    labelLine.append(" ");
    labelLine.append(labelAlphabet.lookupObject(idx).toString());
  }
  logger.info(labelLine.toString());
}