// Build trainingData and/or testData by feeding the input readers through
// pipe p. Every reader is wrapped in a LineGroupIterator keyed on the regex
// ^\s*$ (empty or whitespace-only lines).
// NOTE(review): presumably those lines delimit one instance from the next,
// and the trailing 'true' tells the iterator to skip them -- confirm against
// LineGroupIterator.
if (trainOption.value) {
  // Training run: targets are processed while reading the data.
  p.setTargetProcessing(true);
  trainingData = new InstanceList(p);
  trainingData.addThruPipe(
      new LineGroupIterator(trainingFile, Pattern.compile("^\\s*$"), true));
  logger.info(
      "Number of features in training data: " + p.getDataAlphabet().size());

  if (testOption.value != null) {
    if (testFile == null) {
      // Testing was requested but no separate test reader exists: carve the
      // test set out of the training data with a seeded random split.
      // Element 0 of the result stays as training data, element 1 becomes
      // the test data.
      InstanceList[] trainingLists = trainingData.split(
          new Random(randomSeedOption.value),
          new double[] {
            trainingFractionOption.value,
            1 - trainingFractionOption.value
          });
      trainingData = trainingLists[0];
      testData = trainingLists[1];
    } else {
      // A separate test reader exists: load it through the same pipe.
      testData = new InstanceList(p);
      testData.addThruPipe(
          new LineGroupIterator(testFile, Pattern.compile("^\\s*$"), true));
    }
  }
} else {
  // Not training: only testFile is read. Target processing is switched on
  // exactly when a test option was supplied, and off otherwise.
  p.setTargetProcessing(testOption.value != null);
  testData = new InstanceList(p);
  testData.addThruPipe(
      new LineGroupIterator(testFile, Pattern.compile("^\\s*$"), true));
}
// Logged on every path, after all data has gone through the pipe.
logger.info("Number of predicates: " + p.getDataAlphabet().size());