}
/**
 * Command line entry point.  Registers the supported options, validates the
 * arguments, builds a {@code ChildesParser} from the two positional
 * arguments and the selected flags, feeds it either every xml file found
 * under the base directory or each file in the given file list, and finally
 * calls {@code finish()} on the parser.
 *
 * <p>If neither {@code fileList} nor {@code baseChildesDirectory} is given,
 * or the number of positional arguments is not exactly two, a usage message
 * is printed to standard out and the method returns without parsing.
 *
 * @param args the command line arguments: any options followed by
 *        {@code <outfile> <pos-file>}
 */
public static void main(String[] args) {
    // Add the options.
    ArgOptions options = new ArgOptions();
    options.addOption('p', "partOfSpeechTag",
                      "If set, each token will be appended with its " +
                      "part of speech tag, such as cat-noun",
                      false, null, "Optional");
    options.addOption('S', "separateByPeriod",
                      "If set, separates sentences by periods",
                      false, null, "Optional");
    options.addOption('U', "utterancePerDoc",
                      "If set, one utterance is considered a document, " +
                      "otherwise all utterances in a file will be " +
                      "considered a document",
                      false, null, "Optional");
    options.addOption('g', "generateOneDoc",
                      "If set, only one document will be generated for " +
                      "all the text processed",
                      false, null, "Optional");
    options.addOption('A', "augmentedUtterances",
                      "Generates augmented utterances from comments " +
                      "about the utterances",
                      false, null, "Augmented");
    options.addOption('F', "augmentedUtterancesFilter",
                      "Specifies a token filter for which tokens in " +
                      "comments are used to generate augmented utterances",
                      true, "SPEC", "Augmented");
    options.addOption('d', "baseChildesDirectory",
                      "The base childes directory. XML files will be " +
                      "searched for recursively from this base. Use of " +
                      "this overrides the fileList option.",
                      true, "DIRECTORY", "Required (At least one of)");
    options.addOption('f', "fileList",
                      "The list of files to process",
                      true, "FILE[,FILE]*", "Required (At least one of)");

    // Process the options and emit a usage message if no input source was
    // given or the two positional arguments are missing.
    options.parseOptions(args);
    boolean hasFileList = options.hasOption("fileList");
    boolean hasBaseDir = options.hasOption("baseChildesDirectory");
    if ((!hasFileList && !hasBaseDir) ||
        options.numPositionalArgs() != 2) {
        System.out.println(
            "usage: java ChildesParser [options] " +
            "<outfile> <pos-file>\n" +
            options.prettyPrint());
        return;
    }

    // The default is to have all utterances from a conversation be in a
    // single document; the flag switches to one document per utterance.
    boolean utterancePerDoc = options.hasOption("utterancePerDoc");

    // The token filter is only installed when augmented utterances are
    // actually being generated.
    boolean genAugmented = options.hasOption("augmentedUtterances");
    if (genAugmented && options.hasOption("augmentedUtterancesFilter")) {
        String filterConf =
            options.getStringOption("augmentedUtterancesFilter");
        Properties p = System.getProperties();
        p.setProperty(IteratorFactory.TOKEN_FILTER_PROPERTY, filterConf);
        IteratorFactory.setProperties(p);
    }

    ChildesParser parser = new ChildesParser(options.getPositionalArg(0),
                                             options.getPositionalArg(1),
                                             genAugmented,
                                             options.hasOption('S'),
                                             options.hasOption('p'),
                                             options.hasOption('g'));

    if (hasBaseDir) {
        // As documented in the option's help text, the base directory
        // takes precedence over the file list when both are supplied.
        File baseDir =
            new File(options.getStringOption("baseChildesDirectory"));
        findXmlFiles(parser, utterancePerDoc, baseDir);
    } else {
        // Otherwise process the given file list; the validation above
        // guarantees that it is present on this branch.
        String[] files = options.getStringOption("fileList").split(",");
        for (String file : files) {
            parser.parseFile(new File(file), utterancePerDoc);
        }
    }

    parser.finish();
}