if (inputPath == null) {
  throw new IllegalArgumentException("Must specify input file with -input");
}
// Load the parser and build a binarizer consistent with its head finder,
// so parse trees can be converted to the binary form the RNN expects.
LexicalizedParser parser = LexicalizedParser.loadModel(parserModel);
TreeBinarizer binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(), parser.treebankLanguagePack());
// Optional sentiment model used to prelabel nodes before applying user labels.
if (sentimentModelPath != null) {
  sentimentModel = SentimentModel.loadSerialized(sentimentModelPath);
}
String text = IOUtils.slurpFileNoExceptions(inputPath);
String[] chunks = text.split("\\n\\s*\\n+"); // need blank line to make a new chunk
for (String chunk : chunks) {
  // BUGFIX: was `chunk.trim() == ""`, an identity comparison that never
  // matches a freshly trimmed string; use content check instead.
  if (chunk.trim().isEmpty()) {
    continue;
  }
  // The expected format is that line 0 will be the text of the
  // sentence, and each subsequent line, if any, will be a value
  // followed by the sequence of tokens that get that value.
  // Here we take the first line and tokenize it as one sentence.
  String[] lines = chunk.trim().split("\\n");
  String sentence = lines[0];
  StringReader sin = new StringReader(sentence);
  DocumentPreprocessor document = new DocumentPreprocessor(sin);
  // Treat the whole line as a single sentence: only a newline ends it.
  document.setSentenceFinalPuncWords(new String[] {"\n"});
  List<HasWord> tokens = document.iterator().next();
  // First token is the gold label for the whole sentence.
  // Use valueOf instead of the deprecated boxing constructor.
  Integer mainLabel = Integer.valueOf(tokens.get(0).word());
  tokens = tokens.subList(1, tokens.size());
  // Remaining lines each give a label and a token span that receives it.
  Map<Pair<Integer, Integer>, String> spanToLabels = Generics.newHashMap();
  for (int i = 1; i < lines.length; ++i) {
    extractLabels(spanToLabels, tokens, lines[i]);
  }
  // TODO: add an option which treats the spans as constraints when parsing
  Tree tree = parser.apply(tokens);
  Tree binarized = binarizer.transformTree(tree);
  Tree collapsedUnary = transformer.transformTree(binarized);
  // if there is a sentiment model for use in prelabeling, we
  // label here and then use the user given labels to adjust
  if (sentimentModel != null) {