extentTokens.add(initCoreLabel("It"));
extentTokens.add(initCoreLabel("was"));
final int ADDED_WORDS = 2;
for (int i = m.startIndex; i < endIdx; i++) {
// Add everything except separated dashes! The separated dashes mess with the parser too badly.
CoreLabel label = tokens.get(i);
if ( ! "-".equals(label.word())) {
extentTokens.add(tokens.get(i));
} else {
approximateness++;
}
}
extentTokens.add(initCoreLabel("."));
// constrain the parse to the part we're interested in.
// Starting from ADDED_WORDS comes from skipping "It was".
// -1 to exclude the period.
// We now let it be any kind of nominal constituent, since there
// are VP and S ones
ParserConstraint constraint = new ParserConstraint(ADDED_WORDS, extentTokens.size() - 1, Pattern.compile(".*"));
List<ParserConstraint> constraints = Collections.singletonList(constraint);
Tree tree = parse(extentTokens, constraints);
convertToCoreLabels(tree); // now unnecessary, as parser uses CoreLabels?
tree.indexSpans(m.startIndex - ADDED_WORDS); // remember it has ADDED_WORDS extra words at the beginning
Tree subtree = findPartialSpan(tree, m.startIndex);
// There was a possible problem that with a crazy parse, extentHead could be one of the added words, not a real word!
// Now we make sure in findPartialSpan that it can't be before the real start, and in safeHead, we disallow something
// past the right end (that is, just that final period).
Tree extentHead = safeHead(subtree, endIdx);
assert(extentHead != null);
// extentHead is a child in the local extent parse tree. we need to find the corresponding node in the main tree
// Because we deleted dashes, its index will be >= the index in the extent parse tree
CoreLabel l = (CoreLabel) extentHead.label();
Tree realHead = funkyFindLeafWithApproximateSpan(root, l.value(), l.get(CoreAnnotations.BeginIndexAnnotation.class), approximateness);
assert(realHead != null);
return realHead;
}
// If reparsing wasn't allowed, try to find a span in the tree