EntityMention arg0,
EntityMention arg1,
List<String> types,
List<String> checklist,
Logger logger) {
SemanticGraph graph = null;
if(dependencyType == null) dependencyType = DEPENDENCY_TYPE.COLLAPSED_CCPROCESSED; // needed for backwards compatibility. old serialized models don't have it
if(dependencyType == DEPENDENCY_TYPE.COLLAPSED_CCPROCESSED)
graph = rel.getSentence().get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
else if(dependencyType == DEPENDENCY_TYPE.COLLAPSED)
graph = rel.getSentence().get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
else if(dependencyType == DEPENDENCY_TYPE.BASIC)
graph = rel.getSentence().get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
else
throw new RuntimeException("ERROR: unknown dependency type: " + dependencyType);
if (graph == null) {
Tree tree = rel.getSentence().get(TreeAnnotation.class);
if(tree == null){
System.err.println("WARNING: found sentence without TreeAnnotation. Skipped dependency-path features.");
return;
}
try {
graph = SemanticGraphFactory.generateCollapsedDependencies(tree);
} catch(Exception e){
System.err.println("WARNING: failed to generate dependencies from tree " + tree.toString());
e.printStackTrace();
System.err.println("Skipped dependency-path features.");
return;
}
}
IndexedWord node0 = graph.getNodeByIndexSafe(arg0.getSyntacticHeadTokenPosition() + 1);
IndexedWord node1 = graph.getNodeByIndexSafe(arg1.getSyntacticHeadTokenPosition() + 1);
if (node0 == null) {
checklist.removeAll(dependencyFeatures);
return;
}
if (node1 == null) {
checklist.removeAll(dependencyFeatures);
return;
}
List<SemanticGraphEdge> edgePath = graph.getShortestUndirectedPathEdges(node0, node1);
List<IndexedWord> pathNodes = graph.getShortestUndirectedPathNodes(node0, node1);
if (edgePath == null) {
checklist.removeAll(dependencyFeatures);
return;
}
if (pathNodes == null || pathNodes.size() <= 1) { // arguments have the same head.
checklist.removeAll(dependencyFeatures);
return;
}
// dependency_path: Concatenation of relations in the path between the args in the dependency graph, including directions
// e.g. "subj-> <-prep_in <-mod"
// dependency_path_lowlevel: Same but with finer-grained syntactic relations
// e.g. "nsubj-> <-prep_in <-nn"
if (usingFeature(types, checklist, "dependency_path")) {
features.setCount("dependency_path:"+generalizedDependencyPath(edgePath, node0), 1.0);
}
if (usingFeature(types, checklist, "dependency_path_lowlevel")) {
String depLowLevel = dependencyPath(edgePath, node0);
if(logger != null && ! rel.getType().equals(RelationMention.UNRELATED)) logger.info("dependency_path_lowlevel: " + depLowLevel);
features.setCount("dependency_path_lowlevel:" + depLowLevel, 1.0);
}
List<String> pathLemmas = new ArrayList<String>();
List<String> noArgPathLemmas = new ArrayList<String>();
// pathLemmas receives the lemma of every node on the path; noArgPathLemmas leaves out
// nodes whose (1-based) token index falls inside the extent of either argument
Set<Integer> indecesToSkip = new HashSet<Integer>();
for(int i = arg0.getExtentTokenStart(); i < arg0.getExtentTokenEnd(); i ++) indecesToSkip.add(i + 1);
for(int i = arg1.getExtentTokenStart(); i < arg1.getExtentTokenEnd(); i ++) indecesToSkip.add(i + 1);
for (IndexedWord node : pathNodes){
pathLemmas.add(Morphology.lemmaStatic(node.value(), node.tag(), true));
if(! indecesToSkip.contains(node.index()))
noArgPathLemmas.add(Morphology.lemmaStatic(node.value(), node.tag(), true));
}
// Verb-based features
// These features were designed on the assumption that verbs are often trigger words
// (specifically with the "Kill" relation from Roth CONLL04 in mind)
// but they didn't end up boosting performance on Roth CONLL04, so they may not be necessary.
//
// dependency_paths_to_verb: for each verb in the dependency path,
// the path to the left of the (lemmatized) verb, to the right, and both, e.g.
// "subj-> be"
// "be <-prep_in <-mod"
// "subj-> be <-prep_in <-mod"
// (Higher level relations used as opposed to "lowlevel" finer grained relations)
if (usingFeature(types, checklist, "dependency_paths_to_verb")) {
for (IndexedWord node : pathNodes) {
if (node.tag().contains("VB")) {
if (node.equals(node0) || node.equals(node1)) {
continue;
}
String lemma = Morphology.lemmaStatic(node.value(), node.tag(), true);
String node1Path = generalizedDependencyPath(graph.getShortestUndirectedPathEdges(node, node1), node);
String node0Path = generalizedDependencyPath(graph.getShortestUndirectedPathEdges(node0, node), node0);
features.setCount("dependency_paths_to_verb:" + node0Path + " " + lemma, 1.0);
features.setCount("dependency_paths_to_verb:" + lemma + " " + node1Path, 1.0);
features.setCount("dependency_paths_to_verb:" + node0Path + " " + lemma + " " + node1Path, 1.0);
}
}
}
// dependency_path_stubs_to_verb:
// For each verb in the dependency path,
// the verb concatenated with the first (high-level) relation in the path from arg0;
// the verb concatenated with the first relation in the path from arg1,
// and the verb concatenated with both relations. E.g. (same arguments and sentence as example above)
// "stub: subj-> be"
// "stub: be <-mod"
// "stub: subj-> be <-mod"
if (usingFeature(types, checklist, "dependency_path_stubs_to_verb")) {
for (IndexedWord node : pathNodes) {
SemanticGraphEdge edge0 = edgePath.get(0);
SemanticGraphEdge edge1 = edgePath.get(edgePath.size() - 1);
if (node.tag().contains("VB")) {
if (node.equals(node0) || node.equals(node1)) {
continue;
}
String lemma = Morphology.lemmaStatic(node.value(), node.tag(), true);
String edge0str, edge1str;
if (node0.equals(edge0.getGovernor())) {
edge0str = "<-" + generalizeRelation(edge0.getRelation());
} else {
edge0str = generalizeRelation(edge0.getRelation()) + "->";
}
if (node1.equals(edge1.getGovernor())) {
edge1str = generalizeRelation(edge1.getRelation()) + "->";
} else {
edge1str = "<-" + generalizeRelation(edge1.getRelation());
}
features.setCount("stub: " + edge0str + " " + lemma, 1.0);
features.setCount("stub: " + lemma + edge1str, 1.0);
features.setCount("stub: " + edge0str + " " + lemma + " " + edge1str, 1.0);
}
}
}
if (usingFeature(types, checklist, "verb_in_dependency_path")) {
for (IndexedWord node : pathNodes) {
if (node.tag().contains("VB")) {
if (node.equals(node0) || node.equals(node1)) {
continue;
}
SemanticGraphEdge rightEdge = graph.getShortestUndirectedPathEdges(node, node1).get(0);
SemanticGraphEdge leftEdge = graph.getShortestUndirectedPathEdges(node, node0).get(0);
String rightRelation, leftRelation;
boolean governsLeft = false, governsRight = false;
if (node.equals(rightEdge.getGovernor())) {
rightRelation = " <-" + generalizeRelation(rightEdge.getRelation());
governsRight = true;