package edu.stanford.nlp.semgraph;
import edu.stanford.nlp.graph.DirectedMultiGraph;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.MapFactory;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringParsingTask;
import edu.stanford.nlp.util.StringUtils;
import java.io.Serializable;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static edu.stanford.nlp.trees.GrammaticalRelation.ROOT;
// todo [cdm 2013]: The treatment of roots in this class should probably be redone.
// todo [cdm 2013]: Probably we should put fake root node in graph and arc(s) from it.
// todo [cdm 2013]: At any rate, printing methods should print the root
/**
* Represents a semantic graph of a sentence or document, with IndexedWord
* objects for nodes.
* <p>
* Notes:
* <br>
* The root is not at present represented as a vertex in the graph.
* At present you need to get a root/roots
* from the separate roots variable and to know about it.
* This should maybe be changed, because otherwise, doing things like
* simply getting the set of nodes or edges from the graph doesn't give
* you root nodes or edges.
* <br>
* Given the kinds of representations that we normally use with
* typedDependenciesCollapsed, there can be (small) cycles in a
* SemanticGraph, and these cycles may involve the node that is conceptually the
* root of the graph, so there may be no node without a parent node. You can
* better get at the root(s) via the variable and methods provided.
* <br>
* There is no mechanism for returning all edges at once (eg <code>edgeSet()</code>).
* This is intentional. Use <code>edgeIterable()</code> to iterate over the edges if necessary.
*
* @author Christopher Cox
* @author Teg Grenager
* @see SemanticGraphEdge
* @see IndexedWord
*/
public class SemanticGraph implements Serializable {
/** Compile-time flag, fixed to {@code false}; it is not read anywhere in the visible part of this class. */
public static final boolean addSRLArcs = false;
/** Shared formatter instance. NOTE(review): presumably used by formatted-string methods further down the file -- confirm. */
private static final SemanticGraphFormatter formatter = new SemanticGraphFormatter();
/**
* The distinguished root vertices, if known.
*/
private final Collection<IndexedWord> roots;
/** Backing directed multigraph; most accessors of this class delegate straight to it. */
private final DirectedMultiGraph<IndexedWord, SemanticGraphEdge> graph;
/** Hash-map factory. NOTE(review): looks like it is meant for the backing graph's outer adjacency map -- confirm in the constructors. */
private static final MapFactory<IndexedWord, Map<IndexedWord, List<SemanticGraphEdge>>> outerMapFactory = MapFactory.hashMapFactory();
/** Hash-map factory. NOTE(review): looks like it is meant for the backing graph's inner adjacency map -- confirm in the constructors. */
private static final MapFactory<IndexedWord, List<SemanticGraphEdge>> innerMapFactory = MapFactory.hashMapFactory();
/** Factory used throughout this class (via {@code newSet()}) to create scratch sets of vertices. */
private static final MapFactory<IndexedWord, IndexedWord> wordMapFactory = MapFactory.hashMapFactory();
/**
 * Reports how many edges the backing graph currently holds.
 *
 * @return the edge count as reported by the backing graph
 */
public int edgeCount() {
  final int numEdges = graph.getNumEdges();
  return numEdges;
}
/**
 * Reports the out-degree of a vertex, as computed by the backing graph.
 *
 * @param vertex the vertex to inspect
 * @return the out-degree of {@code vertex}
 */
public int outDegree(IndexedWord vertex) {
  final int degree = graph.getOutDegree(vertex);
  return degree;
}
/**
 * Reports the in-degree of a vertex, as computed by the backing graph.
 *
 * @param vertex the vertex to inspect
 * @return the in-degree of {@code vertex}
 */
public int inDegree(IndexedWord vertex) {
  final int degree = graph.getInDegree(vertex);
  return degree;
}
/**
 * Fetches every edge that runs from {@code gov} to {@code dep}.
 *
 * @param gov the governor (source) vertex
 * @param dep the dependent (target) vertex
 * @return whatever the backing graph returns for this pair; callers in this
 *         class treat a {@code null} result as "no edges"
 */
public List<SemanticGraphEdge> getAllEdges(IndexedWord gov, IndexedWord dep) {
  List<SemanticGraphEdge> between = graph.getEdges(gov, dep);
  return between;
}
// TODO: avoid this method where possible: several edges may connect the
// same pair of vertices and only the first one is returned. Callers should
// iterate over getAllEdges instead; this has already been done for all
// uses outside RTE.
/**
 * Returns the first edge from {@code gov} to {@code dep}.
 *
 * @param gov the governor (source) vertex
 * @param dep the dependent (target) vertex
 * @return the first edge in the backing graph's edge list for this pair, or
 *         {@code null} if that list is null or empty
 */
public SemanticGraphEdge getEdge(IndexedWord gov, IndexedWord dep) {
  List<SemanticGraphEdge> candidates = graph.getEdges(gov, dep);
  boolean hasAny = candidates != null && !candidates.isEmpty();
  return hasAny ? candidates.get(0) : null;
}
/**
 * Adds a vertex to the backing graph. The root collection is not touched.
 *
 * @param vertex the vertex to add
 */
public void addVertex(IndexedWord vertex) {
graph.addVertex(vertex);
}
/**
 * Tests whether the backing graph knows the given vertex.
 *
 * @param vertex the vertex to look up
 * @return {@code true} if the backing graph contains {@code vertex}
 */
public boolean containsVertex(IndexedWord vertex) {
  final boolean present = graph.containsVertex(vertex);
  return present;
}
/**
 * Tests whether the backing graph reports an edge from {@code source} to {@code target}.
 *
 * @param source the governor vertex
 * @param target the dependent vertex
 * @return the result of the backing graph's {@code isEdge} check
 */
public boolean containsEdge(IndexedWord source, IndexedWord target) {
  final boolean connected = graph.isEdge(source, target);
  return connected;
}
/**
 * Tests whether the graph has an edge between the endpoints of {@code edge}.
 * Only the endpoints are compared: the relation carried by {@code edge} is
 * not consulted, so any edge from its source to its target counts.
 *
 * @param edge the edge whose endpoints are looked up
 * @return {@code true} if some edge links the source of {@code edge} to its target
 */
public boolean containsEdge(SemanticGraphEdge edge) {
  IndexedWord from = edge.getSource();
  IndexedWord to = edge.getTarget();
  return containsEdge(from, to);
}
/**
 * Returns the vertices of the backing graph.
 *
 * @return the set handed back by the backing graph's {@code getAllVertices()}
 */
public Set<IndexedWord> vertexSet() {
  Set<IndexedWord> allVertices = graph.getAllVertices();
  return allVertices;
}
/**
 * Asks the backing graph to remove the given edge between its own source and target.
 *
 * @param e the edge to remove
 * @return the backing graph's removal result
 */
public boolean removeEdge(SemanticGraphEdge e) {
  final boolean removed = graph.removeEdge(e.getSource(), e.getTarget(), e);
  return removed;
}
/**
 * Asks the backing graph to remove a vertex. The root collection is not updated here.
 *
 * @param vertex the vertex to remove
 * @return the backing graph's removal result
 */
public boolean removeVertex(IndexedWord vertex) {
  final boolean removed = graph.removeVertex(vertex);
  return removed;
}
/**
 * Copies the vertices into a fresh list and sorts it by the natural ordering
 * of {@code IndexedWord}. Because a copy is made and sorted on every call,
 * prefer {@link #vertexSet()} unless the ordering is actually needed.
 *
 * @return a new, sorted list of all vertices
 */
public List<IndexedWord> vertexListSorted() {
  List<IndexedWord> ordered = new ArrayList<IndexedWord>(vertexSet());
  Collections.sort(ordered);
  return ordered;
}
/**
 * Collects every edge into a fresh list and sorts it with
 * {@code SemanticGraphEdge.orderByTargetComparator()}. A list is built and
 * sorted on every call, so prefer {@link #edgeIterable()} when order does
 * not matter.
 *
 * @return a new, sorted list of all edges in the graph
 */
public List<SemanticGraphEdge> edgeListSorted() {
  List<SemanticGraphEdge> sorted = new ArrayList<SemanticGraphEdge>();
  Iterator<SemanticGraphEdge> it = edgeIterable().iterator();
  while (it.hasNext()) {
    sorted.add(it.next());
  }
  Collections.sort(sorted, SemanticGraphEdge.orderByTargetComparator());
  return sorted;
}
/**
 * Exposes the backing graph's iterable over all edges.
 *
 * @return an iterable over every edge in the graph
 */
public Iterable<SemanticGraphEdge> edgeIterable() {
  Iterable<SemanticGraphEdge> allEdges = graph.edgeIterable();
  return allEdges;
}
/**
 * Exposes the backing graph's iterator over the edges leaving {@code v}.
 *
 * @param v the source vertex
 * @return an iterator over the outgoing edges of {@code v}
 */
public Iterator<SemanticGraphEdge> outgoingEdgeIterator(IndexedWord v) {
  Iterator<SemanticGraphEdge> outgoing = graph.outgoingEdgeIterator(v);
  return outgoing;
}
/**
 * Exposes the backing graph's iterable over the edges leaving {@code v}.
 *
 * @param v the source vertex
 * @return an iterable over the outgoing edges of {@code v}
 */
public Iterable<SemanticGraphEdge> outgoingEdgeIterable(IndexedWord v) {
  Iterable<SemanticGraphEdge> outgoing = graph.outgoingEdgeIterable(v);
  return outgoing;
}
/**
 * Exposes the backing graph's iterator over the edges arriving at {@code v}.
 *
 * @param v the target vertex
 * @return an iterator over the incoming edges of {@code v}
 */
public Iterator<SemanticGraphEdge> incomingEdgeIterator(IndexedWord v) {
  Iterator<SemanticGraphEdge> incoming = graph.incomingEdgeIterator(v);
  return incoming;
}
/**
 * Exposes the backing graph's iterable over the edges arriving at {@code v}.
 *
 * @param v the target vertex
 * @return an iterable over the incoming edges of {@code v}
 */
public Iterable<SemanticGraphEdge> incomingEdgeIterable(IndexedWord v) {
  Iterable<SemanticGraphEdge> incoming = graph.incomingEdgeIterable(v);
  return incoming;
}
/**
 * Materializes the outgoing edges of {@code v} as a list via {@code CollectionUtils.toList}.
 *
 * @param v the source vertex
 * @return a list of the edges leaving {@code v}
 */
public List<SemanticGraphEdge> outgoingEdgeList(IndexedWord v) {
  Iterable<SemanticGraphEdge> outgoing = outgoingEdgeIterable(v);
  return CollectionUtils.toList(outgoing);
}
/**
 * Materializes the incoming edges of {@code v} as a list via {@code CollectionUtils.toList}.
 *
 * @param v the target vertex
 * @return a list of the edges arriving at {@code v}
 */
public List<SemanticGraphEdge> incomingEdgeList(IndexedWord v) {
  Iterable<SemanticGraphEdge> incoming = incomingEdgeIterable(v);
  return CollectionUtils.toList(incoming);
}
/**
 * Reports whether the backing graph considers itself empty.
 *
 * @return the backing graph's {@code isEmpty()} result
 */
public boolean isEmpty() {
  final boolean empty = graph.isEmpty();
  return empty;
}
/**
 * Determines how many generations separate {@code child} from
 * {@code ancestor}, looking no further than grandparents.
 *
 * @param child candidate child
 * @param ancestor candidate ancestor
 * @return 1 if {@code ancestor} is a parent of {@code child}, 2 if it is a
 *         grandparent, or -1 if neither relationship holds
 * @throws IllegalArgumentException if {@code child} is not a vertex of this
 *         graph (raised by {@link #getParents(IndexedWord)})
 */
public int isAncestor(IndexedWord child, IndexedWord ancestor) {
  Set<IndexedWord> directParents = getParents(child);
  if (directParents.contains(ancestor)) {
    return 1;
  }
  int generations = -1;
  Iterator<IndexedWord> it = directParents.iterator();
  while (generations < 0 && it.hasNext()) {
    if (getParents(it.next()).contains(ancestor)) {
      generations = 2;
    }
  }
  return generations;
}
/**
 * Returns the maximum distance from {@code v1} / {@code v2} to a least
 * common ancestor. The search goes no higher than grandparents.
 *
 * @param v1 first vertex
 * @param v2 second vertex
 * @return 0 if the two vertices are equal; 1 if one is a parent of the other
 *         or they share a parent; 2 if a grandparent is involved on either
 *         side; -1 if no common parent or grandparent is found
 * @throws IllegalArgumentException if either vertex is not in this graph
 *         (raised by {@link #getParents(IndexedWord)})
 */
public int commonAncestor(IndexedWord v1, IndexedWord v2) {
  if (v1.equals(v2)) {
    return 0;
  }
  Set<IndexedWord> v1Parents = this.getParents(v1);
  Set<IndexedWord> v2Parents = this.getParents(v2);
  // Distance 1: one vertex is the other's parent, or they share a parent.
  if (v1Parents.contains(v2) || v2Parents.contains(v1)
      || !Collections.disjoint(v1Parents, v2Parents)) {
    return 1;
  }
  // Grandparents only feed set-membership tests, so use the unsorted
  // getParents() on both sides rather than the sorted-copy getParentList().
  Set<IndexedWord> v1GrandParents = wordMapFactory.newSet();
  for (IndexedWord v1Parent : v1Parents) {
    v1GrandParents.addAll(this.getParents(v1Parent));
  }
  Set<IndexedWord> v2GrandParents = wordMapFactory.newSet();
  for (IndexedWord v2Parent : v2Parents) {
    v2GrandParents.addAll(this.getParents(v2Parent));
  }
  // Distance 2: a grandparent is the other vertex, or a parent/grandparent
  // of one vertex is a grandparent of the other.
  boolean withinTwo = v1GrandParents.contains(v2)
      || v2GrandParents.contains(v1)
      || !Collections.disjoint(v1Parents, v2GrandParents)
      || !Collections.disjoint(v2Parents, v1GrandParents)
      || !Collections.disjoint(v1GrandParents, v2GrandParents);
  return withinTwo ? 2 : -1;
}
/**
 * Finds a least common ancestor of two vertices, looking no higher than
 * grandparents. Either input can itself be the answer: when the inputs are
 * equal, or when one is a parent or grandparent of the other.
 *
 * @param v1 first vertex
 * @param v2 second vertex
 * @return the common ancestor found first, or {@code null} if the two
 *         vertices share no parent or grandparent
 */
public IndexedWord getCommonAncestor(IndexedWord v1, IndexedWord v2) {
  if (v1.equals(v2)) {
    return v1;
  }
  if (this.isAncestor(v1, v2) > 0) {
    return v2;
  }
  if (this.isAncestor(v2, v1) > 0) {
    return v1;
  }
  Set<IndexedWord> parentsOfFirst = this.getParents(v1);
  Set<IndexedWord> parentsOfSecond = this.getParents(v2);

  // Shared parent? Collect v1's grandparents along the way.
  Set<IndexedWord> grandparentsOfFirst = wordMapFactory.newSet();
  for (IndexedWord candidate : parentsOfFirst) {
    if (parentsOfSecond.contains(candidate)) {
      return candidate;
    }
    grandparentsOfFirst.addAll(this.getParents(candidate));
  }

  // A grandparent of v1 that is a parent of v2?
  for (IndexedWord candidate : grandparentsOfFirst) {
    if (parentsOfSecond.contains(candidate)) {
      return candidate;
    }
  }

  // A grandparent of v2 that is a parent or grandparent of v1?
  Set<IndexedWord> grandparentsOfSecond = wordMapFactory.newSet();
  for (IndexedWord parent : parentsOfSecond) {
    grandparentsOfSecond.addAll(this.getParents(parent));
  }
  for (IndexedWord candidate : grandparentsOfSecond) {
    if (parentsOfFirst.contains(candidate) || grandparentsOfFirst.contains(candidate)) {
      return candidate;
    }
  }
  return null;
}
// todo [cdm 2013]: Completely RTE-specific methods like this one should be moved to a static class of helper methods under RTE
/**
 * Checks a vertex against a neighbourhood pattern. The pattern is a
 * sequence of steps, each a direction marker followed by a word:
 * {@code <w} requires some parent whose lemma equals {@code w};
 * {@code >w} requires some child whose lemma equals {@code w} (falling
 * back to the lower-cased word when the child has no lemma). Every step is
 * tested against {@code vertex} itself and all steps must succeed.
 *
 * @param pattern the pattern, e.g. {@code "<eat>the"}
 * @param vertex the vertex to test
 * @param det if {@code true}, child steps only consider determiner and
 *        predeterminer children; if {@code false}, they consider every child
 *        except those attached by a relation printed as {@code "det"}
 * @return {@code true} if every step of the pattern matches
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 * @throws RuntimeException if a step starts with neither {@code <} nor {@code >}
 */
public boolean matchPatternToVertex(String pattern, IndexedWord vertex, boolean det) {
  if (!containsVertex(vertex)) {
    throw new IllegalArgumentException();
  }
  // Put a comma in front of each direction marker so the steps can be split apart.
  String pat = pattern.replace("<", ",<").replace(">", ",>");
  for (String s : pat.split(",")) {
    if (s.isEmpty()) {
      continue;
    }
    char dir = s.charAt(0);
    String word = s.substring(1);
    boolean match = false;
    if (dir == '<') {
      // look for a matching parent; word.equals(..) tolerates a missing lemma
      for (IndexedWord parent : getParents(vertex)) {
        if (word.equals(parent.get(CoreAnnotations.LemmaAnnotation.class))) {
          match = true;
          break;
        }
      }
    } else if (dir == '>') {
      if (det) {
        // look for a matching child with a determiner-like relation
        Set<IndexedWord> children = wordMapFactory.newSet();
        children.addAll(getChildrenWithReln(vertex, EnglishGrammaticalRelations.DETERMINER));
        children.addAll(getChildrenWithReln(vertex, EnglishGrammaticalRelations.PREDETERMINER));
        for (IndexedWord child : children) {
          String lemma = child.get(CoreAnnotations.LemmaAnnotation.class);
          if (lemma == null || lemma.isEmpty()) {
            String form = child.word();
            lemma = (form == null) ? null : form.toLowerCase();
          }
          if (word.equals(lemma)) {
            match = true;
            break;
          }
        }
      } else { // take any relation, except "det"
        for (Pair<GrammaticalRelation, IndexedWord> pair : childPairs(vertex)) {
          if (pair.first().toString().equals("det")) {
            continue;
          }
          IndexedWord child = pair.second();
          String lemma = child.get(CoreAnnotations.LemmaAnnotation.class);
          if (lemma == null || lemma.isEmpty()) {
            String form = child.word();
            lemma = (form == null) ? null : form.toLowerCase();
          }
          if (word.equals(lemma)) {
            match = true;
            break;
          }
        }
      }
    } else {
      // The old message concatenated an unformatted "%s" template with the pattern.
      throw new RuntimeException("Warning: bad pattern \"" + pattern + "\"");
    }
    if (!match) {
      return false;
    }
  }
  return true;
}
// todo [cdm 2013]: Completely RTE-specific methods like this one should be moved to a static class of helper methods under RTE
/**
 * Checks a vertex against a neighbourhood pattern. The pattern is a
 * sequence of steps, each a direction marker followed by a word:
 * {@code <w} requires some parent whose lemma equals {@code w};
 * {@code >w} requires some child whose lemma equals {@code w} (falling
 * back to the lower-cased word when the child has no lemma). Every step is
 * tested against {@code vertex} itself and all steps must succeed.
 *
 * @param pattern the pattern, e.g. {@code "<eat>the"}
 * @param vertex the vertex to test
 * @return {@code true} if every step of the pattern matches
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 * @throws RuntimeException if a step starts with neither {@code <} nor {@code >}
 */
public boolean matchPatternToVertex(String pattern, IndexedWord vertex) {
  if (!containsVertex(vertex)) {
    throw new IllegalArgumentException();
  }
  // Put a comma in front of each direction marker so the steps can be split apart.
  String pat = pattern.replace("<", ",<").replace(">", ",>");
  for (String s : pat.split(",")) {
    if (s.isEmpty()) {
      continue;
    }
    char dir = s.charAt(0);
    String word = s.substring(1);
    boolean match = false;
    if (dir == '<') {
      // look for a matching parent; word.equals(..) tolerates a missing lemma
      for (IndexedWord parent : getParents(vertex)) {
        if (word.equals(parent.get(CoreAnnotations.LemmaAnnotation.class))) {
          match = true;
          break;
        }
      }
    } else if (dir == '>') {
      // look for a matching child
      for (IndexedWord child : getChildren(vertex)) {
        String lemma = child.get(CoreAnnotations.LemmaAnnotation.class);
        if (lemma == null || lemma.isEmpty()) {
          String form = child.word();
          lemma = (form == null) ? null : form.toLowerCase();
        }
        if (word.equals(lemma)) {
          match = true;
          break;
        }
      }
    } else {
      // The old message concatenated an unformatted "%s" template with the pattern.
      throw new RuntimeException("Warning: bad pattern \"" + pattern + "\"");
    }
    if (!match) {
      return false;
    }
  }
  return true;
}
/**
 * Returns the children of a vertex as a fresh list sorted by the natural
 * ordering of {@code IndexedWord}.
 *
 * @param vertex the parent vertex
 * @return a new sorted list of the children of {@code vertex}
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public List<IndexedWord> getChildList(IndexedWord vertex) {
  if (containsVertex(vertex)) {
    List<IndexedWord> sortedChildren = new ArrayList<IndexedWord>(getChildren(vertex));
    Collections.sort(sortedChildren);
    return sortedChildren;
  }
  throw new IllegalArgumentException();
}
/**
 * Returns the children of a vertex as reported by the backing graph.
 *
 * @param vertex the parent vertex
 * @return the set of children of {@code vertex}
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public Set<IndexedWord> getChildren(IndexedWord vertex) {
  if (containsVertex(vertex)) {
    return graph.getChildren(vertex);
  }
  throw new IllegalArgumentException();
}
/**
 * Tests whether a vertex has at least one outgoing edge.
 *
 * @param vertex the vertex to inspect
 * @return {@code true} if the outgoing-edge iterator of {@code vertex} has an element
 */
public boolean hasChildren(IndexedWord vertex) {
  Iterator<SemanticGraphEdge> outgoing = outgoingEdgeIterator(vertex);
  return outgoing.hasNext();
}
/**
 * Returns the incoming edges of a vertex, sorted by the natural ordering of
 * {@code SemanticGraphEdge}.
 *
 * @param vertex the target vertex
 * @return the sorted list of edges arriving at {@code vertex}
 */
public List<SemanticGraphEdge> getIncomingEdgesSorted(IndexedWord vertex) {
  final List<SemanticGraphEdge> incoming = incomingEdgeList(vertex);
  Collections.sort(incoming);
  return incoming;
}
/**
 * Returns the outgoing edges of a vertex, sorted by the natural ordering of
 * {@code SemanticGraphEdge}.
 *
 * @param vertex the source vertex
 * @return the sorted list of edges leaving {@code vertex}
 */
public List<SemanticGraphEdge> getOutEdgesSorted(IndexedWord vertex) {
  final List<SemanticGraphEdge> outgoing = outgoingEdgeList(vertex);
  Collections.sort(outgoing);
  return outgoing;
}
/**
 * Returns the parents of a vertex as a fresh list sorted by the natural
 * ordering of {@code IndexedWord}.
 *
 * @param vertex the child vertex
 * @return a new sorted list of the parents of {@code vertex}
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public List<IndexedWord> getParentList(IndexedWord vertex) {
  if (containsVertex(vertex)) {
    List<IndexedWord> sortedParents = new ArrayList<IndexedWord>(getParents(vertex));
    Collections.sort(sortedParents);
    return sortedParents;
  }
  throw new IllegalArgumentException();
}
/**
 * Returns the parents of a vertex as reported by the backing graph.
 *
 * @param vertex the child vertex
 * @return the set of parents of {@code vertex}
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public Set<IndexedWord> getParents(IndexedWord vertex) {
  if (containsVertex(vertex)) {
    return graph.getParents(vertex);
  }
  throw new IllegalArgumentException();
}
/**
 * Collects the siblings of a vertex: the other children of the parent that
 * {@link #getParent(IndexedWord)} returns for it.
 *
 * @param vertex the vertex whose siblings are wanted
 * @return the sibling vertices, never including {@code vertex} itself;
 *         empty when {@code getParent} yields {@code null}
 */
public Collection<IndexedWord> getSiblings(IndexedWord vertex) {
  IndexedWord mother = this.getParent(vertex);
  if (mother == null) {
    return Collections.emptySet();
  }
  Set<IndexedWord> siblings = wordMapFactory.newSet();
  siblings.addAll(this.getChildren(mother));
  // a vertex is not its own sibling
  siblings.remove(vertex);
  return siblings;
}
/**
 * Recursive worker behind the public {@code getPathToRoot}. It searches
 * depth-first upwards through the sorted parents and returns the path in
 * reverse (root end first), without {@code vertex} itself.
 *
 * @param vertex the vertex to climb from
 * @param used the vertices on the current search trail; {@code vertex} is
 *        pushed on entry and popped again before returning
 * @return an empty list if {@code vertex} is a root, the reversed path if a
 *         root is reachable, or {@code null} if none is
 */
private List<IndexedWord> getPathToRoot(IndexedWord vertex, List<IndexedWord> used) {
  used.add(vertex);
  // TODO: Apparently the order of the nodes in the path to the root
  // makes a difference for the RTE system. Look into this some more
  List<IndexedWord> candidates = getParentList(vertex);
  // never climb back onto the current trail (guards against cycles)
  candidates.removeAll(used);

  List<IndexedWord> found = null;
  if (roots.contains(vertex)) {
    found = Generics.newArrayList();
  } else {
    for (IndexedWord candidate : candidates) {
      List<IndexedWord> above = getPathToRoot(candidate, used);
      if (above != null) {
        above.add(candidate);
        found = above;
        break;
      }
    }
  }
  // pop this vertex off the trail again
  used.remove(used.size() - 1);
  return found;
}
/**
 * Finds a path from the given vertex up to a root. The vertex itself is not
 * part of the path; its first element is the chosen parent of the vertex.
 *
 * @param vertex the vertex to start from
 * @return the path, an empty list if {@code vertex} is itself a root, or
 *         {@code null} if no root is reachable (should never happen)
 */
public List<IndexedWord> getPathToRoot(IndexedWord vertex) {
  List<IndexedWord> trail = Generics.newArrayList();
  List<IndexedWord> path = getPathToRoot(vertex, trail);
  if (path == null) {
    return null;
  }
  // the helper builds the path backwards
  Collections.reverse(path);
  return path;
}
/**
 * Returns the parent of a vertex that lies on its path to a root, i.e. the
 * first element of {@link #getPathToRoot(IndexedWord)}.
 *
 * @param vertex the vertex whose parent is wanted
 * @return that parent, or {@code null} if the path is {@code null} or empty
 */
public IndexedWord getParent(IndexedWord vertex) {
  List<IndexedWord> path = getPathToRoot(vertex);
  return (path == null || path.isEmpty()) ? null : path.get(0);
}
/**
 * Looks up the <em>first</em> {@link edu.stanford.nlp.ling.IndexedWord
 * IndexedWord} of this {@code SemanticGraph} with the given integer index.
 *
 * @param index the index to search for
 * @return the matching vertex
 * @throws IllegalArgumentException if no vertex has that index
 */
public IndexedWord getNodeByIndex(int index) throws IllegalArgumentException {
  IndexedWord found = getNodeByIndexSafe(index);
  if (found != null) {
    return found;
  }
  throw new IllegalArgumentException("No SemanticGraph vertex with index " + index);
}
/**
 * Looks up the first vertex, in the iteration order of {@link #vertexSet()},
 * whose index equals the given one.
 *
 * @param index the index to search for
 * @return the matching vertex, or {@code null} if there is none
 */
public IndexedWord getNodeByIndexSafe(int index) {
  IndexedWord found = null;
  Iterator<IndexedWord> it = vertexSet().iterator();
  while (found == null && it.hasNext()) {
    IndexedWord candidate = it.next();
    if (candidate.index() == index) {
      found = candidate;
    }
  }
  return found;
}
/**
 * Returns the <i>first</i> {@link edu.stanford.nlp.ling.IndexedWord
 * IndexedWord} in this <code>SemanticGraph</code> whose word fully matches
 * the given regular expression.
 * <br>
 * A {@code null} pattern matches exactly the vertices whose word is
 * {@code null}. Previously a {@code null} pattern failed inside
 * {@code Pattern.compile} before that check could ever be reached.
 *
 * @param pattern a regular expression, or {@code null}
 * @return the first matching vertex in {@link #vertexSet()} iteration
 *         order, or {@code null} if none matches
 */
public IndexedWord getNodeByWordPattern(String pattern) {
  // Compile only when there is something to compile.
  Pattern p = (pattern == null) ? null : Pattern.compile(pattern);
  for (IndexedWord vertex : vertexSet()) {
    String w = vertex.word();
    boolean hit = (w == null) ? (p == null) : (p != null && p.matcher(w).matches());
    if (hit) {
      return vertex;
    }
  }
  return null;
}
/**
 * Returns all nodes of type {@link edu.stanford.nlp.ling.IndexedWord
 * IndexedWord} in this <code>SemanticGraph</code> whose word fully matches
 * the given regular expression.
 * <br>
 * A {@code null} pattern matches exactly the vertices whose word is
 * {@code null}. Previously a {@code null} pattern failed inside
 * {@code Pattern.compile} before that check could ever be reached.
 *
 * @param pattern a regular expression, or {@code null}
 * @return a new list of the matching vertices, empty if none matches
 */
public List<IndexedWord> getAllNodesByWordPattern(String pattern) {
  // Compile only when there is something to compile.
  Pattern p = (pattern == null) ? null : Pattern.compile(pattern);
  List<IndexedWord> nodes = new ArrayList<IndexedWord>();
  for (IndexedWord vertex : vertexSet()) {
    String w = vertex.word();
    boolean hit = (w == null) ? (p == null) : (p != null && p.matcher(w).matches());
    if (hit) {
      nodes.add(vertex);
    }
  }
  return nodes;
}
/**
 * Collects every vertex reachable from {@code vertex} along outgoing edges,
 * using a depth-first search. The start vertex itself is part of the result.
 *
 * @param vertex the vertex to start from
 * @return a new set holding {@code vertex} and all of its descendants
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public Set<IndexedWord> descendants(IndexedWord vertex) {
  if (containsVertex(vertex)) {
    Set<IndexedWord> reached = wordMapFactory.newSet();
    descendantsHelper(vertex, reached);
    return reached;
  }
  throw new IllegalArgumentException();
}
/**
 * Depth-first worker for {@code descendants}: records {@code curr} and then
 * recurses into its children. Vertices already recorded are skipped, which
 * also keeps the recursion finite on cyclic graphs.
 *
 * @param curr the vertex being visited
 * @param descendantSet accumulator of all vertices visited so far
 */
private void descendantsHelper(IndexedWord curr, Set<IndexedWord> descendantSet) {
  // add() reports false when curr was recorded earlier
  if (!descendantSet.add(curr)) {
    return;
  }
  for (IndexedWord next : getChildren(curr)) {
    descendantsHelper(next, descendantSet);
  }
}
/**
 * Lists, for every outgoing edge of a vertex, the edge's relation paired
 * with the child it leads to.
 *
 * @param vertex the parent vertex
 * @return a new list of (relation, child) pairs, one per outgoing edge
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public List<Pair<GrammaticalRelation, IndexedWord>> childPairs(IndexedWord vertex) {
  if (containsVertex(vertex)) {
    List<Pair<GrammaticalRelation, IndexedWord>> pairs = Generics.newArrayList();
    Iterator<SemanticGraphEdge> it = outgoingEdgeIterable(vertex).iterator();
    while (it.hasNext()) {
      SemanticGraphEdge edge = it.next();
      pairs.add(new Pair<GrammaticalRelation, IndexedWord>(edge.getRelation(), edge.getTarget()));
    }
    return pairs;
  }
  throw new IllegalArgumentException();
}
/**
 * Lists, for every incoming edge of a vertex, the edge's relation paired
 * with the parent it comes from.
 *
 * @param vertex the child vertex
 * @return a new list of (relation, parent) pairs, one per incoming edge
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public List<Pair<GrammaticalRelation, IndexedWord>> parentPairs(IndexedWord vertex) {
  if (containsVertex(vertex)) {
    List<Pair<GrammaticalRelation, IndexedWord>> pairs = Generics.newArrayList();
    Iterator<SemanticGraphEdge> it = incomingEdgeIterable(vertex).iterator();
    while (it.hasNext()) {
      SemanticGraphEdge edge = it.next();
      pairs.add(new Pair<GrammaticalRelation, IndexedWord>(edge.getRelation(), edge.getSource()));
    }
    return pairs;
  }
  throw new IllegalArgumentException();
}
/**
 * Gathers the distinct relations carried by the incoming edges of a vertex,
 * i.e. the relations it bears to its parents.
 *
 * @param vertex the child vertex
 * @return a new set of those relations, empty if there are no incoming edges
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public Set<GrammaticalRelation> relns(IndexedWord vertex) {
  if (!containsVertex(vertex)) {
    throw new IllegalArgumentException();
  }
  Set<GrammaticalRelation> incomingRelations = Generics.newHashSet();
  for (SemanticGraphEdge edge : incomingEdgeIterable(vertex)) {
    incomingRelations.add(edge.getRelation());
  }
  return incomingRelations;
}
/**
 * Returns the relation on an edge from {@code a} to {@code b}.
 * <br>
 * Note: several arcs may link <code>a</code> to <code>b</code>; only the
 * relation of the first one encountered is returned.
 *
 * @param a the governor vertex
 * @param b the dependent vertex
 * @return the relation of the first outgoing edge of {@code a} whose target
 *         equals {@code b}, or {@code null} if there is none
 * @throws IllegalArgumentException if {@code a} is not in this graph
 */
public GrammaticalRelation reln(IndexedWord a, IndexedWord b) {
  if (!containsVertex(a)) {
    throw new IllegalArgumentException();
  }
  for (SemanticGraphEdge edge : outgoingEdgeIterable(a)) {
    if (edge.getTarget().equals(b)) {
      return edge.getRelation();
    }
  }
  return null;
}
/**
 * Gathers the distinct relations carried by the outgoing edges of a vertex,
 * i.e. the relations its children bear to it.
 *
 * @param vertex the parent vertex
 * @return a new set of those relations, empty if there are no outgoing edges
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public Set<GrammaticalRelation> childRelns(IndexedWord vertex) {
  if (!containsVertex(vertex)) {
    throw new IllegalArgumentException();
  }
  Set<GrammaticalRelation> outgoingRelations = Generics.newHashSet();
  for (SemanticGraphEdge edge : outgoingEdgeIterable(vertex)) {
    outgoingRelations.add(edge.getRelation());
  }
  return outgoingRelations;
}
/**
 * Returns the root collection of this graph. This is the internal
 * collection itself, not a copy, so changes made through it affect the graph.
 *
 * @return the live collection of root vertices
 */
public Collection<IndexedWord> getRoots() {
return roots;
}
/**
 * Collects the vertices that have no incoming arcs (in-degree 0) and
 * returns them sorted by the natural ordering of {@code IndexedWord}.
 * No other root heuristic is applied here.
 *
 * @return a new sorted list of parentless vertices, possibly empty
 */
private List<IndexedWord> getVerticesWithoutParents() {
  List<IndexedWord> parentless = new ArrayList<IndexedWord>();
  for (IndexedWord candidate : vertexSet()) {
    if (inDegree(candidate) != 0) {
      continue;
    }
    parentless.add(candidate);
  }
  Collections.sort(parentless);
  return parentless;
}
/**
 * Returns the (first) root of this SemanticGraph, i.e. the first element
 * produced by the root collection's iterator.
 *
 * @return the first root
 * @throws RuntimeException if the graph has no roots
 */
public IndexedWord getFirstRoot() {
  if (!roots.isEmpty()) {
    return roots.iterator().next();
  }
  throw new RuntimeException("No roots in graph:\n" + this
      + "\nFind where this graph was created and make sure you're adding roots.");
}
/**
 * Registers an additional root: the word is added as a vertex and then
 * appended to the root collection. Existing roots are kept.
 *
 * @param root the vertex to add as a root
 */
public void addRoot(IndexedWord root) {
addVertex(root);
roots.add(root);
}
/**
 * This method should not be used if possible. TODO: delete it
 *
 * Recomputes the roots from actual candidates, to restore a rooted graph
 * after a sequence of edits. Vertices without incoming arcs become the new
 * roots. If there are none (every vertex sits on or below a cycle), the
 * vertex with the largest sum of shortest directed path lengths to the
 * other vertices is made the single root. If even that yields no winner
 * (e.g. the graph has no vertices), the old root set is kept, retaining
 * only the roots that are still vertices of the graph; a {@code null} root
 * is never installed.
 *
 * TODO: this cannot deal with "Hamburg is a city which everyone likes", as
 * the intended root node, 'Hamburg', is also the dobj of the relative clause. A
 * possible solution would be to create edgeset routines that allow filtering
 * over a predicate, and specifically filter out dobj relations for choosing
 * next best candidate. This could also be useful for dealing with
 * non-syntactic arcs in the future. TODO: There is also the possibility the
 * roots could be empty at the end, and will need to be resolved. TODO:
 * determine if this is a reasonably correct solution.
 */
public void resetRoots() {
  Collection<IndexedWord> newRoots = getVerticesWithoutParents();
  if (!newRoots.isEmpty()) {
    roots.clear();
    roots.addAll(newRoots);
    return;
  }
  // If no apparent root candidates are available, likely due to loop back
  // edges (rcmod), find the node that dominates the most nodes, and let
  // that be the new root. Note this implementation epitomizes K.I.S.S., and
  // is brain dead and non-optimal, and will require further work.
  TwoDimensionalCounter<IndexedWord, IndexedWord> nodeDists = TwoDimensionalCounter.identityHashMapCounter();
  for (IndexedWord node1 : vertexSet()) {
    for (IndexedWord node2 : vertexSet()) {
      // want directed paths only
      List<SemanticGraphEdge> path = getShortestDirectedPathEdges(node1, node2);
      if (path != null) {
        nodeDists.setCount(node1, node2, path.size());
      }
    }
  }
  // K.I.S.S. alg: just sum up and see who's on top, values don't have much
  // meaning outside of determining dominance.
  ClassicCounter<IndexedWord> dominatedEdgeCount = ClassicCounter.identityHashMapCounter();
  for (IndexedWord outer : vertexSet()) {
    for (IndexedWord inner : vertexSet()) {
      dominatedEdgeCount.incrementCount(outer, nodeDists.getCount(outer, inner));
    }
  }
  IndexedWord winner = Counters.argmax(dominatedEdgeCount);
  if (winner == null) {
    // No candidate at all (presumably argmax over an empty counter -- the
    // graph has no vertices): keep the old roots that are still vertices.
    roots.retainAll(vertexSet());
    return;
  }
  // TODO: account for multiply rooted graphs later
  setRoot(winner);
}
/**
 * Makes the given word the single root, discarding any previous roots.
 * The word is not added as a vertex here.
 *
 * @param word the new sole root
 */
public void setRoot(IndexedWord word) {
roots.clear();
roots.add(word);
}
/**
 * Replaces the current roots with the given words. The words are not added
 * as vertices here.
 *
 * @param words the new roots; may be the live collection returned by
 *        {@link #getRoots()}, in which case the roots are left as they are
 */
public void setRoots(Collection<IndexedWord> words) {
  // Snapshot before clearing: if the caller passes the collection returned
  // by getRoots(), clearing first would empty the argument as well and all
  // roots would be lost.
  List<IndexedWord> snapshot = new ArrayList<IndexedWord>(words);
  roots.clear();
  roots.addAll(snapshot);
}
/**
 * Orders the vertices so that every edge points from an earlier vertex to
 * a later one. Implemented as a depth-first search whose post-order is
 * reversed at the end.
 *
 * @return a new list of all vertices in topological order
 * @throws IllegalStateException if this graph is not a DAG
 */
public List<IndexedWord> topologicalSort() {
  List<IndexedWord> postOrder = Generics.newArrayList();
  Set<IndexedWord> started = wordMapFactory.newSet();
  Set<IndexedWord> finished = wordMapFactory.newSet();
  for (IndexedWord start : vertexSet()) {
    if (started.contains(start)) {
      continue;
    }
    topologicalSortHelper(start, started, finished, postOrder);
  }
  Collections.reverse(postOrder);
  return postOrder;
}
/**
 * Depth-first worker for {@code topologicalSort}. A vertex is put into
 * {@code temporary} when its visit starts and into {@code permanent} when
 * it ends, so a target found in {@code temporary} but not in
 * {@code permanent} is still on the current path, which means a cycle.
 *
 * @param vertex the vertex being visited
 * @param temporary vertices whose visit has started
 * @param permanent vertices whose visit has finished
 * @param result vertices in post-order; {@code vertex} is appended last
 * @throws IllegalStateException if a cycle is detected
 */
private void topologicalSortHelper(IndexedWord vertex, Set<IndexedWord> temporary, Set<IndexedWord> permanent, List<IndexedWord> result) {
  temporary.add(vertex);
  for (SemanticGraphEdge outgoing : outgoingEdgeIterable(vertex)) {
    IndexedWord next = outgoing.getTarget();
    if (!permanent.contains(next)) {
      if (temporary.contains(next)) {
        throw new IllegalStateException("This graph has cycles. Topological sort not possible: " + this.toString());
      }
      topologicalSortHelper(next, temporary, permanent, result);
    }
  }
  result.add(vertex);
  permanent.add(vertex);
}
/**
 * Does the given <code>vertex</code> have at least one child with the given
 * {@code reln} and the lemma <code>childLemma</code>? A child that carries
 * no lemma annotation never matches.
 *
 * @param vertex the parent vertex
 * @param reln the relation the child must bear
 * @param childLemma the lemma the child must have
 * @return {@code true} if such a child exists
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public boolean hasChild(IndexedWord vertex, GrammaticalRelation reln, String childLemma) {
  if (!containsVertex(vertex)) {
    throw new IllegalArgumentException();
  }
  for (SemanticGraphEdge edge : outgoingEdgeIterable(vertex)) {
    if (!edge.getRelation().equals(reln)) {
      continue;
    }
    // The lemma annotation may be absent; treat that as "no match" rather
    // than failing with a NullPointerException.
    String lemma = edge.getTarget().get(CoreAnnotations.LemmaAnnotation.class);
    if (lemma != null && lemma.equals(childLemma)) {
      return true;
    }
  }
  return false;
}
/**
 * Tests whether some outgoing edge of <code>vertex</code> carries the given {@code reln}.
 *
 * @param vertex the parent vertex
 * @param reln the relation to look for
 * @return {@code true} if at least one child bears {@code reln}
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public boolean hasChildWithReln(IndexedWord vertex, GrammaticalRelation reln) {
  if (!containsVertex(vertex)) {
    throw new IllegalArgumentException();
  }
  boolean found = false;
  Iterator<SemanticGraphEdge> it = outgoingEdgeIterable(vertex).iterator();
  while (!found && it.hasNext()) {
    found = it.next().getRelation().equals(reln);
  }
  return found;
}
/**
 * Tests whether some incoming edge of a vertex carries the given relation.
 *
 * @param vertex A node in this graph
 * @param reln The relation we want to check
 * @return true if vertex has an incoming relation reln
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public boolean hasParentWithReln(IndexedWord vertex, GrammaticalRelation reln) {
  if (!containsVertex(vertex)) {
    throw new IllegalArgumentException();
  }
  boolean found = false;
  Iterator<SemanticGraphEdge> it = incomingEdgeIterable(vertex).iterator();
  while (!found && it.hasNext()) {
    found = it.next().getRelation().equals(reln);
  }
  return found;
}
/**
 * Returns the first child that bears a certain grammatical relation to the
 * given vertex.
 *
 * @param vertex the parent vertex
 * @param reln the relation to look for
 * @return the target of the first outgoing edge with relation {@code reln};
 *         {@code null} if there is none or if {@code vertex} equals
 *         {@code IndexedWord.NO_WORD}
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public IndexedWord getChildWithReln(IndexedWord vertex, GrammaticalRelation reln) {
  if (vertex.equals(IndexedWord.NO_WORD)) {
    return null;
  }
  if (!containsVertex(vertex)) {
    throw new IllegalArgumentException();
  }
  IndexedWord match = null;
  Iterator<SemanticGraphEdge> it = outgoingEdgeIterable(vertex).iterator();
  while (match == null && it.hasNext()) {
    SemanticGraphEdge edge = it.next();
    if (edge.getRelation().equals(reln)) {
      return edge.getTarget();
    }
  }
  return match;
}
/**
 * Collects all parents to which the vertex bears a certain grammatical relation.
 *
 * @param vertex the child vertex
 * @param reln the relation to look for
 * @return a set of the matching parents; empty if there are none or if
 *         {@code vertex} equals {@code IndexedWord.NO_WORD}
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public Set<IndexedWord> getParentsWithReln(IndexedWord vertex, GrammaticalRelation reln) {
  if (vertex.equals(IndexedWord.NO_WORD)) {
    return Collections.emptySet();
  }
  if (!containsVertex(vertex)) {
    throw new IllegalArgumentException();
  }
  Set<IndexedWord> matches = wordMapFactory.newSet();
  for (SemanticGraphEdge incoming : incomingEdgeIterable(vertex)) {
    if (!incoming.getRelation().equals(reln)) {
      continue;
    }
    matches.add(incoming.getSource());
  }
  return matches;
}
/**
 * Collects all children that bear a certain grammatical relation to the vertex.
 *
 * @param vertex the parent vertex
 * @param reln the relation to look for
 * @return a set of the matching children; empty if there are none or if
 *         {@code vertex} equals {@code IndexedWord.NO_WORD}
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public Set<IndexedWord> getChildrenWithReln(IndexedWord vertex, GrammaticalRelation reln) {
  if (vertex.equals(IndexedWord.NO_WORD)) {
    return Collections.emptySet();
  }
  if (!containsVertex(vertex)) {
    throw new IllegalArgumentException();
  }
  Set<IndexedWord> matches = wordMapFactory.newSet();
  for (SemanticGraphEdge outgoing : outgoingEdgeIterable(vertex)) {
    if (!outgoing.getRelation().equals(reln)) {
      continue;
    }
    matches.add(outgoing.getTarget());
  }
  return matches;
}
/**
 * Collects all children that bear any one of a collection of grammatical
 * relations to the vertex.
 *
 * NOTE: this will only work for relation types that are classes. Those that
 * are collapsed are currently not handled correctly since they are identified
 * by strings.
 *
 * @param vertex the parent vertex
 * @param relns the accepted relations
 * @return a set of the matching children; empty if there are none or if
 *         {@code vertex} equals {@code IndexedWord.NO_WORD}
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public Set<IndexedWord> getChildrenWithRelns(IndexedWord vertex, Collection<GrammaticalRelation> relns) {
  if (vertex.equals(IndexedWord.NO_WORD)) {
    return Collections.emptySet();
  }
  if (!containsVertex(vertex)) {
    throw new IllegalArgumentException();
  }
  Set<IndexedWord> matches = wordMapFactory.newSet();
  for (SemanticGraphEdge outgoing : outgoingEdgeIterable(vertex)) {
    if (!relns.contains(outgoing.getRelation())) {
      continue;
    }
    matches.add(outgoing.getTarget());
  }
  return matches;
}
/**
 * Looks up the arc from a governor to a dependent that carries a given relation.
 *
 * @param gov the governor (source) vertex
 * @param dep the dependent (target) vertex
 * @param reln the relation the arc must carry
 * @return the first edge between the two whose source equals {@code gov}
 *         and whose relation equals {@code reln}, or {@code null} if no such
 *         arc exists
 */
public SemanticGraphEdge getEdge(IndexedWord gov, IndexedWord dep, GrammaticalRelation reln) {
  Collection<SemanticGraphEdge> between = getAllEdges(gov, dep);
  if (between == null) {
    return null;
  }
  for (SemanticGraphEdge candidate : between) {
    if (candidate.getSource().equals(gov) && candidate.getRelation().equals(reln)) {
      return candidate;
    }
  }
  return null;
}
/**
 * Tests whether a vertex is negated: it has a child attached by the
 * negation-modifier relation, or a child with lemma "nor" attached by the
 * generic dependent relation.
 *
 * @param vertex the vertex to test
 * @return {@code true} if negated; always {@code false} for the
 *         {@code IndexedWord.NO_WORD} instance
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public boolean isNegatedVertex(IndexedWord vertex) {
  if (vertex == IndexedWord.NO_WORD) {
    return false;
  }
  if (!containsVertex(vertex)) {
    throw new IllegalArgumentException("Vertex " + vertex + " not in graph " + this);
  }
  if (hasChildWithReln(vertex, EnglishGrammaticalRelations.NEGATION_MODIFIER)) {
    return true;
  }
  return hasChild(vertex, GrammaticalRelation.DEPENDENT, "nor");
}
/**
 * Tests whether a vertex is a negated verb: its tag starts with "VB" and
 * {@link #isNegatedVertex(IndexedWord)} holds. A vertex without a tag is
 * not treated as a verb.
 *
 * @param vertex the vertex to test
 * @return {@code true} if the vertex is a verb and is negated
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
private boolean isNegatedVerb(IndexedWord vertex) {
  if (!containsVertex(vertex)) {
    throw new IllegalArgumentException();
  }
  // An untagged vertex would otherwise fail with a NullPointerException.
  String tag = vertex.tag();
  return tag != null && tag.startsWith("VB") && isNegatedVertex(vertex);
}
/**
 * Check if the vertex is in a "conditional" context. Right now it only
 * returns true if the vertex has an "if" marker attached to it, i.e. the
 * vertex is in a clause headed by "if". Marker children without a word are
 * ignored.
 *
 * @param vertex the vertex to test
 * @return {@code true} if some marker child has the word "if" (ignoring case)
 */
public boolean isInConditionalContext(IndexedWord vertex) {
  for (IndexedWord child : getChildrenWithReln(vertex, EnglishGrammaticalRelations.MARKER)) {
    // Literal first, so a child whose word is null cannot cause a NullPointerException.
    if ("if".equalsIgnoreCase(child.word())) {
      return true;
    }
  }
  return false;
}
// Obsolete; use functions in rte.feat.NegPolarityFeaturizers instead
/**
 * Tests whether any parent of the vertex is a negated verb.
 *
 * @param vertex the vertex whose parents are inspected
 * @return {@code true} if at least one parent is a negated verb
 */
public boolean attachedNegatedVerb(IndexedWord vertex) {
  boolean found = false;
  Iterator<IndexedWord> it = getParents(vertex).iterator();
  while (!found && it.hasNext()) {
    found = isNegatedVerb(it.next());
  }
  return found;
}
/**
 * Returns true iff this vertex stands in the "aux" (or passive "aux")
 * relation to any of its parents.
 *
 * @param vertex the vertex to test
 * @return {@code true} if one of the vertex's incoming relations is
 *         {@code AUX_MODIFIER} or {@code AUX_PASSIVE_MODIFIER}
 */
public boolean isAuxiliaryVerb(IndexedWord vertex) {
  Set<GrammaticalRelation> incoming = relns(vertex);
  // an empty set contains neither relation, so no separate emptiness test is needed
  return incoming.contains(EnglishGrammaticalRelations.AUX_MODIFIER)
      || incoming.contains(EnglishGrammaticalRelations.AUX_PASSIVE_MODIFIER);
}
/**
 * Collects the vertices that have no outgoing edges (out-degree 0).
 *
 * @return a new set of the leaf vertices
 */
public Set<IndexedWord> getLeafVertices() {
  Set<IndexedWord> leaves = wordMapFactory.newSet();
  for (IndexedWord candidate : vertexSet()) {
    if (outDegree(candidate) != 0) {
      continue;
    }
    leaves.add(candidate);
  }
  return leaves;
}
/**
 * Reports the number of vertices in this graph.
 *
 * @return the size of {@link #vertexSet()}
 */
public int size() {
  Set<IndexedWord> allVertices = this.vertexSet();
  return allVertices.size();
}
/**
 * Checks the graph for cycles by starting a depth-first search from every
 * vertex that no earlier search has visited.
 *
 * @return true if the graph contains no cycles.
 */
public boolean isDag() {
  Set<IndexedWord> unvisited = wordMapFactory.newSet();
  unvisited.addAll(vertexSet());
  while (!unvisited.isEmpty()) {
    IndexedWord start = unvisited.iterator().next();
    Set<IndexedWord> trail = wordMapFactory.newSet();
    boolean cycleFound = isDagHelper(start, unvisited, trail);
    if (cycleFound) {
      return false;
    }
  }
  return true;
}
/**
 * Depth-first step of {@link #isDag()}. Despite the name, the result is
 * true when a cycle IS found.
 *
 * @param current the vertex being visited
 * @param unused vertices not yet visited by any search; visited ones are removed
 * @param trail vertices on the current search path
 * @return true if a cycle is reachable from {@code current}
 */
private boolean isDagHelper(IndexedWord current, Set<IndexedWord> unused, Set<IndexedWord> trail) {
  if (trail.contains(current)) {
    // Back on the current path: cycle.
    return true;
  }
  if (!unused.remove(current)) {
    // Already fully explored earlier; nothing new below this vertex.
    return false;
  }
  trail.add(current);
  boolean cycleFound = false;
  for (IndexedWord child : getChildren(current)) {
    if (isDagHelper(child, unused, trail)) {
      cycleFound = true;
      break;
    }
  }
  if (!cycleFound) {
    trail.remove(current);
  }
  return cycleFound;
}
// ============================================================================
// String display
// ============================================================================
/**
 * Returns a structured, indented representation of the dependency graph,
 * produced by a recursive depth-first traversal with words rendered in
 * {@code CoreLabel.OutputFormat.VALUE_TAG} format.
 * <p>
 * Schematic example (node labels are shown as word-index for readability;
 * the actual label text is whatever the word format produces):
 *
 * <pre>
 * -&gt; need-3 (root)
 *   -&gt; We-0 (nsubj)
 *   -&gt; do-1 (aux)
 *   -&gt; n't-2 (neg)
 *   -&gt; badges-6 (dobj)
 *     -&gt; no-4 (det)
 *     -&gt; stinking-5 (amod)
 * </pre>
 */
@Override
public String toString() {
  final CoreLabel.OutputFormat defaultWordFormat = CoreLabel.OutputFormat.VALUE_TAG;
  return toString(defaultWordFormat);
}
/**
 * Returns the recursive, indented representation of this graph with each
 * word rendered in the given format. Every root starts a tree marked
 * "(root)"; vertices not reached from any root are then printed as the
 * starting points of further trees. A graph without roots falls back to
 * the {@code OutputFormat.READABLE} table.
 *
 * @param wordFormat format used to render each IndexedWord
 * @return the multi-line string representation
 */
public String toString(CoreLabel.OutputFormat wordFormat) {
  Collection<IndexedWord> rootNodes = getRoots();
  if (rootNodes.isEmpty()) {
    // Shouldn't happen, but return something!
    return toString(OutputFormat.READABLE);
  }

  StringBuilder out = new StringBuilder();
  Set<IndexedWord> visited = wordMapFactory.newSet();
  for (IndexedWord root : rootNodes) {
    out.append("-> ");
    out.append(root.toString(wordFormat));
    out.append(" (root)\n");
    recToString(root, wordFormat, out, 1, visited);
  }

  // Whatever was not reachable from a root is printed afterwards.
  Set<IndexedWord> remaining = wordMapFactory.newSet();
  remaining.addAll(vertexSet());
  remaining.removeAll(visited);
  for (; !remaining.isEmpty(); remaining.removeAll(visited)) {
    IndexedWord orphan = remaining.iterator().next();
    out.append(orphan.toString(wordFormat)).append("\n");
    recToString(orphan, wordFormat, out, 1, visited);
  }
  return out.toString();
}
/**
 * Helper for toString(): appends one line per outgoing edge of {@code curr}
 * (in sorted edge order) and recurses into targets not yet visited.
 *
 * @param curr vertex whose outgoing edges are printed
 * @param wordFormat format used to render each target word
 * @param sb buffer receiving the output
 * @param offset nesting depth; each level indents by two spaces
 * @param used vertices already visited; {@code curr} is added to it
 */
private void recToString(IndexedWord curr, CoreLabel.OutputFormat wordFormat, StringBuilder sb, int offset, Set<IndexedWord> used) {
  used.add(curr);
  List<SemanticGraphEdge> outgoing = outgoingEdgeList(curr);
  Collections.sort(outgoing);
  String indent = space(2 * offset);
  for (SemanticGraphEdge edge : outgoing) {
    IndexedWord child = edge.getTarget();
    sb.append(indent);
    sb.append("-> ");
    sb.append(child.toString(wordFormat));
    sb.append(" (");
    sb.append(edge.getRelation());
    sb.append(")\n");
    if (used.contains(child)) {
      // Already expanded elsewhere: print the edge but do not recurse.
      continue;
    }
    recToString(child, wordFormat, sb, offset + 1, used);
  }
}
/**
 * Builds a string consisting of {@code width} space characters.
 *
 * @param width number of spaces; zero or negative yields the empty string
 * @return the padding string
 */
private static String space(int width) {
  if (width <= 0) {
    return "";
  }
  char[] blanks = new char[width];
  Arrays.fill(blanks, ' ');
  return new String(blanks);
}
/**
 * Joins the words of all vertices, in the order given by
 * {@code vertexListSorted()}, with single spaces.
 *
 * @return the space-separated words of this graph
 */
public String toRecoveredSentenceString() {
  StringBuilder sentence = new StringBuilder();
  String separator = "";
  for (IndexedWord token : vertexListSorted()) {
    sentence.append(separator).append(token.word());
    separator = " ";
  }
  return sentence.toString();
}
/**
 * Like {@link #toRecoveredSentenceString()}, but each word is followed by a
 * number in parentheses. The number is the zero-based position of the word
 * in {@code vertexListSorted()}, not the word's own index.
 *
 * @return the space-separated, position-marked words of this graph
 */
public String toRecoveredSentenceStringWithIndexMarking() {
  StringBuilder sentence = new StringBuilder();
  int position = 0;
  for (IndexedWord token : vertexListSorted()) {
    if (position > 0) {
      sentence.append(' ');
    }
    sentence.append(token.word()).append("(").append(position).append(")");
    position++;
  }
  return sentence.toString();
}
/**
 * Similar to {@link #toRecoveredSentenceString()}, but fills in words that
 * were collapsed into relations (e.g. prep_for --&gt; 'for'). Mostly meant
 * for collapsed dependency trees. An edge with the English "agent" relation
 * and no specific is expanded to "by".
 * <p>
 * TODO: consider merging with toRecoveredSentenceString().
 * NOTE: the assumptions are currently for English. The inserted word is
 * placed directly before the governed node and its immediately preceding
 * descendants; this may not be valid for other conditions or languages.
 *
 * @return the vertices and the re-inserted words, joined by spaces
 */
public String toEnUncollapsedSentenceString() {
  List<IndexedWord> expanded = Generics.newLinkedList(vertexSet());
  List<Pair<String, IndexedWord>> pending = Generics.newArrayList();

  // First collect (specific, governed node) pairs, then insert them one by
  // one, so the list is not modified while the graph is being traversed.
  for (IndexedWord word : vertexSet()) {
    for (SemanticGraphEdge edge : getIncomingEdgesSorted(word)) {
      GrammaticalRelation relation = edge.getRelation();
      String specific = relation.getSpecific();
      if (specific == null && relation.equals(EnglishGrammaticalRelations.AGENT)) {
        specific = "by";
      }
      if (specific == null) {
        continue;
      }
      pending.add(new Pair<String, IndexedWord>(specific, word));
    }
  }

  for (Pair<String, IndexedWord> entry : pending) {
    insertSpecificIntoList(entry.first(), entry.second(), expanded);
  }
  return StringUtils.join(expanded, " ");
}
/**
 * Inserts the specific portion of an uncollapsed relation back into the
 * target list as a new word. The word is placed before the governed node,
 * moved further left past any directly preceding descendants of that node.
 *
 * @param specific
 *          Specific relation text to put in; used as lemma, text and
 *          original text of the new word.
 * @param relnTgtNode
 *          Node governed by the uncollapsed relation
 * @param tgtList
 *          Target list of words, modified in place
 */
private void insertSpecificIntoList(String specific, IndexedWord relnTgtNode, List<IndexedWord> tgtList) {
  IndexedWord inserted = new IndexedWord();
  inserted.set(CoreAnnotations.LemmaAnnotation.class, specific);
  inserted.set(CoreAnnotations.TextAnnotation.class, specific);
  inserted.set(CoreAnnotations.OriginalTextAnnotation.class, specific);

  Set<IndexedWord> subtree = descendants(relnTgtNode);
  int insertAt = tgtList.indexOf(relnTgtNode);
  for (; insertAt >= 1 && subtree.contains(tgtList.get(insertAt - 1)); insertAt--) {
    // keep stepping left while the preceding element is a descendant
  }
  tgtList.add(insertAt, inserted);
}
/**
* Output formats accepted by <code>toString(OutputFormat)</code>, which
* dispatches LIST to <code>toList()</code>, XML and READABLE to the private
* XML and table renderers, and RECURSIVE to <code>toString()</code>.
*/
public enum OutputFormat {
LIST, XML, READABLE, RECURSIVE
}
/**
 * Returns a String representation of the typed dependencies of this graph
 * in a user-specified format. Four formats are supported
 * ({@link OutputFormat}):
 * <dl>
 * <dt>LIST</dt>
 * <dd>Formats the dependencies as logical relations, as exemplified by the
 * following:
 *
 * <pre>
 *  nsubj(died-1, Sam-0)
 *  tmod(died-1, today-2)
 * </pre>
 *
 * </dd>
 * <dt>READABLE</dt>
 * <dd>Formats the dependencies as a table with columns
 * <code>dependent</code>, <code>relation</code>, and <code>governor</code>,
 * as exemplified by the following:
 *
 * <pre>
 *  Sam-0               nsubj               died-1
 *  today-2             tmod                died-1
 * </pre>
 *
 * </dd>
 * <dt>XML</dt>
 * <dd>Formats the dependencies as XML, as exemplified by the following:
 *
 * <pre>
 *  &lt;dependencies style="typed"&gt;
 *    &lt;dep type="nsubj"&gt;
 *      &lt;governor idx="1"&gt;died&lt;/governor&gt;
 *      &lt;dependent idx="0"&gt;Sam&lt;/dependent&gt;
 *    &lt;/dep&gt;
 *    &lt;dep type="tmod"&gt;
 *      &lt;governor idx="1"&gt;died&lt;/governor&gt;
 *      &lt;dependent idx="2"&gt;today&lt;/dependent&gt;
 *    &lt;/dep&gt;
 *  &lt;/dependencies&gt;
 * </pre>
 * </dd>
 *
 * <dt>RECURSIVE</dt>
 * <dd>
 * The default output for {@link #toString()}
 * </dd>
 *
 * </dl>
 *
 * @param format
 *          the {@link OutputFormat} constant selecting the desired format
 * @return a <code>String</code> representation of the typed dependencies in
 *         this <code>SemanticGraph</code>
 * @throws IllegalArgumentException if the format is not one of the above
 */
public String toString(OutputFormat format) {
  String rendered;
  switch (format) {
    case LIST:
      rendered = toList();
      break;
    case READABLE:
      rendered = toReadableString();
      break;
    case RECURSIVE:
      rendered = toString();
      break;
    case XML:
      rendered = toXMLString();
      break;
    default:
      throw new IllegalArgumentException("Unsupported format " + format);
  }
  return rendered;
}
/**
 * Returns a String representation of this graph as a list of typed
 * dependencies, one per line. Each root is listed first as a dependent of
 * "ROOT-0", followed by the edges in sorted order, for example:
 *
 * <pre>
 *  root(ROOT-0, died-6)
 *  nsubj(died-6, Sam-3)
 *  tmod(died-6, today-9)
 * </pre>
 *
 * @return a <code>String</code> representation of this set of typed
 *         dependencies
 */
public String toList() {
  StringBuilder out = new StringBuilder();
  for (IndexedWord root : getRoots()) {
    out.append("root(ROOT-0, ").append(toDepStyle(root)).append(")\n");
  }
  for (SemanticGraphEdge edge : this.edgeListSorted()) {
    String relation = edge.getRelation().toString();
    String governor = toDepStyle(edge.getSource());
    String dependent = toDepStyle(edge.getTarget());
    out.append(relation).append("(").append(governor).append(", ").append(dependent).append(")\n");
  }
  return out.toString();
}
/**
 * Similar to {@link #toList()}, but each word is rendered with
 * {@code CoreLabel.OutputFormat.VALUE_TAG_INDEX}, so the POS tag is
 * included. Roots are not listed, and governor and dependent are separated
 * by a bare comma.
 *
 * @return one line per edge, in sorted edge order
 */
public String toPOSList() {
  StringBuilder out = new StringBuilder();
  for (SemanticGraphEdge edge : this.edgeListSorted()) {
    String relation = edge.getRelation().toString();
    String governor = toPOSStyle(edge.getSource());
    String dependent = toPOSStyle(edge.getTarget());
    out.append(relation).append("(").append(governor).append(",").append(dependent).append(")\n");
  }
  return out.toString();
}
// todo [cdm 2013]: toDepStyle and toPOSStyle should really be toString options on indexed word but are different from all the current ones....
/** Renders a word in {@code CoreLabel.OutputFormat.VALUE_INDEX} format. */
private static String toDepStyle(IndexedWord word) {
  final CoreLabel.OutputFormat style = CoreLabel.OutputFormat.VALUE_INDEX;
  return word.toString(style);
}
/** Renders a word in {@code CoreLabel.OutputFormat.VALUE_TAG_INDEX} format. */
private static String toPOSStyle(IndexedWord word) {
  final CoreLabel.OutputFormat style = CoreLabel.OutputFormat.VALUE_TAG_INDEX;
  return word.toString(style);
}
/**
 * Renders the graph as a three-column table (dependent, relation,
 * governor), each column left-aligned in 20 characters. A header and a
 * separator row come first, then one row per root, then one row per edge
 * in sorted order.
 *
 * @return the table, one row per line
 */
private String toReadableString() {
  final String rowFormat = "%-20s%-20s%-20s%n";
  StringBuilder table = new StringBuilder();
  table.append(String.format(rowFormat, "dep", "reln", "gov"));
  table.append(String.format(rowFormat, "---", "----", "---"));
  for (IndexedWord root : getRoots()) {
    table.append(String.format(rowFormat, toDepStyle(root), "root", "root"));
  }
  for (SemanticGraphEdge edge : this.edgeListSorted()) {
    String dependent = toDepStyle(edge.getTarget());
    String relation = edge.getRelation().toString();
    String governor = toDepStyle(edge.getSource());
    table.append(String.format(rowFormat, dependent, relation, governor));
  }
  return table.toString();
}
/**
 * Renders the edges of this graph, in sorted order, as a
 * <code>dependencies</code> XML element with one <code>dep</code> child per
 * edge. Roots are not included.
 * <p>
 * Relation names and words are XML-escaped, so tokens such as "AT&amp;T" or
 * "&lt;" no longer produce malformed XML.
 *
 * @return the XML representation of the edges
 */
private String toXMLString() {
  StringBuilder buf = new StringBuilder("<dependencies style=\"typed\">\n");
  for (SemanticGraphEdge edge : this.edgeListSorted()) {
    String reln = escapeForXML(edge.getRelation().toString());
    String gov = escapeForXML((edge.getSource()).word());
    int govIdx = (edge.getSource()).index();
    String dep = escapeForXML((edge.getTarget()).word());
    int depIdx = (edge.getTarget()).index();
    buf.append(" <dep type=\"").append(reln).append("\">\n");
    buf.append(" <governor idx=\"").append(govIdx).append("\">").append(gov).append("</governor>\n");
    buf.append(" <dependent idx=\"").append(depIdx).append("\">").append(dep).append("</dependent>\n");
    buf.append(" </dep>\n");
  }
  buf.append("</dependencies>\n");
  return buf.toString();
}

/**
 * Replaces the five XML special characters with their predefined entities.
 * A null input is returned unchanged, so appending it still yields "null"
 * as before.
 */
private static String escapeForXML(String text) {
  if (text == null) {
    return null;
  }
  StringBuilder escaped = new StringBuilder(text.length() + 16);
  for (int i = 0; i < text.length(); i++) {
    char c = text.charAt(i);
    switch (c) {
      case '&':
        escaped.append("&amp;");
        break;
      case '<':
        escaped.append("&lt;");
        break;
      case '>':
        escaped.append("&gt;");
        break;
      case '"':
        escaped.append("&quot;");
        break;
      case '\'':
        escaped.append("&apos;");
        break;
      default:
        escaped.append(c);
        break;
    }
  }
  return escaped.toString();
}
/**
 * Returns the compact bracketed representation of this graph without POS
 * tags; see {@link #toCompactString(boolean)}.
 */
public String toCompactString() {
  final boolean showTags = false;
  return toCompactString(showTags);
}
/**
 * Returns a compact bracketed representation of this graph, e.g.
 * "[ate subj:Bill dobj:[muffins nn:blueberry]]", built from each root in
 * turn. A graph without roots yields a fixed placeholder string.
 *
 * @param showTags if true, each word is followed by "/" and its POS tag
 * @return the compact representation, or "[EMPTY_SEMANTIC_GRAPH]" /
 *         "[UNROOTED_SEMANTIC_GRAPH]" when there are no roots
 */
public String toCompactString(boolean showTags) {
  Collection<IndexedWord> rootNodes = getRoots();
  if (rootNodes.isEmpty()) {
    return (size() == 0) ? "[EMPTY_SEMANTIC_GRAPH]" : "[UNROOTED_SEMANTIC_GRAPH]";
  }
  StringBuilder compact = new StringBuilder();
  Set<IndexedWord> visited = wordMapFactory.newSet();
  for (IndexedWord root : rootNodes) {
    toCompactStringHelper(root, compact, visited, showTags);
  }
  return compact.toString();
}
/**
 * Recursive worker for {@link #toCompactString(boolean)}. Appends the node
 * (wrapped in brackets when it has outgoing edges) followed by
 * " relation:child" for each sorted outgoing edge. A child that was already
 * visited is written as a bare word, which keeps cycles from recursing
 * forever.
 *
 * @param node the node to render
 * @param sb buffer receiving the output
 * @param used nodes already rendered; {@code node} is added to it
 * @param showTags if true, words are followed by "/" and their POS tag
 * @throws IllegalArgumentException wrapping any IllegalArgumentException
 *           raised while rendering, after logging the node to System.err
 */
private void toCompactStringHelper(IndexedWord node, StringBuilder sb, Set<IndexedWord> used, boolean showTags) {
  used.add(node);
  try {
    final boolean hasChildren = outDegree(node) > 0;
    if (hasChildren) {
      sb.append("[");
    }
    sb.append(node.word());
    if (showTags) {
      sb.append("/").append(node.tag());
    }
    for (SemanticGraphEdge edge : getOutEdgesSorted(node)) {
      IndexedWord child = edge.getTarget();
      sb.append(" ").append(edge.getRelation()).append(":");
      if (used.contains(child)) {
        // Already rendered: emit the word only, to avoid an infinite loop.
        sb.append(child.word());
        if (showTags) {
          sb.append("/").append(child.tag());
        }
      } else {
        toCompactStringHelper(child, sb, used, showTags);
      }
    }
    if (hasChildren) {
      sb.append("]");
    }
  } catch (IllegalArgumentException e) {
    System.err.println("WHOA! SemanticGraph.toCompactStringHelper() ran into problems at node " + node);
    throw new IllegalArgumentException(e);
  }
}
/**
 * Formats this semantic graph with the default semantic graph formatter.
 *
 * @return the formatted <code>String</code> representation
 */
public String toFormattedString() {
  String formatted = formatter.formatSemanticGraph(this);
  return formatted;
}
/**
 * Formats this semantic graph with the supplied semantic graph formatter.
 *
 * @param formatter the formatter to use
 * @return the formatted <code>String</code> representation
 */
public String toFormattedString(SemanticGraphFormatter formatter) {
  String formatted = formatter.formatSemanticGraph(this);
  return formatted;
}
/**
 * Prints this semantic graph to <code>System.out</code>, formatted by the
 * supplied semantic graph formatter and followed by a line break.
 *
 * @param formatter the formatter to use
 */
public void prettyPrint(SemanticGraphFormatter formatter) {
  String formatted = formatter.formatSemanticGraph(this);
  System.out.println(formatted);
}
/**
 * Prints this semantic graph to <code>System.out</code>, formatted by the
 * default semantic graph formatter and followed by a line break.
 */
public void prettyPrint() {
  String formatted = formatter.formatSemanticGraph(this);
  System.out.println(formatted);
}
/**
 * Returns this graph as an unnamed dot format digraph (the graph name is
 * the empty string). Nodes are labeled with the word and edges with the
 * dependency.
 */
public String toDotFormat() {
  final String noName = "";
  return toDotFormat(noName);
}
/**
 * Returns this graph as a dot format digraph with the given name. Nodes are
 * labeled with the word rendered in
 * {@code CoreLabel.OutputFormat.VALUE_TAG_INDEX} format and edges with the
 * dependency.
 *
 * @param graphname name placed after the "digraph" keyword
 */
public String toDotFormat(String graphname) {
  final CoreLabel.OutputFormat labelFormat = CoreLabel.OutputFormat.VALUE_TAG_INDEX;
  return toDotFormat(graphname, labelFormat);
}
/**
 * Returns this graph as a dot format digraph. Every vertex becomes a node
 * named "N_" plus its word index, labeled with the word in the given
 * format; every edge becomes an arc labeled with its relation. Labels are
 * written verbatim, without escaping.
 *
 * @param graphname name placed after the "digraph" keyword
 * @param indexedWordFormat format used for the node labels
 * @return the dot source text
 */
public String toDotFormat(String graphname, CoreLabel.OutputFormat indexedWordFormat) {
  StringBuilder dot = new StringBuilder();
  dot.append("digraph ").append(graphname).append(" {\n");
  for (IndexedWord word : graph.getAllVertices()) {
    dot.append(" N_").append(word.index())
        .append(" [label=\"").append(word.toString(indexedWordFormat))
        .append("\"];\n");
  }
  for (SemanticGraphEdge edge : graph.edgeIterable()) {
    dot.append(" N_").append(edge.getSource().index())
        .append(" -> N_").append(edge.getTarget().index())
        .append(" [label=\"").append(edge.getRelation()).append("\"];\n");
  }
  dot.append("}\n");
  return dot.toString();
}
/**
 * Creates a new edge from {@code s} to {@code d} and adds it to the
 * underlying graph.
 *
 * @param s source (governor) vertex
 * @param d destination (dependent) vertex
 * @param reln grammatical relation labeling the edge
 * @param weight weight stored on the edge
 * @param isExtra "extra" flag stored on the edge
 * @return the newly created edge
 */
public SemanticGraphEdge addEdge(IndexedWord s, IndexedWord d, GrammaticalRelation reln, double weight, boolean isExtra) {
  final SemanticGraphEdge created = new SemanticGraphEdge(s, d, reln, weight, isExtra);
  graph.add(s, d, created);
  return created;
}
// =======================================================================
/**
 * Tries to parse a String representing a SemanticGraph. Right now it's
 * fairly dumb, could be made more sophisticated.
 * <p>
 * Example: "[ate subj:Bill dobj:[muffins nn:blueberry]]"
 * <p>
 * This is the same format generated by toCompactString().
 *
 * @param s the string to parse
 * @return the parsed graph, or null if the string does not start with '['
 *         or cannot be parsed
 */
public static SemanticGraph valueOf(String s) {
  SemanticGraphParsingTask task = new SemanticGraphParsingTask(s);
  return task.parse();
}
/**
 * Creates an empty semantic graph with no vertices, edges or roots.
 */
public SemanticGraph() {
  this.graph = new DirectedMultiGraph<IndexedWord, SemanticGraphEdge>(outerMapFactory, innerMapFactory);
  this.roots = wordMapFactory.newSet();
}
/**
 * Creates a new SemanticGraph which is a copy of the supplied graph. Both
 * the nodes ({@link IndexedWord}s) and the edges (SemanticGraphEdges) are
 * copied; the old-to-new node mapping is not exposed.
 *
 * @param g the graph to copy
 */
public SemanticGraph(SemanticGraph g) {
  this(g, (Map<IndexedWord, IndexedWord>) null);
}
/**
 * Copies the given graph and records the mapping from its vertices to the
 * newly created copies.
 *
 * @param g the graph to copy
 * @param prevToNewMap map that receives an entry (old vertex, new vertex)
 *          for every vertex; may be null, in which case an internal map is
 *          used
 */
public SemanticGraph(SemanticGraph g,
                     Map<IndexedWord, IndexedWord> prevToNewMap) {
  graph = new DirectedMultiGraph<IndexedWord, SemanticGraphEdge>(outerMapFactory, innerMapFactory);

  Map<IndexedWord, IndexedWord> oldToNew = prevToNewMap;
  if (oldToNew == null) {
    oldToNew = wordMapFactory.newMap();
  }

  // Copy the vertices first so that roots and edges can be re-targeted.
  for (IndexedWord original : g.vertexSet()) {
    IndexedWord copy = new IndexedWord(original);
    addVertex(copy);
    oldToNew.put(original, copy);
  }

  roots = wordMapFactory.newSet();
  for (IndexedWord originalRoot : g.getRoots()) {
    roots.add(oldToNew.get(originalRoot));
  }

  for (SemanticGraphEdge edge : g.edgeIterable()) {
    addEdge(oldToNew.get(edge.getGovernor()),
            oldToNew.get(edge.getDependent()),
            edge.getRelation(), edge.getWeight(), edge.isExtra());
  }
}
/**
 * This is the constructor used by the parser. Builds the graph from typed
 * dependencies: a dependency with the ROOT relation marks its dependent as
 * a root, every other dependency becomes an edge with weight
 * {@code Double.NEGATIVE_INFINITY}.
 *
 * @param dependencies the typed dependencies to convert; may be empty
 */
public SemanticGraph(Collection<TypedDependency> dependencies) {
  graph = new DirectedMultiGraph<IndexedWord, SemanticGraphEdge>(outerMapFactory, innerMapFactory);
  roots = wordMapFactory.newSet();

  for (TypedDependency d : dependencies) {
    GrammaticalRelation reln = d.reln();
    IndexedWord dep = d.dep();
    if (reln == ROOT) {
      // The root relation only points to the root: its governor is a fake
      // node that we don't want to add to the graph.
      addVertex(dep);
      roots.add(dep);
      continue;
    }
    // No explicit addVertex calls here: per the original note, addEdge
    // implicitly adds the vertices if needed.
    addEdge(d.gov(), dep, reln, Double.NEGATIVE_INFINITY, d.extra());
  }

  // There used to be an if clause that filtered out the case of empty
  // dependencies. However, I could not understand (or replicate) the error
  // it alluded to, and it led to empty dependency graphs for very short
  // fragments, which meant they were ignored by the RTE system.
  // Changed. (pado)
  // See also SemanticGraphFactory.makeGraphFromTree().
}
/**
 * Returns the nodes on the shortest undirected path between two nodes of
 * the graph, as computed by the underlying graph with its boolean flag set
 * to false. Per the original documentation, if source == target a
 * singleton list is returned.
 *
 * @param source
 *          start node
 * @param target
 *          end node
 * @return nodes along the shortest undirected path from source to target,
 *         in order
 */
public List<IndexedWord> getShortestUndirectedPathNodes(IndexedWord source, IndexedWord target) {
  final boolean directed = false;
  return graph.getShortestPath(source, target, directed);
}
/**
 * Edge counterpart of {@link #getShortestUndirectedPathNodes}: returns the
 * edges of the shortest undirected path between two nodes.
 *
 * @param source start node
 * @param target end node
 * @return the edges reported by the underlying graph for that path
 */
public List<SemanticGraphEdge> getShortestUndirectedPathEdges(IndexedWord source, IndexedWord target) {
  final boolean directed = false;
  return graph.getShortestPathEdges(source, target, directed);
}
/**
 * Returns the nodes on the shortest directed path between two nodes of the
 * graph, as computed by the underlying graph with its boolean flag set to
 * true.
 *
 * @param source
 *          start node
 * @param target
 *          end node
 * @return shortest directed path from source to target
 */
public List<IndexedWord> getShortestDirectedPathNodes(IndexedWord source, IndexedWord target) {
  final boolean directed = true;
  return graph.getShortestPath(source, target, directed);
}
/**
 * Edge counterpart of {@link #getShortestDirectedPathNodes}: returns the
 * edges of the shortest directed path between two nodes.
 *
 * @param source start node
 * @param target end node
 * @return the edges reported by the underlying graph for that path
 */
public List<SemanticGraphEdge> getShortestDirectedPathEdges(IndexedWord source, IndexedWord target) {
  final boolean directed = true;
  return graph.getShortestPathEdges(source, target, directed);
}
// ============================================================================
// Matches a vertex label of the form "word-index" as a whole: group 1 is a
// non-empty run of characters other than '-', group 2 a run of decimal digits.
// Used by SemanticGraphParsingTask.readWordAndIndex().
private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("([^-]+)-([0-9]+)");
/**
* This nested class is a helper for valueOf(). It represents the task of
* parsing a specific String representing a SemanticGraph, in the bracketed
* format "[ate subj:Bill dobj:[muffins nn:blueberry]]".
* <p>
* Grammar as implemented by readDep(): a dependent is either a bare name (a
* leaf) or '[' name (relation ':' dependent)* ']'. A name may carry an index
* ("word-3") and/or a POS tag ("word/TAG"); see makeVertex().
*/
private static class SemanticGraphParsingTask extends StringParsingTask<SemanticGraph> {
// Graph under construction; created anew at the start of parse().
private SemanticGraph sg;
// Vertex indices assigned so far, consulted by getNextFreeIndex().
private Set<Integer> indexesUsed = Generics.newHashSet();
public SemanticGraphParsingTask(String s) {
super(s);
}
/**
* Parses the string given to the constructor.
*
* @return the parsed graph, or null if the input does not start with '['
* (after whitespace) or a ParserException is raised; in the latter
* case a warning is printed to System.err
*/
@Override
public SemanticGraph parse() {
sg = new SemanticGraph();
try {
readWhiteSpace();
if (!isLeftBracket(peek()))
return null;
// The outermost dependent has no governor and no relation: it is the root.
readDep(null, null);
return sg;
} catch (ParserException e) {
System.err.println("SemanticGraphParser warning: " + e.getMessage());
return null;
}
}
/**
* Reads one dependent, adds it to the graph and, for a bracketed
* dependent, recursively reads its own dependents.
*
* @param gov governor of the dependent to read, or null for the root
* @param reln name of the relation from gov to the dependent, or null
*/
private void readDep(IndexedWord gov, String reln) {
readWhiteSpace();
if (!isLeftBracket(peek())) { // it's a leaf
String label = readName();
IndexedWord dep = makeVertex(label);
sg.addVertex(dep);
if (gov == null)
sg.roots.add(dep);
// NOTE(review): unlike the bracketed branch below, the edge is added
// without checking gov != null. parse() only passes a null governor when
// a '[' follows, so this branch is not reached with gov == null from there.
sg.addEdge(gov, dep, GrammaticalRelation.valueOf(reln), Double.NEGATIVE_INFINITY, false);
} else {
readLeftBracket();
String label = readName();
IndexedWord dep = makeVertex(label);
sg.addVertex(dep);
if (gov == null)
sg.roots.add(dep);
if (gov != null && reln != null) {
sg.addEdge(gov, dep, GrammaticalRelation.valueOf(reln), Double.NEGATIVE_INFINITY, false);
}
readWhiteSpace();
// Read "relation:dependent" pairs until the closing bracket or end of input.
while (!isRightBracket(peek()) && !isEOF) {
reln = readName();
readColon();
readDep(dep, reln);
readWhiteSpace();
}
readRightBracket();
}
}
/**
* Creates the vertex for a label. A label matching "word-index" supplies
* its own index; otherwise the smallest index not yet used is assigned.
* The remaining text is split on "/": the first part becomes the text and
* value of the vertex, the second part (if present) its POS tag.
*/
private IndexedWord makeVertex(String word) {
Integer index; // initialized below
Pair<String, Integer> wordAndIndex = readWordAndIndex(word);
if (wordAndIndex != null) {
word = wordAndIndex.first();
index = wordAndIndex.second();
} else {
index = getNextFreeIndex();
}
indexesUsed.add(index);
// Note that, despite the use of indexesUsed and getNextFreeIndex(),
// nothing is actually enforcing that no indexes are used twice. This
// could occur if some words in the string representation being parsed
// come with index markers and some do not.
IndexedWord ifl = new IndexedWord(null, 0, index);
String[] wordAndTag = word.split("/");
ifl.set(CoreAnnotations.TextAnnotation.class, wordAndTag[0]);
ifl.set(CoreAnnotations.ValueAnnotation.class, wordAndTag[0]);
if (wordAndTag.length > 1)
ifl.set(CoreAnnotations.PartOfSpeechAnnotation.class, wordAndTag[1]);
return ifl;
}
/**
* Splits a label of the form "word-index" into its two parts.
*
* @return the (word, index) pair, or null if the whole label does not
* match WORD_AND_INDEX_PATTERN
*/
private static Pair<String, Integer> readWordAndIndex(String word) {
Matcher matcher = WORD_AND_INDEX_PATTERN.matcher(word);
if (!matcher.matches()) {
return null;
} else {
word = matcher.group(1);
Integer index = Integer.valueOf(matcher.group(2));
return new Pair<String, Integer>(word, index);
}
}
/** Returns the smallest non-negative integer not contained in indexesUsed. */
private Integer getNextFreeIndex() {
int i = 0;
while (indexesUsed.contains(i))
i++;
return i;
}
/**
* Skips whitespace and consumes one character, which must be '['.
* (The exception message says "paren" although a bracket is expected.)
*/
private void readLeftBracket() {
// System.out.println("Read left.");
readWhiteSpace();
char ch = read();
if (!isLeftBracket(ch))
throw new ParserException("Expected left paren!");
}
/**
* Skips whitespace and consumes one character, which must be ']'.
* (The exception message says "paren" although a bracket is expected.)
*/
private void readRightBracket() {
// System.out.println("Read right.");
readWhiteSpace();
char ch = read();
if (!isRightBracket(ch))
throw new ParserException("Expected right paren!");
}
/**
* Skips whitespace and consumes a ':' if one follows. A missing colon is
* tolerated silently.
*/
private void readColon() {
readWhiteSpace();
if (isColon(peek()))
read();
}
private static boolean isLeftBracket(char ch) {
return ch == '[';
}
private static boolean isRightBracket(char ch) {
return ch == ']';
}
private static boolean isColon(char ch) {
return ch == ':';
}
// Treats '[', ']' and ':' as punctuation; presumably the base class uses
// this to decide where a name read by readName() ends (TODO confirm).
@Override
protected boolean isPunct(char ch) {
return isLeftBracket(ch) || isRightBracket(ch) || isColon(ch);
}
} // end SemanticGraphParsingTask
// =======================================================================
/**
 * Two semantic graphs are equal when both their underlying graphs and
 * their root sets are equal.
 */
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (o instanceof SemanticGraph) {
    SemanticGraph other = (SemanticGraph) o;
    if (!graph.equals(other.graph)) {
      return false;
    }
    return roots.equals(other.roots);
  }
  return false;
}
/**
 * Hash code of the underlying graph. The root set is not part of the hash,
 * which is still consistent with {@link #equals(Object)} because equal
 * semantic graphs have equal underlying graphs.
 */
@Override
public int hashCode() {
  final int graphHash = graph.hashCode();
  return graphHash;
}
/**
 * Returns all edges of this graph whose relation equals the given target
 * relation. Edges with a null relation never match.
 *
 * @param tgtRelation the relation to look for
 * @return a new list with the matching edges, in edge iteration order
 */
public List<SemanticGraphEdge> findAllRelns(GrammaticalRelation tgtRelation) {
  List<SemanticGraphEdge> matches = new ArrayList<SemanticGraphEdge>();
  for (SemanticGraphEdge edge : edgeIterable()) {
    GrammaticalRelation candidate = edge.getRelation();
    if (candidate == null || !candidate.equals(tgtRelation)) {
      continue;
    }
    matches.add(edge);
  }
  return matches;
}
/**
 * Converts this graph to typed dependencies. Each root yields a ROOT
 * dependency governed by a single artificial "ROOT" word with index 0
 * (created from the document id and sentence index of the first root);
 * each edge then yields one dependency.
 *
 * @return a new collection with the typed dependencies of this graph
 */
public Collection<TypedDependency> typedDependencies() {
  Collection<TypedDependency> result = new ArrayList<TypedDependency>();

  IndexedWord fakeRoot = null;
  for (IndexedWord node : roots) {
    if (fakeRoot == null) {
      // Created lazily, so it exists only when there is at least one root.
      fakeRoot = new IndexedWord(node.docID(), node.sentIndex(), 0);
      fakeRoot.setValue("ROOT");
    }
    result.add(new TypedDependency(ROOT, fakeRoot, node));
  }

  for (SemanticGraphEdge e : this.edgeIterable()) {
    result.add(new TypedDependency(e.getRelation(), e.getGovernor(), e.getDependent()));
  }
  return result;
}
// Version identifier for Java serialization (java.io.Serializable is imported by this file).
private static final long serialVersionUID = 1L;
}