// Stanford Dependencies - Code for producing and using Stanford dependencies.
// Copyright © 2005-2014 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
// For more information, bug reports, fixes, contact:
// Christopher Manning
// Dept of Computer Science, Gates 1A
// Stanford CA 94305-9010
// USA
// parser-support@lists.stanford.edu
// http://nlp.stanford.edu/software/stanford-dependencies.shtml
package edu.stanford.nlp.trees;
import edu.stanford.nlp.international.Languages;
import edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalRelations;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.trees.tregex.TregexPatternCompiler;
import edu.stanford.nlp.util.ArraySet;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.StringUtils;
import java.io.ObjectStreamException;
import java.io.Serializable;
import java.lang.ref.SoftReference;
import java.util.*;
import java.util.regex.Pattern;
/**
* {@code GrammaticalRelation} is used to define a
* standardized, hierarchical set of grammatical relations,
* together with patterns for identifying them in
* parse trees.<p>
*
* Each <code>GrammaticalRelation</code> has:
* <ul>
* <li>A <code>String</code> short name, which should be a lowercase
* abbreviation of some kind (in the fure mainly Universal Dependency names).</li>
* <li>A <code>String</code> long name, which should be descriptive.</li>
* <li>A parent in the <code>GrammaticalRelation</code> hierarchy.</li>
* <li>A {@link Pattern <code>Pattern</code>} called
* <code>sourcePattern</code> which matches (parent) nodes from which
* this <code>GrammaticalRelation</code> could hold. (Note: this is done
* with the Java regex Pattern <code>matches()</code> predicate. The pattern
* must match the
* whole node name, and <code>^</code> or <code>$</code> aren't needed.
* Tregex constructions like __ do not work. Use ".*" to be applicable
* at all nodes. This prefiltering is used for efficiency.)</li>
* <li>A list of zero or more {@link TregexPattern
* <code>TregexPattern</code>s} called <code>targetPatterns</code>,
* which describe the local tree structure which must hold between
* the source node and a target node for the
* <code>GrammaticalRelation</code> to apply. (Note: {@code tregex}
* regular expressions match with the {@code find()} method, while
* literal string label descriptions that are not regular expressions must
* be {@code equals()}.)</li>
* </ul>
*
* The <code>targetPatterns</code> associated
* with a <code>GrammaticalRelation</code> are designed as follows.
* In order to recognize a grammatical relation X holding between
* nodes A and B in a parse tree, we want to associate with
* <code>GrammaticalRelation</code> X a {@link TregexPattern
* <code>TregexPattern</code>} such that:
* <ul>
* <li>the root of the pattern matches A, and</li>
* <li>the pattern includes a node labeled "target", which matches B.</li>
* </ul>
* For example, for the grammatical relation <code>PREDICATE</code>
* which holds between a clause and its primary verb phrase, we might
* want to use the pattern {@code "S < VP=target"}, in which the
* root will match a clause and the node labeled <code>"target"</code>
* will match the verb phrase.<p>
*
* For a given grammatical relation, the method {@link
* GrammaticalRelation#getRelatedNodes <code>getRelatedNodes()</code>}
* takes a <code>Tree</code> node as an argument and attempts to
* return other nodes which have this grammatical relation to the
* argument node. By default, this method operates as follows: it
* steps through the patterns in the pattern list, trying to match
* each pattern against the argument node, until it finds some
* matches. If a pattern matches, all matching nodes (that is, each
* node which corresponds to node label "target" in some match) are
* returned as a list; otherwise the next pattern is tried.<p>
*
* For some grammatical relations, we need more sophisticated logic to
* identify related nodes. In such cases, {@link
* GrammaticalRelation#getRelatedNodes <code>getRelatedNodes()</code>}
* can be overridden on a per-relation basis using anonymous subclassing.<p>
*
* @see GrammaticalStructure
* @see EnglishGrammaticalStructure
* @see EnglishGrammaticalRelations
* @see edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalRelations
*
* @author Bill MacCartney
* @author Galen Andrew (refactoring English-specific stuff)
* @author Ilya Sherman (refactoring annotation-relation pairing, which is now gone)
*/
public class GrammaticalRelation implements Comparable<GrammaticalRelation>, Serializable {
private static final long serialVersionUID = 892618003417550128L;
private static final boolean DEBUG = System.getProperty("GrammaticalRelation", null) != null;
private static final EnumMap<Language, Map<String, GrammaticalRelation>>
stringsToRelations = new EnumMap<Language, Map<String, GrammaticalRelation>>(Language.class);
/**
* The "governor" grammatical relation, which is the inverse of "dependent".<p>
* <p/>
* Example: "the red car" → <code>gov</code>(red, car)
*/
public static final GrammaticalRelation GOVERNOR =
new GrammaticalRelation(Language.Any, "gov", "governor", null);
/**
* The "dependent" grammatical relation, which is the inverse of "governor".<p>
* <p/>
* Example: "the red car" → <code>dep</code>(car, red)
*/
public static final GrammaticalRelation DEPENDENT =
new GrammaticalRelation(Language.Any, "dep", "dependent", null);
/**
* The "root" grammatical relation between a faked "ROOT" node, and the root of the sentence.
*/
public static final GrammaticalRelation ROOT =
new GrammaticalRelation(Language.Any, "root", "root", null);
/**
* Dummy relation, used while collapsing relations, e.g., in English & Chinese GrammaticalStructure
*/
public static final GrammaticalRelation KILL =
new GrammaticalRelation(Language.Any, "KILL", "dummy relation kill", null);
/**
* Returns the GrammaticalRelation having the given string
* representation (e.g. "nsubj"), or null if no such is found.
*
* @param s The short name of the GrammaticalRelation
* @param values The set of GrammaticalRelations to look for it among.
* @return The GrammaticalRelation with that name
*/
public static GrammaticalRelation valueOf(String s, Collection<GrammaticalRelation> values) {
for (GrammaticalRelation reln : values) {
if (reln.toString().equals(s)) return reln;
}
return null;
}
/** Convert from a String representation of a GrammaticalRelation to a
* GrammaticalRelation. Where possible, you should avoid using this
* method and simply work with true GrammaticalRelations rather than
* String representations. Correct behavior of this method depends
* on the underlying data structure resources used being kept in sync
* with the toString() and equals() methods. However, there is really
* no choice but to use this method when storing GrammaticalRelations
* to text files and then reading them back in, so this method is not
* deprecated.
*
* @param s The String representation of a GrammaticalRelation
* @return The grammatical relation represented by this String
*/
public static GrammaticalRelation valueOf(Language language, String s) {
GrammaticalRelation reln = (stringsToRelations.get(language) != null ? valueOf(s, stringsToRelations.get(language).values()) : null);
if (reln == null) {
// TODO this breaks the hierarchical structure of the classes,
// but it makes English relations that much likelier to work.
reln = EnglishGrammaticalRelations.valueOf(s);
}
if (reln == null) {
// the block below fails when 'specific' includes underscores.
// this is possible on weird web text, which generates relations such as prep______
/*
String[] names = s.split("_");
String specific = names.length > 1? names[1] : null;
reln = new GrammaticalRelation(language, names[0], null, null, null, specific);
*/
String name;
String specific;
int underscorePosition = s.indexOf('_');
if (underscorePosition > 0) {
name = s.substring(0, underscorePosition);
specific = s.substring(underscorePosition + 1);
} else {
name = s;
specific = null;
}
reln = new GrammaticalRelation(language, name, null, null, specific);
}
return reln;
}
private static Map<String, SoftReference<GrammaticalRelation>> valueOfCache = new HashMap<String, SoftReference<GrammaticalRelation>>();
public static GrammaticalRelation valueOf(String s, Languages.Language language) {
GrammaticalRelation value = null;
SoftReference<GrammaticalRelation> possiblyCachedValue = valueOfCache.get(s);
if (possiblyCachedValue != null) { value = possiblyCachedValue.get(); }
if (value == null) { // TODO(gabor) we have the language conversion going on again...
Language depLanguage = Language.Any;
switch (language) {
case Arabic:
break;
case Chinese:
depLanguage = Language.Chinese;
break;
case English:
depLanguage = Language.English;
break;
case German:
break;
case French:
break;
case Hebrew:
break;
case Spanish:
break;
case Unknown:
depLanguage = Language.Any;
break;
}
value = valueOf(depLanguage, s);
valueOfCache.put(s, new SoftReference<>(value));
}
return value;
}
public static GrammaticalRelation valueOf(String s) {
return valueOf(s, Languages.Language.English);
}
/**
* This function is used to determine whether the GrammaticalRelation in
* question is one that was created to be a thin wrapper around a String
* representation by valueOf(String), or whether it is a full-fledged
* GrammaticalRelation created by direct invocation of the constructor.
*
* @return Whether this relation is just a wrapper created by valueOf(String)
*/
public boolean isFromString() {
return longName == null;
}
public static enum Language { Any, English, Chinese }
/* Non-static stuff */
private final Language language;
private final String shortName;
private final String longName;
private final GrammaticalRelation parent;
private final List<GrammaticalRelation> children = new ArrayList<GrammaticalRelation>();
// a regexp for node values at which this relation can hold
private final Pattern sourcePattern;
private final List<TregexPattern> targetPatterns = new ArrayList<TregexPattern>();
private final String specific; // to hold the specific prep or conjunction associated with the grammatical relation
// TODO document constructor
// TODO change to put specificString after longName, and then use String... for targetPatterns
private GrammaticalRelation(Language language,
String shortName,
String longName,
GrammaticalRelation parent,
String sourcePattern,
TregexPatternCompiler tregexCompiler,
String[] targetPatterns,
String specificString) {
this.language = language;
this.shortName = shortName;
this.longName = longName;
this.parent = parent;
this.specific = specificString; // this can be null!
if (parent != null) {
parent.addChild(this);
}
if (sourcePattern != null) {
try {
this.sourcePattern = Pattern.compile(sourcePattern);
} catch (java.util.regex.PatternSyntaxException e) {
throw new RuntimeException("Bad pattern: " + sourcePattern);
}
} else {
this.sourcePattern = null;
}
for (String pattern : targetPatterns) {
try {
TregexPattern p = tregexCompiler.compile(pattern);
this.targetPatterns.add(p);
} catch (edu.stanford.nlp.trees.tregex.TregexParseException pe) {
throw new RuntimeException("Bad pattern: " + pattern, pe);
}
}
Map<String, GrammaticalRelation> sToR = stringsToRelations.get(language);
if (sToR == null) {
sToR = Generics.newHashMap();
stringsToRelations.put(language, sToR);
}
GrammaticalRelation previous = sToR.put(toString(), this);
if (previous != null) {
if (!previous.isFromString() && !isFromString()) {
throw new IllegalArgumentException("There is already a relation named " + toString() + '!');
} else {
/* We get here if we previously just built a fake relation from a string
* we previously read in from a file.
*/
// TODO is it worth copying all of the information from this real
// relation into the old fake one?
}
}
}
// This is the main constructor used
public GrammaticalRelation(Language language,
String shortName,
String longName,
GrammaticalRelation parent,
String sourcePattern,
TregexPatternCompiler tregexCompiler,
String... targetPatterns) {
this(language, shortName, longName, parent, sourcePattern, tregexCompiler, targetPatterns, null);
}
// Used for non-leaf relations with no patterns
public GrammaticalRelation(Language language,
String shortName,
String longName,
GrammaticalRelation parent) {
this(language, shortName, longName, parent, null, null, StringUtils.EMPTY_STRING_ARRAY, null);
}
// used to create collapsed relations with specificString
public GrammaticalRelation(Language language,
String shortName,
String longName,
GrammaticalRelation parent,
String specificString) {
this(language, shortName, longName, parent, null, null, StringUtils.EMPTY_STRING_ARRAY, specificString);
}
private void addChild(GrammaticalRelation child) {
children.add(child);
}
/** Given a {@code Tree} node {@code t}, attempts to
* return a list of nodes to which node {@code t} has this
* grammatical relation, with {@code t} as the governor.
*
* @param t Target for finding dependents of t related by this GR
* @param root The root of the Tree
* @return A Collection of dependent nodes to which t bears this GR
*/
public Collection<TreeGraphNode> getRelatedNodes(TreeGraphNode t, TreeGraphNode root, HeadFinder headFinder) {
Set<TreeGraphNode> nodeList = new ArraySet<TreeGraphNode>();
for (TregexPattern p : targetPatterns) { // cdm: I deleted: && nodeList.isEmpty()
// Initialize the TregexMatcher with the HeadFinder so that we
// can use the same HeadFinder through the entire process of
// building the dependencies
TregexMatcher m = p.matcher(root, headFinder);
while (m.findAt(t)) {
TreeGraphNode target = (TreeGraphNode) m.getNode("target");
if (target == null) {
throw new AssertionError("Expression has no target: " + p);
}
nodeList.add(target);
if (DEBUG) {
System.err.println("found " + this + "(" + t + "-" + t.headWordNode() + ", " + m.getNode("target") + "-" + ((TreeGraphNode) m.getNode("target")).headWordNode() + ") using pattern " + p);
for (String nodeName : m.getNodeNames()) {
if (nodeName.equals("target"))
continue;
System.err.println(" node " + nodeName + ": " + m.getNode(nodeName));
}
}
}
}
return nodeList;
}
/** Returns <code>true</code> iff the value of <code>Tree</code>
* node <code>t</code> matches the <code>sourcePattern</code> for
* this <code>GrammaticalRelation</code>, indicating that this
* <code>GrammaticalRelation</code> is one that could hold between
* <code>Tree</code> node <code>t</code> and some other node.
*/
public boolean isApplicable(Tree t) {
// System.err.println("Testing whether " + sourcePattern + " matches " + ((TreeGraphNode) t).toOneLineString());
return (sourcePattern != null) && (t.value() != null) &&
sourcePattern.matcher(t.value()).matches();
}
/** Returns whether this is equal to or an ancestor of gr in the grammatical relations hierarchy. */
public boolean isAncestor(GrammaticalRelation gr) {
while (gr != null) {
// Changed this test from this == gr (mrsmith)
if (this.equals(gr)) { return true; }
gr = gr.parent;
}
return false;
}
/**
* Returns short name (abbreviation) for this
* <code>GrammaticalRelation</code>. toString() for collapsed
* relations will include the word that was collapsed.
* <br/>
* <i>Implementation note:</i> Note that this method must be synced with
* the equals() and valueOf(String) methods
*/
@Override
public final String toString() {
if (specific == null) {
return shortName;
} else {
return shortName + '_' + specific;
}
}
/**
* Returns a <code>String</code> representation of this
* <code>GrammaticalRelation</code> and the hierarchy below
* it, with one node per line, indented according to level.
*
* @return <code>String</code> representation of this
* <code>GrammaticalRelation</code>
*/
public String toPrettyString() {
StringBuilder buf = new StringBuilder("\n");
toPrettyString(0, buf);
return buf.toString();
}
/**
* Returns a <code>String</code> representation of this
* <code>GrammaticalRelation</code> and the hierarchy below
* it, with one node per line, indented according to
* <code>indentLevel</code>.
*
* @param indentLevel how many levels to indent (0 for root node)
*/
private void toPrettyString(int indentLevel, StringBuilder buf) {
for (int i = 0; i < indentLevel; i++) {
buf.append(" ");
}
buf.append(shortName).append(" (").append(longName).append("): ").append(targetPatterns);
for (GrammaticalRelation child : children) {
buf.append('\n');
child.toPrettyString(indentLevel + 1, buf);
}
}
/** Grammatical relations are equal with other grammatical relations if they
* have the same shortName and specific (if present).
* <i>Implementation note:</i> Note that this method must be synced with
* the toString() and valueOf(String) methods
*
* @param o Object to be compared
* @return Whether equal
*/
@SuppressWarnings({"StringEquality", "ThrowableInstanceNeverThrown"})
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o instanceof String) {
// TODO: Remove this. It's broken but was meant to cover legacy code. It would be correct to just return false.
new Throwable("Warning: comparing GrammaticalRelation to String").printStackTrace();
return this.toString().equals(o);
}
if (!(o instanceof GrammaticalRelation)) return false;
final GrammaticalRelation gr = (GrammaticalRelation) o;
// == okay for language as enum!
// TODO(gabor) perhaps Language.Any shouldn't be equal to any language? This is a bit of a hack around some dependencies caring about language and others not.
return (this.language == Language.Any || gr.language == Language.Any || this.language == gr.language) &&
this.shortName.equals(gr.shortName) &&
(this.specific == gr.specific ||
(this.specific != null && this.specific.equals(gr.specific)));
}
@Override
public int hashCode() {
int result = 17;
result = 29 * result + (language != null ? language.toString().hashCode() : 0);
result = 29 * result + (shortName != null ? shortName.hashCode() : 0);
result = 29 * result + (specific != null ? specific.hashCode() : 0);
return result;
}
@Override
public int compareTo(GrammaticalRelation o) {
String thisN = this.toString();
String oN = o.toString();
return thisN.compareTo(oN);
}
public String getLongName() {
return longName;
}
public String getShortName() {
return shortName;
}
// TODO(gabor) this is nontrivially brittle. I guess in the long term we should only have one "Language" enum?
/**
* Get the language of the grammatical relation.
*/
public Languages.Language getLanguage() {
switch (this.language) {
case Any: return Languages.Language.Unknown;
case English: return Languages.Language.English;
case Chinese: return Languages.Language.Chinese;
default:
throw new IllegalStateException("Unknown language: " + this.language);
}
}
public String getSpecific() {
return specific;
}
/**
* When deserializing a GrammaticalRelation, it needs to be matched
* up with the existing singleton relation of the same type.
*
* TODO: there are a bunch of things wrong with this. For one
* thing, it's crazy slow, since it goes through all the existing
* relations in an array. For another, it would be cleaner to have
* subclasses for the English and Chinese relations
*/
protected Object readResolve() throws ObjectStreamException {
switch (language) {
case Any: {
if (shortName.equals(GOVERNOR.shortName)) {
return GOVERNOR;
} else if (shortName.equals(DEPENDENT.shortName)) {
return DEPENDENT;
} else if (shortName.equals(ROOT.shortName)) {
return ROOT;
} else if (shortName.equals(KILL.shortName)) {
return KILL;
} else {
throw new RuntimeException("Unknown general relation " + shortName);
}
}
case English: {
GrammaticalRelation rel = EnglishGrammaticalRelations.valueOf(toString());
if (rel == null) {
switch (shortName) {
case "conj":
return EnglishGrammaticalRelations.getConj(specific);
case "prep":
return EnglishGrammaticalRelations.getPrep(specific);
case "prepc":
return EnglishGrammaticalRelations.getPrepC(specific);
default:
// TODO: we need to figure out what to do with relations
// which were serialized and then deprecated. Perhaps there
// is a good way to make them singletons
return this;
//throw new RuntimeException("Unknown English relation " + this);
}
} else {
return rel;
}
}
case Chinese: {
GrammaticalRelation rel = ChineseGrammaticalRelations.valueOf(toString());
if (rel == null) {
// TODO: we need to figure out what to do with relations
// which were serialized and then deprecated. Perhaps there
// is a good way to make them singletons
return this;
//throw new RuntimeException("Unknown Chinese relation " + this);
}
return rel;
}
default: {
throw new RuntimeException("Unknown language " + language);
}
}
}
/**
* Returns the parent of this <code>GrammaticalRelation</code>.
*/
public GrammaticalRelation getParent() {
return parent;
}
public static void main(String[] args) {
final String[] names = {"dep", "pred", "prep_to","rcmod"};
for (String name : names) {
GrammaticalRelation reln = valueOf(Language.English, name);
System.out.println("Data for GrammaticalRelation loaded as valueOf(\"" + name + "\"):");
System.out.println("\tShort name: " + reln.getShortName());
System.out.println("\tLong name: " + reln.getLongName());
System.out.println("\tSpecific name: " + reln.getSpecific());
}
}
}