package edu.stanford.nlp.parser.shiftreduce;
import java.io.Serializable;
import java.util.List;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.TreeShapedStack;
/**
 * Base class for objects that turn a parser {@link State} into a list of
 * string features.  Subclasses implement
 * {@link #featurize(State, List)}; the static helpers in this class look up
 * {@link CoreLabel}s on the stack and in the sentence queue and extract
 * string values from them.
 * <br>
 * All tree-reading helpers require that the trees they touch are labeled
 * with {@link CoreLabel}; otherwise they throw
 * {@link IllegalArgumentException}.
 */
public abstract class FeatureFactory implements Serializable {

  /**
   * Featurizes the given state into a freshly created list.
   *
   * @param state the state to featurize
   * @return whatever {@link #featurize(State, List)} returns when handed the
   *         new list
   */
  public List<String> featurize(State state) {
    // 200 is passed through to Generics.newArrayList; presumably an initial
    // capacity hint -- confirm against Generics.
    List<String> features = Generics.<String>newArrayList(200);
    return featurize(state, features);
  }

  /**
   * Featurizes the given state using the supplied list.
   *
   * @param state    the state to featurize
   * @param features a list supplied by the caller for the implementation to
   *                 use (how it is used is up to the subclass)
   * @return the list of features
   */
  abstract public List<String> featurize(State state, List<String> features);

  /**
   * Directions used when walking down a tree from a stack node.  In
   * {@link #getStackLabel}, {@code LEFT} and {@code RIGHT} select child 0 and
   * child 1 of a two-child node and {@code UNARY} selects the only child of
   * a one-child node.  {@link #getRecentDependent} accepts only
   * {@code LEFT} and {@code RIGHT}.
   */
  enum Transition {
    LEFT, RIGHT, UNARY
  }

  /**
   * The piece of a {@link CoreLabel} that
   * {@link #getFeatureFromCoreLabel} extracts.  Each constant carries a
   * short name ("W", "T" or "C").
   */
  enum FeatureComponent {
    HEADWORD ("W"), HEADTAG ("T"), VALUE ("C");

    private final String shortName;

    FeatureComponent(String shortName) {
      this.shortName = shortName;
    }

    /** Returns the short name given to this constant at construction. */
    public String shortName() { return shortName; }
  }

  /** Value returned by {@link #getFeatureFromCoreLabel} for a null label. */
  static final String NULL = "*NULL*";

  /**
   * Extracts one string component from a label.
   * <ul>
   * <li>{@code HEADWORD}: the value of the label of the tree stored under
   *     {@code HeadWordAnnotation}</li>
   * <li>{@code HEADTAG}: the value of the label of the tree stored under
   *     {@code HeadTagAnnotation}</li>
   * <li>{@code VALUE}: the label's own value</li>
   * </ul>
   * NOTE(review): a non-null label that lacks the requested head annotation
   * is not guarded against here; the chained call would fail with a
   * NullPointerException.
   *
   * @param label   the label to read, may be null
   * @param feature which component to extract
   * @return the extracted string, or {@link #NULL} if {@code label} is null
   * @throws IllegalArgumentException if {@code feature} is not one of the
   *         handled constants
   */
  public static String getFeatureFromCoreLabel(CoreLabel label, FeatureComponent feature) {
    switch (feature) {
    case HEADWORD:
      return (label == null) ? NULL : label.get(TreeCoreAnnotations.HeadWordAnnotation.class).label().value();
    case HEADTAG:
      return (label == null) ? NULL : label.get(TreeCoreAnnotations.HeadTagAnnotation.class).label().value();
    case VALUE:
      return (label == null) ? NULL : label.value();
    default:
      throw new IllegalArgumentException("Unexpected feature type: " + feature);
    }
  }

  /**
   * Returns the tree found {@code nodeNum} pops below the top of the stack.
   * Shared by {@link #getRecentDependent} and {@link #getStackLabel} so both
   * treat short stacks and null entries the same way.
   *
   * @return the tree at that depth, or null if {@code stack.size() <= nodeNum}
   *         or the entry at that depth is itself null
   */
  private static Tree getStackNode(TreeShapedStack<Tree> stack, int nodeNum) {
    if (stack.size() <= nodeNum) {
      return null;
    }
    // pop() hands back the remaining stack; only the local reference is
    // reassigned here.
    for (int i = 0; i < nodeNum; ++i) {
      stack = stack.pop();
    }
    return stack.peek();
  }

  /**
   * Starting from the stack node at depth {@code nodeNum}, walks down the
   * tree on the requested side and returns the label of the first child
   * whose {@code HeadWordAnnotation} is not the same object as the starting
   * node's.
   * <br>
   * For {@code LEFT} the walk always follows child 0.  For {@code RIGHT} the
   * walk follows child 1, stepping through one-child nodes via their only
   * child without comparing heads.
   *
   * @param stack      the stack to read from
   * @param transition {@code LEFT} or {@code RIGHT}
   * @param nodeNum    how many entries below the top of the stack to start at
   * @return the label of the first differing-head child, or null if the
   *         stack has no entry at that depth, the entry is null, or the walk
   *         reaches a node with no children
   * @throws IllegalArgumentException if a visited tree is not labeled with a
   *         {@link CoreLabel}, or if {@code transition} is neither
   *         {@code LEFT} nor {@code RIGHT}
   */
  public static CoreLabel getRecentDependent(TreeShapedStack<Tree> stack, Transition transition, int nodeNum) {
    Tree node = getStackNode(stack, nodeNum);
    if (node == null) {
      return null;
    }

    // The label check happens before the transition is examined, so a
    // non-CoreLabel tree is reported first even for an unsupported transition.
    Tree head = getCoreLabel(node).get(TreeCoreAnnotations.HeadWordAnnotation.class);

    switch (transition) {
    case LEFT: {
      while (true) {
        Tree[] children = node.children();
        if (children.length == 0) {
          return null;
        }
        Tree child = children[0];
        CoreLabel childLabel = getCoreLabel(child);
        // Identity comparison: looking for a child headed by a different tree object.
        if (childLabel.get(TreeCoreAnnotations.HeadWordAnnotation.class) != head) {
          return childLabel;
        }
        node = child;
      }
    }
    case RIGHT: {
      while (true) {
        Tree[] children = node.children();
        if (children.length == 0) {
          return null;
        }
        if (children.length == 1) {
          // A one-child node has no child 1; descend through its only child.
          node = children[0];
          continue;
        }
        Tree child = children[1];
        CoreLabel childLabel = getCoreLabel(child);
        if (childLabel.get(TreeCoreAnnotations.HeadWordAnnotation.class) != head) {
          return childLabel;
        }
        node = child;
      }
    }
    default:
      throw new IllegalArgumentException("Can only get left or right heads");
    }
  }

  /**
   * Returns the label of the node reached by starting at the stack entry at
   * depth {@code nodeNum} and following the given transitions in order.
   * {@code LEFT} and {@code RIGHT} require the current node to have exactly
   * two children and move to child 0 or child 1; {@code UNARY} requires
   * exactly one child and moves to it.
   *
   * @param stack       the stack to read from
   * @param nodeNum     how many entries below the top of the stack to start at
   * @param transitions the path to follow from the starting node, may be empty
   * @return the label of the node reached, or null if the stack has no entry
   *         at that depth, the entry is null, or a step does not match the
   *         number of children of the current node
   * @throws IllegalArgumentException if the node reached is not labeled with
   *         a {@link CoreLabel}, or a transition is not a handled constant
   */
  public static CoreLabel getStackLabel(TreeShapedStack<Tree> stack, int nodeNum, Transition ... transitions) {
    Tree node = getStackNode(stack, nodeNum);
    if (node == null) {
      // Same handling as getRecentDependent: a missing entry yields no label.
      return null;
    }

    // TODO: this is nice for code readability, but might be expensive
    for (Transition t : transitions) {
      Tree[] children = node.children();
      switch (t) {
      case LEFT:
        if (children.length != 2) {
          return null;
        }
        node = children[0];
        break;
      case RIGHT:
        if (children.length != 2) {
          return null;
        }
        node = children[1];
        break;
      case UNARY:
        if (children.length != 1) {
          return null;
        }
        node = children[0];
        break;
      default:
        throw new IllegalArgumentException("Unknown transition type " + t);
      }
    }

    return getCoreLabel(node);
  }

  /**
   * Returns the label of the sentence token at
   * {@code state.tokenPosition + offset}.
   *
   * @see #getQueueLabel(List, int, int)
   */
  public static CoreLabel getQueueLabel(State state, int offset) {
    return getQueueLabel(state.sentence, state.tokenPosition, offset);
  }

  /**
   * Returns the label of the sentence token at {@code tokenPosition + offset}.
   *
   * @param sentence      the list of token trees
   * @param tokenPosition the base index into {@code sentence}
   * @param offset        added to {@code tokenPosition}; may be negative
   * @return the label at that index, or null if the index is outside
   *         {@code [0, sentence.size())}
   * @throws IllegalArgumentException if the tree at that index is not
   *         labeled with a {@link CoreLabel}
   */
  public static CoreLabel getQueueLabel(List<Tree> sentence, int tokenPosition, int offset) {
    int index = tokenPosition + offset;
    if (index < 0 || index >= sentence.size()) {
      return null;
    }
    return getCoreLabel(sentence.get(index));
  }

  /**
   * Returns the given tree's label as a {@link CoreLabel}.
   *
   * @param node the tree whose label is wanted
   * @return the label, cast to {@link CoreLabel}
   * @throws IllegalArgumentException if the label is not a {@link CoreLabel}
   */
  public static CoreLabel getCoreLabel(Tree node) {
    if (!(node.label() instanceof CoreLabel)) {
      throw new IllegalArgumentException("Can only featurize CoreLabel trees");
    }
    return (CoreLabel) node.label();
  }

  private static final long serialVersionUID = -9086427962537286031L;
}