package edu.stanford.nlp.pipeline;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.ie.regexp.RegexNERSequenceClassifier;
import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Timing;
/**
* This class adds gender information (MALE / FEMALE) to tokens as GenderAnnotations. It uses the
* RegexNERSequenceClassifier and a manual mapping from token text to gender labels. Assumes
* that the Annotation has already been split into sentences, then tokenized into Lists of CoreLabels.
*
* @author jtibs
*
*/
public class GenderAnnotator implements Annotator {

  /** Classifier whose per-token AnswerAnnotation output is copied into GenderAnnotation. */
  private final RegexNERSequenceClassifier classifier;

  /** When true, {@link #annotate(Annotation)} reports progress on stderr. */
  private final boolean verbose;

  /**
   * Creates a non-verbose annotator backed by the mapping at
   * {@code DefaultPaths.DEFAULT_GENDER_FIRST_NAMES}.
   */
  public GenderAnnotator() {
    this(false, DefaultPaths.DEFAULT_GENDER_FIRST_NAMES);
  }

  /**
   * Creates an annotator backed by the given mapping.
   *
   * @param verbose whether {@link #annotate(Annotation)} should print progress to stderr
   * @param mapping mapping resource handed to the RegexNERSequenceClassifier
   */
  public GenderAnnotator(boolean verbose, String mapping) {
    // NOTE(review): the two boolean flags are presumably "ignore case" and
    // "overwrite existing labels" -- confirm against RegexNERSequenceClassifier.
    this.classifier = new RegexNERSequenceClassifier(mapping, true, true);
    this.verbose = verbose;
  }

  /**
   * Runs the classifier over the tokens of every sentence and copies each token's
   * AnswerAnnotation value into its GenderAnnotation.
   *
   * @param annotation document that already carries sentences, each with its tokens
   * @throws RuntimeException if the annotation has no sentences, or a sentence has no tokens
   */
  @Override
  public void annotate(Annotation annotation) {
    // The timer is local so that separate annotate() calls never share timing state.
    Timing timer = null;
    if (verbose) {
      timer = new Timing();
      timer.start();
      System.err.print("Adding gender annotation...");
    }

    if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
      throw new RuntimeException("Unable to find sentences in " + annotation);
    }

    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      if (tokens == null) {
        // Fail with a descriptive message instead of a bare NullPointerException.
        throw new RuntimeException("Unable to find tokens in " + sentence);
      }

      // classify() leaves its label in each token's AnswerAnnotation.
      classifier.classify(tokens);
      for (CoreLabel token : tokens) {
        token.set(MachineReadingAnnotations.GenderAnnotation.class,
            token.get(CoreAnnotations.AnswerAnnotation.class));
      }
    }

    if (timer != null) {
      timer.stop("done.");
    }
  }

  /**
   * Declares what must run before this annotator.
   *
   * @return the {@code TOKENIZE_SSPLIT_POS} requirement set
   */
  @Override
  public Set<Requirement> requires() {
    return TOKENIZE_SSPLIT_POS;
  }

  /**
   * Declares what this annotator provides.
   *
   * @return a singleton set containing {@code GENDER_REQUIREMENT}
   */
  @Override
  public Set<Requirement> requirementsSatisfied() {
    return Collections.singleton(GENDER_REQUIREMENT);
  }
}