Package edu.stanford.nlp.pipeline

Source Code of edu.stanford.nlp.pipeline.GenderAnnotator

package edu.stanford.nlp.pipeline;

import java.util.Collections;
import java.util.List;
import java.util.Set;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.ie.regexp.RegexNERSequenceClassifier;
import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Timing;

/**
* This class adds gender information (MALE / FEMALE) to tokens as GenderAnnotations. It uses the
* RegexNERSequenceClassifier and a manual mapping from token text to gender labels. Assumes
* that the Annotation has already been split into sentences, then tokenized into Lists of CoreLabels.
*
* @author jtibs
*
*/

public class GenderAnnotator implements Annotator {
  private RegexNERSequenceClassifier classifier;
  private Timing timer;
  private boolean verbose;
 
  public GenderAnnotator() {
    this(false, DefaultPaths.DEFAULT_GENDER_FIRST_NAMES);
  }
 
  public GenderAnnotator(boolean verbose, String mapping) {
    classifier = new RegexNERSequenceClassifier(mapping, true, true);
    timer = new Timing();
    this.verbose = verbose;
  }

  public void annotate(Annotation annotation) {
    if (verbose) {   
      timer.start();
      System.err.print("Adding gender annotation...");
    }
   
    if (! annotation.containsKey(CoreAnnotations.SentencesAnnotation.class))
      throw new RuntimeException("Unable to find sentences in " + annotation);
 
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      classifier.classify(tokens);
 
      for (CoreLabel token : tokens)
        token.set(MachineReadingAnnotations.GenderAnnotation.class, token.get(CoreAnnotations.AnswerAnnotation.class));
    }
   
    if (verbose)
      timer.stop("done.");
  }


  @Override
  public Set<Requirement> requires() {
    return TOKENIZE_SSPLIT_POS;
  }

  @Override
  public Set<Requirement> requirementsSatisfied() {
    return Collections.singleton(GENDER_REQUIREMENT);
  }
}
TOP

Related Classes of edu.stanford.nlp.pipeline.GenderAnnotator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.