Package edu.stanford.nlp.pipeline

Source Code of edu.stanford.nlp.pipeline.QuantifiableEntityNormalizingAnnotator

package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ie.QuantifiableEntityNormalizer;
import edu.stanford.nlp.ie.regexp.NumberSequenceClassifier;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.Timing;

import java.util.*;

/**
* This class provides a facility for normalizing the content of numerical named
* entities (number, money, date, time) in the pipeline package world. It relies
* heavily on code in {@link edu.stanford.nlp.ie.QuantifiableEntityNormalizer}.
* New stuff should generally be added there so as to reduce code duplication.
*
* @author Jenny Finkel
* @author Christopher Manning (extended for RTE)
* @author Chris Cox (original version)
*/

public class QuantifiableEntityNormalizingAnnotator implements Annotator {

  private Timing timer = new Timing();
  private final boolean VERBOSE;
  private static final String DEFAULT_BACKGROUND_SYMBOL = "O";
  private final boolean collapse;  // TODO: collpase = true won't work properly (see annotateTokens)

  public static final String BACKGROUND_SYMBOL_PROPERTY = "background";
  public static final String COLLAPSE_PROPERTY = "collapse";

  public QuantifiableEntityNormalizingAnnotator() {
    this(DEFAULT_BACKGROUND_SYMBOL, true);
  }

  public QuantifiableEntityNormalizingAnnotator(boolean verbose) {
    this(DEFAULT_BACKGROUND_SYMBOL, verbose);
  }

  public QuantifiableEntityNormalizingAnnotator(String name, Properties props) {
    String property = name + "." + BACKGROUND_SYMBOL_PROPERTY;
    String backgroundSymbol = props.getProperty(property, DEFAULT_BACKGROUND_SYMBOL);
    // this next line is yuck as QuantifiableEntityNormalizer is still static
    QuantifiableEntityNormalizer.BACKGROUND_SYMBOL = backgroundSymbol;
    property = name + "." + COLLAPSE_PROPERTY;
    collapse = PropertiesUtils.getBool(props, property, false);
    if (this.collapse) {
      System.err.println("WARNING: QuantifiableEntityNormalizingAnnotator does not work well with collapse=true");
    }
    VERBOSE = false;
  }

  /**
   * Do quantity entity normalization and collapse together multitoken quantity
   * entities into a single token.
   *
   * @param backgroundSymbol
   *          NER background symbol
   * @param verbose
   *          Whether to write messages
   */
  public QuantifiableEntityNormalizingAnnotator(String backgroundSymbol, boolean verbose) {
    this(backgroundSymbol, verbose, false);
  }

  /**
   * Do quantity entity normalization and collapse together multitoken quantity
   * entities into a single token.
   *
   * @param verbose
   *          Whether to write messages
   * @param collapse
   *          Whether to collapse multitoken quantity entities.
   */
  public QuantifiableEntityNormalizingAnnotator(boolean verbose, boolean collapse) {
    this(DEFAULT_BACKGROUND_SYMBOL, verbose, collapse);
  }

  public QuantifiableEntityNormalizingAnnotator(String backgroundSymbol, boolean verbose, boolean collapse) {
    // this next line is yuck as QuantifiableEntityNormalizer is still static
    QuantifiableEntityNormalizer.BACKGROUND_SYMBOL = backgroundSymbol;
    VERBOSE = verbose;
    this.collapse = collapse;
    if (this.collapse) {
      System.err.println("WARNING: QuantifiableEntityNormalizingAnnotator does not work well with collapse=true");
    }
  }

  public void annotate(Annotation annotation) {
    if (VERBOSE) {
      timer.start();
      System.err.print("Normalizing quantifiable entities...");
    }
    if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
      List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
      for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        annotateTokens(tokens);
      }
      if (VERBOSE) {
        timer.stop("done.");
        System.err.println("output: " + sentences + '\n');
      }
    } else if (annotation.containsKey(CoreAnnotations.TokensAnnotation.class)) {
      List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
      annotateTokens(tokens);
    } else {
      throw new RuntimeException("unable to find sentences in: " + annotation);
    }
  }

  private <TOKEN extends CoreLabel> void annotateTokens(List<TOKEN> tokens) {
    // Make a copy of the tokens before annotating because QuantifiableEntityNormalizer may change the POS too
    List<CoreLabel> words = new ArrayList<CoreLabel>();
    for (CoreLabel token : tokens) {
      CoreLabel word = new CoreLabel();
      word.setWord(token.word());
      word.setNER(token.ner());
      word.setTag(token.tag());

      // copy fields potentially set by SUTime
      NumberSequenceClassifier.transferAnnotations(token, word);

      words.add(word);
    }
    doOneSentence(words);
    // TODO: If collapsed is set, tokens for entities are collapsed into one node then
    // (words.size() != tokens.size() and the logic below just don't work!!!
    for (int i = 0; i < words.size(); i++) {
      String ner = words.get(i).ner();
      tokens.get(i).setNER(ner);
      tokens.get(i).set(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class,
              words.get(i).get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
    }
  }

  private <TOKEN extends CoreLabel> void doOneSentence(List<TOKEN> words) {
    QuantifiableEntityNormalizer.addNormalizedQuantitiesToEntities(words, collapse);
  }


  @Override
  public Set<Requirement> requires() {
    return Collections.singleton(TOKENIZE_REQUIREMENT);
  }

  @Override
  public Set<Requirement> requirementsSatisfied() {
    // technically it adds some NER, but someone who wants full NER
    // labels will be very disappointed, so we do not claim to produce NER
    return Collections.singleton(QUANTIFIABLE_ENTITY_NORMALIZATION_REQUIREMENT);
  }
}
TOP

Related Classes of edu.stanford.nlp.pipeline.QuantifiableEntityNormalizingAnnotator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.