Package edu.stanford.nlp.ie.machinereading.structure

Source Code of edu.stanford.nlp.ie.machinereading.structure.ExtractionObject$CompByExtent

package edu.stanford.nlp.ie.machinereading.structure;

import java.io.Serializable;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Set;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.util.ArrayCoreMap;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;

/**
* Represents any object that can be extracted - entity, relation, event
*
* @author Andrey Gusev
* @author Mihai
*
*/
public class ExtractionObject implements Serializable {

  private static final long serialVersionUID = 1L;

  /** Unique identifier of the object in its document */
  protected final String objectId;
 
  /**
   * Sentence that contains this object
   * This assumes that each extraction object is intra-sentential (true in ACE, Roth, BioNLP, MR)
   */
  protected CoreMap sentence;
 
  /** Type of this mention, e.g., GPE */
  protected String type;
 
  /** Subtype, if available, e.g., GPE.CITY */
  protected final String subType;
 
  /**
   * Maximal token span relevant for this object, e.g., the largest NP for an entity mention
   * The offsets are relative to the sentence that contains this object
   */
  protected Span extentTokenSpan;
 
  /** This stores any optional attributes of ExtractionObjects */
  protected CoreMap attributeMap;

  /**
   * Probabilities associated with this object
   * We report probability values for each possible type for this object
   */
  protected Counter<String> typeProbabilities;

  public ExtractionObject(String objectId,
      CoreMap sentence,
      Span span,
      String type,
      String subtype) {
    this.objectId = objectId;
    this.sentence = sentence;
    this.extentTokenSpan = span;
    this.type = type.intern();
    this.subType = (subtype != null ? subtype.intern() : null);
    this.attributeMap = null;
  }

  public String getObjectId() {
    return objectId;
  }

  public String getDocumentId() {
    return sentence.get(CoreAnnotations.DocIDAnnotation.class);
  }
 
  public CoreMap getSentence() {
    return sentence;
  }
 
  public void setSentence(CoreMap sent) {
    this.sentence = sent;
  }
 
  public int getExtentTokenStart() { return extentTokenSpan.start(); }

  public int getExtentTokenEnd() { return extentTokenSpan.end(); }
 
  public Span getExtent() { return extentTokenSpan; }
 
  public void setExtent(Span s) {
    extentTokenSpan = s;
  }
 
  public String getExtentString() {
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    StringBuilder sb = new StringBuilder();
    for (int i = extentTokenSpan.start(); i < extentTokenSpan.end(); i ++){
      CoreLabel token = tokens.get(i);
      if(i > extentTokenSpan.start()) sb.append(" ");
      sb.append(token.word());
    }
    return sb.toString();
  }
 
  public String getType() { return type; }
 
  public String getSubType() { return subType; }
 
  @Override
  public boolean equals(Object other) {
    if(! (other instanceof ExtractionObject)) return false;
    ExtractionObject o = (ExtractionObject) other;
    return o.objectId.equals(objectId) && o.sentence.get(CoreAnnotations.TextAnnotation.class).equals(sentence.get(CoreAnnotations.TextAnnotation.class));
  }

  static class CompByExtent implements Comparator<ExtractionObject> {
    public int compare(ExtractionObject o1, ExtractionObject o2) {
      if(o1.getExtentTokenStart() < o2.getExtentTokenStart()){
        return -1;
      } else if(o1.getExtentTokenStart() > o2.getExtentTokenStart()){
        return 1;
      } else if(o1.getExtentTokenEnd() < o2.getExtentTokenEnd()) {
        return -1;
      } else if(o1.getExtentTokenEnd() > o2.getExtentTokenEnd()) {
        return 1;
      } else {
        return 0;
      }
    }
  }
 
  public static void sortByExtent(List<ExtractionObject> objects) {
    Collections.sort(objects, new CompByExtent());
  }
 
  /**
   * Returns the smallest span that covers the extent of all these objects
   * @param objs
   */
  public static Span getSpan(ExtractionObject ... objs) {
    int left = Integer.MAX_VALUE;
    int right = Integer.MIN_VALUE;
    for(int i = 0; i < objs.length; i ++){
      if(objs[i].getExtentTokenStart() < left){
        left = objs[i].getExtentTokenStart();
      }
      if(objs[i].getExtentTokenEnd() > right) {
        right = objs[i].getExtentTokenEnd();
      }
    }
    assert(left < Integer.MAX_VALUE);
    assert(right > Integer.MIN_VALUE);
    return new Span(left, right);
  }
 
  /**
   * Returns the text corresponding to the extent of this object
   */
  public String getValue() {
    return getFullValue();
  }
 
  /**
   * Always returns the text corresponding to the extent of this object, even when
   * getValue is overridden by subclass.
   */
  final public String getFullValue() {
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    StringBuilder sb = new StringBuilder();
    if(tokens != null && extentTokenSpan != null){
      for(int i = extentTokenSpan.start(); i < extentTokenSpan.end(); i ++){
        if(i > extentTokenSpan.start()) sb.append(" ");
        sb.append(tokens.get(i).word());
      }
    }
    return sb.toString();
  }

  public void setType(String t) {
    this.type = t;
  }
 
  private static final String TYPE_SEP = "/";
 
  /**
   * Concatenates two types
   * @param t1
   * @param t2
   */
  public static String concatenateTypes(String t1, String t2) {
    String [] t1Toks = t1.split(TYPE_SEP);
    String [] t2Toks = t2.split(TYPE_SEP);
    Set<String> uniqueTypes = Generics.newHashSet();
    for(String t: t1Toks) uniqueTypes.add(t);
    for(String t: t2Toks) uniqueTypes.add(t);
    String [] types = new String[uniqueTypes.size()];
    uniqueTypes.toArray(types);
    Arrays.sort(types);
    StringBuffer os = new StringBuffer();
    for(int i = 0; i < types.length; i ++){
      if(i > 0) os.append(TYPE_SEP);
      os.append(types[i]);
    }
    return os.toString();
  }
 
  public CoreMap attributeMap() {
    if(attributeMap == null){
      attributeMap = new ArrayCoreMap();
    }
    return attributeMap;
  }
 
  public void setTypeProbabilities(Counter<String> probs) {
    typeProbabilities = probs;
  }
  public Counter<String> getTypeProbabilities() {
    return typeProbabilities;
  }
  String probsToString() {
    List<Pair<String, Double>> sorted = Counters.toDescendingMagnitudeSortedListWithCounts(typeProbabilities);
    StringBuffer os = new StringBuffer();
    os.append("{");
    boolean first = true;
    for(Pair<String, Double> lv: sorted) {
      if(! first) os.append("; ");
      os.append(lv.first + ", " + lv.second);
      first = false;
    }
    os.append("}");
    return os.toString();
  }
 
  /**
   * Returns true if it's worth saving/printing this object
   * This happens in two cases:
   * 1. The type of the object is not nilLabel
   * 2. The type of the object is nilLabel but the second ranked label is within the given beam (0 -- 100) of the first choice
   * @param beam
   * @param nilLabel
   */
  public boolean printableObject(double beam, String nilLabel) {
    if (typeProbabilities == null) { return false; }
    List<Pair<String, Double>> sorted = Counters.toDescendingMagnitudeSortedListWithCounts(typeProbabilities);
   
    // first choice not nil
    if(sorted.size() > 0 && ! sorted.get(0).first.equals(nilLabel)){
      return true;
    }
   
    // first choice is nil, but second is within beam
    if(sorted.size() > 1 && sorted.get(0).first.equals(nilLabel) && beam > 0 &&
        100.0 * (sorted.get(0).second - sorted.get(1).second) < beam){
      return true;
    }

    return false;
  }
}
TOP

Related Classes of edu.stanford.nlp.ie.machinereading.structure.ExtractionObject$CompByExtent

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.