Package edu.stanford.nlp.ling

Source Code of edu.stanford.nlp.ling.CoreLabel

package edu.stanford.nlp.ling;

import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;

import edu.stanford.nlp.ling.AnnotationLookup.KeyLookup;
import edu.stanford.nlp.util.ArrayCoreMap;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.StringUtils;


/**
* A CoreLabel represents a single word with ancillary information
* attached using CoreAnnotations.  If the proper annotations are set,
* the CoreLabel also provides convenient methods to access tags,
* lemmas, etc.
* <p>
* A CoreLabel is a Map from keys (which are Class objects) to values,
* whose type is determined by the key.  That is, it is a heterogeneous
* typesafe Map (see Josh Bloch, Effective Java, 2nd edition).
* <p>
* The CoreLabel class in particular bridges the gap between old-style JavaNLP
* Labels and the new CoreMap infrastructure.  Instances of this class can be
* used (almost) anywhere that the now-defunct FeatureLabel family could be
* used.  This data structure is backed by an {@link ArrayCoreMap}.
*
* @author dramage
* @author rafferty
*/
public class CoreLabel extends ArrayCoreMap implements AbstractCoreLabel, HasWord, HasTag, HasCategory, HasLemma, HasContext, HasIndex, HasOffset {

  private static final long serialVersionUID = 2L;


  // /**
  //  * Should warnings be printed when converting from MapLabel family.
  //  */
  // private static final boolean VERBOSE = false;


  /** Default constructor, calls super() */
  public CoreLabel() {
    super();
  }

  /**
   * Initializes this CoreLabel, pre-allocating arrays to hold
   * up to {@code capacity} key,value pairs.  This array will grow if necessary.
   * The capacity is passed straight through to the superclass constructor.
   *
   * @param capacity Initial capacity of object in key,value pairs
   */
  public CoreLabel(int capacity) {
    super(capacity);
  }

  /**
   * Returns a new CoreLabel instance based on the contents of the given
   * CoreLabel.  It copies the contents of the other CoreLabel.
   * <i>Implementation note:</i> this is the same as the constructor
   * that takes a CoreMap, but is needed to ensure unique most specific
   * type inference for selecting a constructor at compile-time.
   *
   * @param label The CoreLabel to copy
   */
  public CoreLabel(CoreLabel label) {
    // The explicit upcast selects the CoreMap copy constructor.
    this((CoreMap) label);
  }

  /**
   * Returns a new CoreLabel instance based on the contents of the given
   * CoreMap.  It copies the contents of the other CoreMap.
   *
   * @param label The CoreMap to copy
   */
  @SuppressWarnings({"unchecked"})
  public CoreLabel(CoreMap label) {
    super(label.size());
    for (Class key : label.keySet()) {
      set(key, label.get(key));
    }
  }

  /**
   * Returns a new CoreLabel instance based on the contents of the given
   * label.   Warning: The behavior of this method is a bit disjunctive!
   * If label is a CoreMap (including CoreLabel), then its entire
   * contents is copied into this label.  But, otherwise, just the
   * value() and word iff it implements HasWord is copied.
   *
   * @param label Basis for this label
   */
  @SuppressWarnings("unchecked")
  public CoreLabel(Label label) {
    super(0);
    if (label instanceof CoreMap) {
      CoreMap cl = (CoreMap) label;
      setCapacity(cl.size());
      for (Class key : cl.keySet()) {
        set(key, cl.get(key));
      }
    } else {
      if (label instanceof HasWord) {
         setWord(((HasWord)label).word());
      }
      setValue(label.value());
    }
  }

  /**
   * This constructor attempts to parse the String keys
   * into Class keys.  It's mainly useful for reading from
   * a file.  A best effort attempt is made to correctly
   * parse the keys according to the String lookup function
   * in {@link AnnotationLookup}.
   * <p>
   * The two arrays are read in parallel.  If they differ in length, only
   * the positions present in both are used.
   *
   * @param keys Array of Strings that name annotation keys
   * @param values Array of values (as String)
   * @throws UnsupportedOperationException if a non-null key cannot be
   *     resolved to an annotation key
   */
  public CoreLabel(String[] keys, String[] values) {
    // Storage is sized from the number of keys.
    super(keys.length);
    initFromStrings(keys, values);
  }


  /**
   * Class that all "generic" annotations extend.
   * This allows you to read in arbitrary values from a file as features, for example.
   */
  public static interface GenericAnnotation<T> extends CoreAnnotation<T> {  }
  // "unchecked" is suppressed below because Eclipse can't handle the level of
  // type inference needed if GenericAnnotation is correctly parameterized with String.

  /**
   * Registry from a String key name to a {@link GenericAnnotation} class.
   * NOTE(review): nothing in the visible part of this class adds entries to
   * this map; confirm external usage before relying on its contents.
   */
  @SuppressWarnings("unchecked")
  public static final Map<String, Class<? extends GenericAnnotation>> genericKeys = Generics.newHashMap();

  /**
   * Reverse registry from a {@link GenericAnnotation} class to a String key name.
   * NOTE(review): nothing in the visible part of this class adds entries to
   * this map; confirm external usage before relying on its contents.
   */
  @SuppressWarnings("unchecked")
  public static final Map<Class<? extends GenericAnnotation>, String> genericValues = Generics.newHashMap();


  @SuppressWarnings("unchecked")
  private void initFromStrings(String[] keys, String[] values) {
    for (int i = 0; i < Math.min(keys.length, values.length); i++) {
      String key = keys[i];
      String value = values[i];
      KeyLookup lookup = AnnotationLookup.getCoreKey(key);

      //now work with the key we got above
      if (lookup == null) {
        if (key != null) {
          throw new UnsupportedOperationException("Unknown key " + key);
        }

        // It used to be that the following code let you put unknown keys
        // in the CoreLabel.  However, you can't create classes dynamically
        // at run time, which meant only one of these classes could ever
        // exist, which meant multiple unknown keys would clobber each
        // other and be very annoying.  It's easier just to not allow
        // it at all.
        // If it becomes possible to create classes dynamically,
        // we could add this code back.
        //if(genericKeys.containsKey(key)) {
        //  this.set(genericKeys.get(key), value);
        //} else {
        //  GenericAnnotation<String> newKey = new GenericAnnotation<String>() {
        //    public Class<String> getType() { return String.class;} };
        //  this.set(newKey.getClass(), values[i]);
        //  genericKeys.put(keys[i], newKey.getClass());
        //  genericValues.put(newKey.getClass(), keys[i]);
        //}
        // unknown key; ignore
        //if (VERBOSE) {
        //  System.err.println("CORE: CoreLabel.fromAbstractMapLabel: " +
        //      "Unknown key "+key);
        //}
      } else {
        try {
          Class<?> valueClass = AnnotationLookup.getValueType(lookup.coreKey);
          if(valueClass.equals(String.class)) {
            this.set(lookup.coreKey, values[i]);
          } else if(valueClass == Integer.class) {
            this.set(lookup.coreKey, Integer.parseInt(values[i]));
          } else if(valueClass == Double.class) {
            this.set(lookup.coreKey, Double.parseDouble(values[i]));
          } else if(valueClass == Long.class) {
            this.set(lookup.coreKey, Long.parseLong(values[i]));
          }
        } catch (Exception e) {
          e.printStackTrace();
          // unexpected value type
          System.err.println("CORE: CoreLabel.initFromStrings: "
              + "Bad type for " + key
              + ". Value was: " + value
              + "; expected "+AnnotationLookup.getValueType(lookup.coreKey));
        }
      }
    }
  }


  private static class CoreLabelFactory implements LabelFactory {

    @Override
    public Label newLabel(String labelStr) {
      CoreLabel label = new CoreLabel();
      label.setValue(labelStr);
      return label;
    }

    @Override
    public Label newLabel(String labelStr, int options) {
      return newLabel(labelStr);
    }

    @Override
    public Label newLabel(Label oldLabel) {
      if (oldLabel instanceof CoreLabel) {
        return new CoreLabel((CoreLabel)oldLabel);

      } else {
        //Map the old interfaces to the correct key/value pairs
        //Don't need to worry about HasIndex, which doesn't appear in any legacy code
        CoreLabel label = new CoreLabel();
        if (oldLabel instanceof HasWord)
          label.setWord(((HasWord) oldLabel).word());
        if (oldLabel instanceof HasTag)
          label.setTag(((HasTag) oldLabel).tag());
        if (oldLabel instanceof HasOffset) {
          label.setBeginPosition(((HasOffset) oldLabel).beginPosition());
          label.setEndPosition(((HasOffset) oldLabel).endPosition());
        }
        if (oldLabel instanceof HasCategory)
          label.setCategory(((HasCategory) oldLabel).category());
        if (oldLabel instanceof HasIndex)
          label.setIndex(((HasIndex) oldLabel).index());

        label.setValue(oldLabel.value());

        return label;
      }
    }

    @Override
    public Label newLabelFromString(String encodedLabelStr) {
      throw new UnsupportedOperationException("This code branch left blank" +
      " because we do not understand what this method should do.");
    }

  }


  /**
   * Return a factory for this kind of label
   *
   * @return The label factory
   */
  public static LabelFactory factory() {
    return new CoreLabelFactory();
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public LabelFactory labelFactory() {
    return CoreLabel.factory();
  }

  /**
   * Return a non-null String value for a key.
   * This method is included for backwards compatibility with AbstractMapLabel.
   * It is guaranteed to not return null; if the key is not present or
   * has a null value, it returns the empty string ("").  It is only valid to
   * call this method when key is paired with a value of type String.
   *
   * @param <KEY> A key type with a String value
   * @param key The key to return the value of.
   * @return "" if the key is not in the map or has the value <code>null</code>
   *     and the String value of the key otherwise
   */
  @Override
  public <KEY extends Key<String>> String getString(Class<KEY> key) {
    String value = get(key);
    if (value == null) {
      return "";
    }
    return value;
  }


  /**
   * {@inheritDoc}
   */
//  public int size() {
//    return map.size();
//  }

  /**
   * Not supported by CoreLabel.
   *
   * @param labelStr Ignored
   * @throws UnsupportedOperationException always
   */
  @Override
  public void setFromString(String labelStr) {
    throw new UnsupportedOperationException("Cannot set from string");
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public final void setValue(String value) {
    set(CoreAnnotations.ValueAnnotation.class, value);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public final String value() {
    return get(CoreAnnotations.ValueAnnotation.class);
  }

  /**
   * Set the word value for the label.  Also, clears the lemma, since
   * that may have changed if the word changed.
   */
  @Override
  public void setWord(String word) {
    String originalWord = get(CoreAnnotations.TextAnnotation.class);
    set(CoreAnnotations.TextAnnotation.class, word);
    // pado feb 09: if you change the word, delete the lemma.
    // gabor dec 2012: check if there was a real change -- this remove is actually rather expensive if it gets called a lot
    if (word != null && !word.equals(originalWord) && containsKey(CoreAnnotations.LemmaAnnotation.class)) {
      remove(CoreAnnotations.LemmaAnnotation.class);
    }
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public String word() {
    return get(CoreAnnotations.TextAnnotation.class);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void setTag(String tag) {
    set(CoreAnnotations.PartOfSpeechAnnotation.class, tag);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public String tag() {
    return get(CoreAnnotations.PartOfSpeechAnnotation.class);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void setCategory(String category) {
    set(CoreAnnotations.CategoryAnnotation.class, category);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public String category() {
    return get(CoreAnnotations.CategoryAnnotation.class);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void setAfter(String after) {
    set(CoreAnnotations.AfterAnnotation.class, after);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public String after() {
    return getString(CoreAnnotations.AfterAnnotation.class);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void setBefore(String before) {
    set(CoreAnnotations.BeforeAnnotation.class, before);
  }


  /**
   * {@inheritDoc}
   */
  @Override
  public String before() {
    return getString(CoreAnnotations.BeforeAnnotation.class);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void setOriginalText(String originalText) {
    set(CoreAnnotations.OriginalTextAnnotation.class, originalText);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public String originalText() {
    return getString(CoreAnnotations.OriginalTextAnnotation.class);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public String docID() {
    return get(CoreAnnotations.DocIDAnnotation.class);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void setDocID(String docID) {
    set(CoreAnnotations.DocIDAnnotation.class, docID);
  }

  /**
   * Return the named entity class of the label (or null if none).
   *
   * @return String the word value for the label
   */
  public String ner() {
    return get(CoreAnnotations.NamedEntityTagAnnotation.class);
  }

  public void setNER(String ner) {
    set(CoreAnnotations.NamedEntityTagAnnotation.class, ner);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public String lemma() {
    return get(CoreAnnotations.LemmaAnnotation.class);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void setLemma(String lemma) {
    set(CoreAnnotations.LemmaAnnotation.class, lemma);
  }


  /**
   * {@inheritDoc}
   */
  @Override
  public int index() {
    Integer n = get(CoreAnnotations.IndexAnnotation.class);
    if(n == null)
      return -1;
    return n;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void setIndex(int index) {
    set(CoreAnnotations.IndexAnnotation.class, index);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public int sentIndex() {
    Integer n = get(CoreAnnotations.SentenceIndexAnnotation.class);
    if(n == null)
      return -1;
    return n;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void setSentIndex(int sentIndex) {
    set(CoreAnnotations.SentenceIndexAnnotation.class, sentIndex);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public int beginPosition() {
    Integer i = get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    if(i != null) return i;
    return -1;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public int endPosition() {
    Integer i = get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    if(i != null) return i;
    return -1;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void setBeginPosition(int beginPos) {
    set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, beginPos);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void setEndPosition(int endPos) {
    set(CoreAnnotations.CharacterOffsetEndAnnotation.class, endPos);
  }

  public int copyCount() {
    Integer copy = get(CoreAnnotations.CopyAnnotation.class);
    if (copy == null)
      return 0;
    return copy;
  }

  public void setCopyCount(int count) {
    set(CoreAnnotations.CopyAnnotation.class, count);
  }

  /**
   * Tag separator to use by default.  {@link #toString(OutputFormat)}
   * places it between the value and the tag in the tag-bearing formats.
   */
  public static final String TAG_SEPARATOR = "/";

  public enum OutputFormat {
    VALUE_INDEX, VALUE, VALUE_TAG, VALUE_TAG_INDEX, MAP, VALUE_MAP, VALUE_INDEX_MAP, WORD, WORD_INDEX
  };

  /** The format used by the no-argument {@link #toString()}. */
  public static final OutputFormat DEFAULT_FORMAT = OutputFormat.VALUE_INDEX;

  @Override
  public String toString() {
    return toString(DEFAULT_FORMAT);
  }

  /**
   * Returns a formatted string representing this label.  The
   * desired format is passed in as a <code>String</code>.
   * Currently supported formats include:
   * <ul>
   * <li>"value": just prints the value</li>
   * <li>"{map}": prints the complete map</li>
   * <li>"value{map}": prints the value followed by the contained
   * map (less the map entry containing key <code>CATEGORY_KEY</code>)</li>
   * <li>"value-index": extracts a value and an integer index from
   * the contained map using keys  <code>INDEX_KEY</code>,
   * respectively, and prints them with a hyphen in between</li>
   * <li>"value-tag"
   * <li>"value-tag-index"
   * <li>"value-index{map}": a combination of the above; the index is
   * displayed first and then not shown in the map that is displayed</li>
   * <li>"word": Just the value of HEAD_WORD_KEY in the map</li>
   * </ul>
   * <p/>
   * Map is printed in alphabetical order of keys.
   */
  @SuppressWarnings("unchecked")
  public String toString(OutputFormat format) {
    StringBuilder buf = new StringBuilder();
    switch(format) {
    case VALUE:
      buf.append(value());
      break;
    case MAP: {
      Map map2 = new TreeMap();
      for(Class key : this.keySet()) {
        map2.put(key.getName(), get(key));
      }
      buf.append(map2);
      break;
    }
    case VALUE_MAP: {
      buf.append(value());
      Map map2 = new TreeMap(asClassComparator);
      for(Class key : this.keySet()) {
        map2.put(key, get(key));
      }
      map2.remove(CoreAnnotations.ValueAnnotation.class);
      buf.append(map2);
      break;
    }
    case VALUE_INDEX: {
      buf.append(value());
      Integer index = this.get(CoreAnnotations.IndexAnnotation.class);
      if (index != null) {
        buf.append('-').append((index).intValue());
      }
      buf.append(toPrimes());
      break;
    }
    case VALUE_TAG: {
      buf.append(value());
      buf.append(toPrimes());
      String tag = tag();
      if (tag != null) {
        buf.append(TAG_SEPARATOR).append(tag);
      }
      break;
    }
    case VALUE_TAG_INDEX: {
      buf.append(value());
      String tag = tag();
      if (tag != null) {
        buf.append(TAG_SEPARATOR).append(tag);
      }
      Integer index = this.get(CoreAnnotations.IndexAnnotation.class);
      if (index != null) {
        buf.append('-').append((index).intValue());
      }
      buf.append(toPrimes());
      break;
    }
    case VALUE_INDEX_MAP: {
      buf.append(value());
      Integer index = this.get(CoreAnnotations.IndexAnnotation.class);
      if (index != null) {
        buf.append('-').append((index).intValue());
      }
      Map<String,Object> map2 = new TreeMap<String,Object>();
      for(Class key : this.keySet()) {
        String cls = key.getName();
        // special shortening of all the Annotation classes
        int idx = cls.indexOf('$');
        if (idx >= 0) {
          cls = cls.substring(idx + 1);
        }
        map2.put(cls, this.get(key));
      }
      map2.remove("IndexAnnotation");
      map2.remove("ValueAnnotation");
      if (!map2.isEmpty()) {
        buf.append(map2);
      }
      break;
    }
    case WORD:
      // TODO: we should unify word() and value()
      buf.append(word());
      break;
    case WORD_INDEX: {
      buf.append(this.get(CoreAnnotations.TextAnnotation.class));
      Integer index = this.get(CoreAnnotations.IndexAnnotation.class);
      if (index != null) {
        buf.append('-').append((index).intValue());
      }
      buf.append(toPrimes());
      break;
    }
    default:
      throw new IllegalArgumentException("Unknown format " + format);
    }
    return buf.toString();
  }

  public String toPrimes() {
    return StringUtils.repeat('\'', copyCount());
  }

  private static final Comparator<Class<?>> asClassComparator = new Comparator<Class<?>>() {
    @Override
    public int compare(Class<?> o1, Class<?> o2) {
      return o1.getName().compareTo(o2.getName());
    }
  };

}
TOP

Related Classes of edu.stanford.nlp.ling.CoreLabel

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.