Package edu.stanford.nlp.ling

Examples of edu.stanford.nlp.ling.CoreLabel


      // wsg (Feb. 2010) - More hacks for trees with CoreLabels in which the type implements
      // HasWord but only the value field is populated. This can happen if legacy code uses
      // LabeledScoredTreeFactory but passes in a StringLabel to e.g. newLeaf().
      if (lab instanceof HasWord) {
        if(lab instanceof CoreLabel) {
          CoreLabel cl = (CoreLabel) lab;
          if(cl.word() == null)
            cl.setWord(cl.value());
          y.add((X) cl);
        } else {
          y.add((X) lab);
        }
View Full Code Here


  /**
   * see merge(CoreMap base, CoreMap toBeMerged)
   */
  public static CoreLabel merge(CoreLabel base, CoreLabel toBeMerged){
    //(variables)
    CoreLabel rtn = new CoreLabel(base.size());
    //(copy base)
    for(Class key : base.keySet()){
      rtn.set(key,base.get(key));
    }
    //(merge)
    for(Class key : toBeMerged.keySet()){
      rtn.set(key,toBeMerged.get(key));
    }
    //(return)
    return rtn;
  }
View Full Code Here

    return ty;
  }

  private int taggedLabeledYield(List<CoreLabel> ty, int termIdx) {
    if(isPreTerminal()) {
      CoreLabel taggedWord = new CoreLabel();
      final String tag = (value() == null) ? "" : value();
      taggedWord.setValue(tag);
      taggedWord.setTag(tag);
      taggedWord.setIndex(termIdx);
      taggedWord.setWord(firstChild().value());
      ty.add(taggedWord);

      return termIdx + 1;

    } else {
View Full Code Here

  public static boolean fixTokenOffsets(CoreMap docAnnotation)
  {
    List<CoreLabel> docTokens = docAnnotation.get(CoreAnnotations.TokensAnnotation.class);
    List<CoreMap> sentences = docAnnotation.get(CoreAnnotations.SentencesAnnotation.class);
    int i = 0;
    CoreLabel curDocToken = docTokens.get(0);
    for (CoreMap sentence:sentences) {
      List<CoreLabel> sentTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      CoreLabel sentTokenFirst = sentTokens.get(0);
      while (curDocToken != sentTokenFirst) {
        i++;
        if (i >= docTokens.size()) { return false; }
        curDocToken = docTokens.get(i);
      }
      int sentTokenBegin = i;
      CoreLabel sentTokenLast = sentTokens.get(sentTokens.size()-1);
      while (curDocToken != sentTokenLast) {
        i++;
        if (i >= docTokens.size()) { return false; }
        curDocToken = docTokens.get(i);
      }
View Full Code Here

    // First identify any tokens that need to be fixed
    String text = docAnnotation.get(CoreAnnotations.TextAnnotation.class);
    List<CoreLabel> tokens = docAnnotation.get(CoreAnnotations.TokensAnnotation.class);
    List<CoreLabel> output = new ArrayList<CoreLabel>(tokens.size());
    int i = 0;
    CoreLabel token = tokens.get(i);
    for (IntPair offsets:chunkCharOffsets) {
      assert(token.beginPosition() >= 0);
      assert(token.endPosition() >= 0);
      int offsetBegin = offsets.getSource();
      int offsetEnd = offsets.getTarget();
      // Find tokens where token begins after chunk starts
      // and token ends after chunk starts
      while (offsetBegin < token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class)
              || offsetBegin >= token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)) {
        output.add(token);
        i++;
        if (i >= tokens.size()) { return false; }
        token = tokens.get(i);
      }
      // offsetBegin is now >= token begin and < token end
      // go until we find a token that starts after our chunk has ended
      while (offsetEnd > token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) {
        // Check if chunk includes token
        if (offsetBegin > token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) {
          // Chunk starts in the middle of the token
          if (offsetEnd < token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)) {
            output.add(tokenFactory.makeToken(text.substring(token.beginPosition(), offsetBegin),
                    token.beginPosition(), offsetBegin-token.beginPosition()));
            output.add(tokenFactory.makeToken(text.substring(offsetBegin,offsetEnd),
                    offsetBegin, offsetEnd-offsetBegin));
            output.add(tokenFactory.makeToken(text.substring(offsetEnd,token.endPosition()),
                    offsetEnd, token.endPosition()-offsetEnd));
          } else {
            output.add(tokenFactory.makeToken(text.substring(token.beginPosition(), offsetBegin),
                    token.beginPosition(), offsetBegin-token.beginPosition()));
            output.add(tokenFactory.makeToken(text.substring(offsetBegin,token.endPosition()),
                    offsetBegin, token.endPosition()-offsetBegin));
          }
        } else if (offsetEnd < token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)) {
          output.add(tokenFactory.makeToken(text.substring(token.beginPosition(),offsetEnd),
                  token.beginPosition(), offsetEnd-token.beginPosition()));
          output.add(tokenFactory.makeToken(text.substring(offsetEnd,token.endPosition()), offsetEnd,
                  token.endPosition()-offsetEnd));
        } else {
          // success!  chunk contains token
          output.add(token);
        }
        i++;
View Full Code Here

   * from the enclosing document, and are not tracked in the protobuf.
   * @param proto The serialized protobuf to read the CoreLabel from.
   * @return A CoreLabel, missing the fields that are not stored in the CoreLabel protobuf.
   */
  public CoreLabel fromProto(CoreNLPProtos.Token proto) {
    CoreLabel word = new CoreLabel();
    // Required fields
    word.setWord(proto.getWord());
    // Optional fields
    if (proto.hasPos()) { word.setTag(proto.getPos()); }
    if (proto.hasValue()) { word.setValue(proto.getValue()); }
    if (proto.hasCategory()) { word.setCategory(proto.getCategory()); }
    if (proto.hasBefore()) { word.setBefore(proto.getBefore()); }
    if (proto.hasAfter()) { word.setAfter(proto.getAfter()); }
    if (proto.hasOriginalText()) { word.setOriginalText(proto.getOriginalText()); }
    if (proto.hasNer()) { word.setNER(proto.getNer()); }
    if (proto.hasLemma()) { word.setLemma(proto.getLemma()); }
    if (proto.hasBeginChar()) { word.setBeginPosition(proto.getBeginChar()); }
    if (proto.hasEndChar()) { word.setEndPosition(proto.getEndChar()); }
    if (proto.hasSpeaker()) { word.set(SpeakerAnnotation.class, proto.getSpeaker()); }
    if (proto.hasUtterance()) { word.set(UtteranceAnnotation.class, proto.getUtterance()); }
    if (proto.hasBeginIndex()) { word.set(BeginIndexAnnotation.class, proto.getBeginIndex()); }
    if (proto.hasEndIndex()) { word.set(EndIndexAnnotation.class, proto.getEndIndex()); }
    if (proto.hasTokenBeginIndex()) { word.set(TokenBeginAnnotation.class, proto.getTokenBeginIndex()); }
    if (proto.hasTokenEndIndex()) { word.set(TokenEndAnnotation.class, proto.getTokenEndIndex()); }
    if (proto.hasNormalizedNER()) { word.set(NormalizedNamedEntityTagAnnotation.class, proto.getNormalizedNER()); }
    if (proto.hasTimexValue()) { word.set(TimexAnnotation.class, fromProto(proto.getTimexValue())); }
    if (proto.hasHasXmlContext() && proto.getHasXmlContext()) { word.set(XmlContextAnnotation.class, proto.getXmlContextList()); }
    if (proto.hasCorefClusterID()) { word.set(CorefClusterIdAnnotation.class, proto.getCorefClusterID()); }
    if (proto.hasAnswer()) { word.set(AnswerAnnotation.class, proto.getAnswer()); }
    // Non-default annotators
    if (proto.hasGender()) { word.set(GenderAnnotation.class, proto.getGender()); }
    if (proto.hasTrueCase()) { word.set(TrueCaseAnnotation.class, proto.getTrueCase()); }
    if (proto.hasTrueCaseText()) { word.set(TrueCaseTextAnnotation.class, proto.getTrueCaseText()); }
    // Return
    return word;
  }
View Full Code Here

        while (sentence.hasTokenOffsetBegin() && tokens.size() < sentence.getTokenOffsetBegin()) {
          tokens.add(null);
        }
        // Read the sentence
        for (CoreNLPProtos.Token token : sentence.getTokenList()) {
          CoreLabel coreLabel = fromProto(token);
          // Set docid
          if (proto.hasDocID()) { coreLabel.setDocID(proto.getDocID()); }
          if (token.hasTokenBeginIndex() && token.hasTokenEndIndex()) {
            // This is usually true, if enough annotators are defined
            while (tokens.size() < sentence.getTokenOffsetEnd()) {
              tokens.add(null);
            }
            for (int i = token.getTokenBeginIndex(); i < token.getTokenEndIndex(); ++i) {
              tokens.set(token.getTokenBeginIndex(), coreLabel);
            }
          } else {
            // Assume this token spans a single token, and just add it to the tokens list
            tokens.add(coreLabel);
          }
        }
      }
    } else if (proto.getSentencelessTokenCount() > 0) {
      // Eek -- no sentences. Try to recover tokens directly
      if (proto.getSentencelessTokenCount() > 0) {
        for (CoreNLPProtos.Token token : proto.getSentencelessTokenList()) {
          CoreLabel coreLabel = fromProto(token);
          // Set docid
          if (proto.hasDocID()) { coreLabel.setDocID(proto.getDocID()); }
          tokens.add(coreLabel);
        }
      }
    }
    if (!tokens.isEmpty()) { ann.set(TokensAnnotation.class, tokens); }
View Full Code Here

   */
  public Tree fromProto(CoreNLPProtos.ParseTree proto) {
    LabeledScoredTreeNode node = new LabeledScoredTreeNode();
    // Set label
    if (proto.hasValue()) {
      CoreLabel value = new CoreLabel();
      value.setCategory(proto.getValue());
      value.setValue(proto.getValue());
      node.setLabel(value);
      // Set span
      if (proto.hasYieldBeginIndex() && proto.hasYieldEndIndex()) {
        IntPair span = new IntPair(proto.getYieldBeginIndex(), proto.getYieldEndIndex());
        value.set(SpanAnnotation.class, span);
      }
    }
    // Set score
    if (proto.hasScore()) { node.setScore(proto.getScore()); }
    // Set children
View Full Code Here

  public UnnamedDependency(String regent, String dependent) {
    if (regent == null || dependent == null) {
      throw new IllegalArgumentException("governor or dependent cannot be null");
    }
   
    CoreLabel headLabel = new CoreLabel();
    headLabel.setValue(regent);
    headLabel.setWord(regent);
    this.regent = headLabel;
   
    CoreLabel depLabel = new CoreLabel();
    depLabel.setValue(dependent);
    depLabel.setWord(dependent);
    this.dependent = depLabel;

    regentText = regent;
    dependentText = dependent;
  }
View Full Code Here

      min = in.getIndex() < min ? in.getIndex() : min;
      max = in.getIndex() > max ? in.getIndex() : max;
    }
    TwoDimensionalMap<Integer, Integer, IndexedWord> nodes = TwoDimensionalMap.hashMap();
    for(CoreNLPProtos.DependencyGraph.Node in: proto.getNodeList()){
      CoreLabel token = sentence.get(in.getIndex() - 1); // index starts at 1!
      IndexedWord word;
      if (in.hasCopyAnnotation() && in.getCopyAnnotation() > 0) {
        // TODO: if we make a copy wrapper CoreLabel, use it here instead
        word = new IndexedWord(new CoreLabel(token));
        word.set(CopyAnnotation.class, in.getCopyAnnotation());
      } else {
        word = new IndexedWord(token);
      }
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.ling.CoreLabel

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle. Contact coftware#gmail.com.