Examples of edu.stanford.nlp.ling.CoreLabel

edu.stanford.nlp.ling.CoreLabel
A CoreLabel represents a single word with ancillary information attached using CoreAnnotations. If the proper annotations are set, the CoreLabel also provides convenient methods to access tags, lemmas, etc.
A CoreLabel is a Map from keys (which are Class objects) to values, whose type is determined by the key. That is, it is a heterogeneous typesafe Map (see Josh Bloch, Effective Java, 2nd edition).
The CoreLabel class in particular bridges the gap between old-style JavaNLP Labels and the new CoreMap infrastructure. Instances of this class can be used (almost) anywhere that the now-defunct FeatureLabel family could be used. This data structure is backed by an ArrayCoreMap. Authors: dramage, rafferty.

   * @param tokens The list of tokens representing this sentence.
   * @return The original text of the sentence.
   */
  protected String recoverOriginalText(List<CoreLabel> tokens, CoreNLPProtos.Sentence sentence) {
    StringBuilder text = new StringBuilder();
    CoreLabel last = null;
    if (tokens.size() > 0) {
      CoreLabel token = tokens.get(0);
      if (token.originalText() != null) { text.append(token.originalText()); } else { text.append(token.word()); }
      last = tokens.get(0);
    }
    for (int i = 1; i < tokens.size(); ++i) {
      CoreLabel token = tokens.get(i);
      if (token.before() != null) {
        text.append(token.before());
        assert last != null;
        int missingWhitespace = (token.beginPosition() - last.endPosition()) - token.before().length();
        while (missingWhitespace > 0) {
          text.append(' ');
          missingWhitespace -= 1;
        }
      }
      if (token.originalText() != null) { text.append(token.originalText()); } else { text.append(token.word()); }
      last = token;
    }
    return text.toString();
  }

View Full Code Here

        assertEquals("Wrong number of sentences", 1, doc.size());
        List<CoreLabel> tokens = doc.get(0);
        assertEquals("Wrong number of tokens", offsets.length, tokens.size());


        for (int j = 0, sz = tokens.size(); j < sz; j++) {
          CoreLabel token = tokens.get(j);
          assertEquals("Wrong begin offset", offsets[j][0], (int) token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
          assertEquals("Wrong end offset", offsets[j][1], (int) token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
        }
      }
    }
  }

View Full Code Here

  public static State initialStateFromTaggedSentence(List<? extends HasWord> words) {
    List<Tree> preterminals = Generics.newArrayList();
    for (int index = 0; index < words.size(); ++index) {
      HasWord hw = words.get(index);


      CoreLabel wordLabel;
      String tag;
      if (hw instanceof CoreLabel) {
        wordLabel = (CoreLabel) hw;
        tag = wordLabel.tag();
        CoreLabel cl = (CoreLabel) hw;
      } else {
        wordLabel = new CoreLabel();
        wordLabel.setValue(hw.word());
        wordLabel.setWord(hw.word());
        if (!(hw instanceof HasTag)) {
          throw new IllegalArgumentException("Expected tagged words");
        }
        tag = ((HasTag) hw).tag();
        wordLabel.setTag(tag);
      }
      if (tag == null) {
        throw new IllegalArgumentException("Input word not tagged");
      }
      CoreLabel tagLabel = new CoreLabel();
      tagLabel.setValue(tag);


      // Index from 1.  Tools downstream from the parser expect that
      // Internally this parser uses the index, so we have to
      // overwrite incorrect indices if the label is already indexed
      wordLabel.setIndex(index + 1);
      tagLabel.setIndex(index + 1);


      LabeledScoredTreeNode wordNode = new LabeledScoredTreeNode(wordLabel);
      LabeledScoredTreeNode tagNode = new LabeledScoredTreeNode(tagLabel);
      tagNode.addChild(wordNode);


      wordLabel.set(TreeCoreAnnotations.HeadWordAnnotation.class, wordNode);
      wordLabel.set(TreeCoreAnnotations.HeadTagAnnotation.class, tagNode);
      tagLabel.set(TreeCoreAnnotations.HeadWordAnnotation.class, wordNode);
      tagLabel.set(TreeCoreAnnotations.HeadTagAnnotation.class, tagNode);


      preterminals.add(tagNode);
    }
    return new State(preterminals);
  }

View Full Code Here


    return featuresCpC;
  }


  protected Collection<String> featuresCp2C(PaddedList<IN> cInfo, int loc) {
    CoreLabel c = cInfo.get(loc);
    CoreLabel p = cInfo.get(loc - 1);
    CoreLabel p2 = cInfo.get(loc - 2);


    String cWord = getWord(c);
    String pWord = getWord(p);
    String p2Word = getWord(p2);
    Collection<String> featuresCp2C = new ArrayList<String>();


    if (flags.useMoreAbbr) {
      featuresCp2C.add(p2.get(CoreAnnotations.AbbrAnnotation.class) + '-' + c.get(CoreAnnotations.AbbrAnnotation.class) + "-P2ABBRANS");
    }


    if (flags.useMinimalAbbr) {
      featuresCp2C.add(p2.get(CoreAnnotations.AbbrAnnotation.class) + '-' + c.get(CoreAnnotations.AbbrAnnotation.class) + "-P2AP2CABB");
    }


    if (flags.useMinimalAbbr1) {
      if (!c.get(CoreAnnotations.AbbrAnnotation.class).equals("XX")) {
        featuresCp2C.add(p2.get(CoreAnnotations.AbbrAnnotation.class) + '-' + c.get(CoreAnnotations.AbbrAnnotation.class) + "-P2AP2CABB");
      }
    }


    if (flags.useParenMatching) {
      if (flags.useReverse) {

View Full Code Here


    return featuresCp2C;
  }


  protected Collection<String> featuresCp3C(PaddedList<IN> cInfo, int loc) {
    CoreLabel c = cInfo.get(loc);
    CoreLabel p = cInfo.get(loc - 1);
    CoreLabel p2 = cInfo.get(loc - 2);
    CoreLabel p3 = cInfo.get(loc - 3);


    String cWord = getWord(c);
    String pWord = getWord(p);
    String p2Word = getWord(p2);
    String p3Word = getWord(p3);

View Full Code Here


    return featuresCp3C;
  }


  protected Collection<String> featuresCp4C(PaddedList<IN> cInfo, int loc) {
    CoreLabel c = cInfo.get(loc);
    CoreLabel p = cInfo.get(loc - 1);
    CoreLabel p2 = cInfo.get(loc - 2);
    CoreLabel p3 = cInfo.get(loc - 3);
    CoreLabel p4 = cInfo.get(loc - 4);


    String cWord = getWord(c);
    String pWord = getWord(p);
    String p2Word = getWord(p2);
    String p3Word = getWord(p3);

View Full Code Here


    return featuresCp4C;
  }


  protected Collection<String> featuresCp5C(PaddedList<IN> cInfo, int loc) {
    CoreLabel c = cInfo.get(loc);
    CoreLabel p = cInfo.get(loc - 1);
    CoreLabel p2 = cInfo.get(loc - 2);
    CoreLabel p3 = cInfo.get(loc - 3);
    CoreLabel p4 = cInfo.get(loc - 4);
    CoreLabel p5 = cInfo.get(loc - 5);


    String cWord = getWord(c);
    String pWord = getWord(p);
    String p2Word = getWord(p2);
    String p3Word = getWord(p3);

View Full Code Here

    return featuresCp5C;
  }




  protected Collection<String> featuresCpCp2C(PaddedList<IN> cInfo, int loc) {
    CoreLabel c = cInfo.get(loc);
    CoreLabel p = cInfo.get(loc - 1);
    CoreLabel p2 = cInfo.get(loc - 2);


    String pWord = getWord(p);
    // String p2Word = getWord(p2);


    Collection<String> featuresCpCp2C = new ArrayList<String>();


    if (flags.useInternal && flags.useExternal) {


      if (flags.strictGoodCoNLL && ! flags.removeStrictGoodCoNLLDuplicates && flags.useTypeySequences && flags.maxLeft >= 2) {
        // this feature duplicates -TYPETYPES below, so probably don't include it, but it was in original tests of CMM goodCoNLL
        featuresCpCp2C.add(p2.get(CoreAnnotations.ShapeAnnotation.class) + '-' + p.get(CoreAnnotations.ShapeAnnotation.class) + '-' + c.get(CoreAnnotations.ShapeAnnotation.class) + "-TTPS");
      }


      if (flags.useAbbr) {
        featuresCpCp2C.add(p2.get(CoreAnnotations.AbbrAnnotation.class) + '-' + p.get(CoreAnnotations.AbbrAnnotation.class) + '-' + c.get(CoreAnnotations.AbbrAnnotation.class) + "-2PABBRANS");
      }


      if (flags.useChunks) {
        featuresCpCp2C.add(p2.get(CoreAnnotations.ChunkAnnotation.class) + '-' + p.get(CoreAnnotations.ChunkAnnotation.class) + '-' + c.get(CoreAnnotations.ChunkAnnotation.class) + "-2PCHUNKS");
      }


      if (flags.useLongSequences) {
        featuresCpCp2C.add("PPSEQ");
      }
      if (flags.useBoundarySequences && pWord.equals(CoNLLDocumentReaderAndWriter.BOUNDARY)) {
        featuresCpCp2C.add("BNDRY-SPAN-PPSEQ");
      }
      // This more complex consistency checker didn't help!
      // if (flags.useBoundarySequences) {
      //   // try enforce consistency over "and" and "," as well as boundary
      //   if (pWord.equals(CoNLLDocumentIteratorFactory.BOUNDARY) ||
      //       pWord.equalsIgnoreCase("and") || pWord.equalsIgnoreCase("or") ||
      //       pWord.equals(",")) {
      //   }
      // }


      if (flags.useTaggySequences) {
        if (flags.useTags) {
          featuresCpCp2C.add(p2.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-' + p.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-' + c.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + "-TTS");
          if (flags.useTaggySequencesShapeInteraction) {
            featuresCpCp2C.add(p2.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-' + p.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-' + c.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-' + c.get(CoreAnnotations.ShapeAnnotation.class) + "-TTS-CS");
          }
        }
        if (flags.useDistSim) {
          featuresCpCp2C.add(p2.get(CoreAnnotations.DistSimAnnotation.class) + '-' + p.get(CoreAnnotations.DistSimAnnotation.class) + '-' + c.get(CoreAnnotations.DistSimAnnotation.class) + "-DISTSIM_TTS1");
          if (flags.useTaggySequencesShapeInteraction) {
            featuresCpCp2C.add(p2.get(CoreAnnotations.DistSimAnnotation.class) + '-' + p.get(CoreAnnotations.DistSimAnnotation.class) + '-' + c.get(CoreAnnotations.DistSimAnnotation.class) + '-' + c.get(CoreAnnotations.ShapeAnnotation.class) + "-DISTSIM_TTS1-CS");
          }
        }
      }


      if (((flags.wordShape > WordShapeClassifier.NOWORDSHAPE) ||
           flags.useShapeStrings)
          && flags.useTypeSeqs && flags.useTypeSeqs2 && flags.maxLeft >= 2) {
        String cShape = c.get(CoreAnnotations.ShapeAnnotation.class);
        String pShape = p.get(CoreAnnotations.ShapeAnnotation.class);
        String p2Shape = p2.get(CoreAnnotations.ShapeAnnotation.class);
        featuresCpCp2C.add(p2Shape + '-' + pShape + '-' + cShape + "-TYPETYPES");
      }
    } else if (flags.useInternal) {


      if (flags.useLongSequences) {
        featuresCpCp2C.add("PPSEQ");
      }
    } else if (flags.useExternal) {


      if (flags.useLongSequences) {
        featuresCpCp2C.add("PPSEQ");
      }


      if (((flags.wordShape > WordShapeClassifier.NOWORDSHAPE) ||
           flags.useShapeStrings)
          && flags.useTypeSeqs && flags.useTypeSeqs2 && flags.maxLeft >= 2) {
        String cShape = c.get(CoreAnnotations.ShapeAnnotation.class);
        String pShape = p.get(CoreAnnotations.ShapeAnnotation.class);
        String p2Shape = p2.get(CoreAnnotations.ShapeAnnotation.class);
        featuresCpCp2C.add(p2Shape + '-' + pShape + '-' + cShape + "-TYPETYPES");
      }
    }


    return featuresCpCp2C;

View Full Code Here

          } else if (endMatcher.matches()) {
            //System.out.println("matched end");
            label = backgroundSymbol;
          } else {


            CoreLabel c = new CoreLabel();


            List<String> toks = new ArrayList<String>();


            toks.add(tok);


            for (String toksplit : toks) {


              sentStr += " " + toksplit;


              c.setWord(toksplit);
              c.setLemma(toksplit);
              c.setValue(toksplit);
              c.set(CoreAnnotations.TextAnnotation.class, toksplit);
              c.set(CoreAnnotations.OriginalTextAnnotation.class, tok);


              if (setGoldClass){
                 
                c.set(CoreAnnotations.GoldAnswerAnnotation.class, label);
              }
              
              if (setClassForTheseLabels != null
                  && setClassForTheseLabels.containsKey(label))
                c.set(setClassForTheseLabels.get(label), label);


              sent.add(c);
            }
          }
        }

View Full Code Here


    if (flags.lowerNewgeneThreshold) {
      // Used to raise recall for task 1B
      System.err.println("Using NEWGENE threshold: " + flags.newgeneThreshold);
      for (int i = 0, docSize = document.size(); i < docSize; i++) {
        CoreLabel wordInfo = document.get(i);
        Datum<String, String> d = makeDatum(document, i, featureFactories);
        Counter<String> scores = classifier.scoresOf(d);
        //String answer = BACKGROUND;
        String answer = flags.backgroundSymbol;
        // HN: The evaluation of scoresOf seems to result in some
        // kind of side effect.  Specifically, the symptom is that
        // if scoresOf is not evaluated at every position, the
        // answers are different
        if ("NEWGENE".equals(wordInfo.get(CoreAnnotations.GazAnnotation.class))) {
          for (String label : scores.keySet()) {
            if ("G".equals(label)) {
              System.err.println(wordInfo.word() + ':' + scores.getCount(label));
              if (scores.getCount(label) > flags.newgeneThreshold) {
                answer = label;
              }
            }
          }
        }
        wordInfo.set(CoreAnnotations.AnswerAnnotation.class, answer);
      }
    } else {
      for (int i = 0, listSize = document.size(); i < listSize; i++) {
        String answer = classOf(document, i);
        CoreLabel wordInfo = document.get(i);
        //System.err.println("XXX answer for " +
        //        wordInfo.word() + " is " + answer);
        wordInfo.set(CoreAnnotations.AnswerAnnotation.class, answer);
      }
      if (flags.justify && (classifier instanceof LinearClassifier)) {
        LinearClassifier<String, String> lc = (LinearClassifier<String, String>) classifier;
        for (int i = 0, lsize = document.size(); i < lsize; i++) {
          CoreLabel lineInfo = document.get(i);
          System.err.print("@@ Position " + i + ": ");
          System.err.println(lineInfo.word() + " chose " + lineInfo.get(CoreAnnotations.AnswerAnnotation.class));
          lc.justificationOf(makeDatum(document, i, featureFactories));
        }
      }
    }
    if (flags.useReverse) {

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of edu.stanford.nlp.ling.CoreLabel

edu.arizona.sista.processors.bionlp.BioNLPTokenizer

edu.stanford.nlp.dcoref.Mention

edu.stanford.nlp.dcoref.MentionExtractor

edu.stanford.nlp.dcoref.RuleBasedCorefMentionFinder

edu.stanford.nlp.ie.AbstractSequenceClassifierTest

edu.stanford.nlp.ie.machinereading.GenericDataSetReader

edu.stanford.nlp.ie.ner.CMMClassifier

edu.stanford.nlp.ie.ner.CMMClassifier$Scorer

edu.stanford.nlp.ie.NERClassifierCombiner

edu.stanford.nlp.ie.NERFeatureFactory

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware#gmail.com.