Examples of edu.stanford.nlp.util.CoreMap

edu.stanford.nlp.util.CoreMap

Base type for all annotatable core objects. Should usually be instantiated as {@link ArrayCoreMap}. Many common key definitions live in {@link edu.stanford.nlp.ling.CoreAnnotations}, but others may be defined elsewhere. See {@link edu.stanford.nlp.ling.CoreAnnotations} for details.

Note that implementations of this interface must take care to implement equality correctly: by default, two CoreMaps are .equal if they contain the same keys and all corresponding values are .equal. Subclasses that wish to change this behavior (such as {@link HashableCoreMap}) must make sure that all other CoreMap implementations have a special case in their .equals to use that equality definition when appropriate. Similarly, care must be taken when defining hashcodes. The default hashcode is 37 * sum of all keys' hashcodes plus the sum of all values' hashcodes. However, use of this class as HashMap keys is discouraged because the hashcode can change over time. Consider using a {@link HashableCoreMap}.
@author dramage @author rafferty

   */
  public void testMultipleSentencesAnnotation() {
    List<CoreLabel> firstLabels = makeSentence(testSentences[0]);
    List<CoreLabel> secondLabels = makeSentence(testSentences[1]);


    CoreMap firstSentence = new ArrayCoreMap();
    firstSentence.set(CoreAnnotations.TokensAnnotation.class, firstLabels);
    CoreMap secondSentence = new ArrayCoreMap();
    secondSentence.set(CoreAnnotations.TokensAnnotation.class, secondLabels);
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    sentences.add(firstSentence);
    sentences.add(secondSentence);


    Annotation annotation = new Annotation(longText);

View Full Code Here

    int offset = 0;
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    Elements sentenceElements = textElem.getChildElements("SENT");
    for (int crtsent = 0; crtsent < sentenceElements.size(); crtsent ++){
      Element sentElem = sentenceElements.get(crtsent);
      CoreMap sentence = new ArrayCoreMap();
      sentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
      Tree tree = Tree.valueOf(sentElem.getChild(0).getValue()); // XXX ms: is this the same as sentElem.getText() in JDOM?
      List<CoreLabel> tokens = new ArrayList<CoreLabel>();
      List<Tree> preTerminals = preTerminals(tree);
      for (Tree preTerminal: preTerminals) {
        String posTag = preTerminal.value();
        for (Tree wordTree: preTerminal.children()) {
          String word = wordTree.value();
          CoreLabel token = new CoreLabel();
          token.set(CoreAnnotations.TextAnnotation.class, word);
          token.set(CoreAnnotations.TextAnnotation.class, word);
          token.set(CoreAnnotations.PartOfSpeechAnnotation.class, posTag);
          token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
          offset += word.length();
          token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset);
          text.append(word);
          text.append(' ');
          offset += 1;
          tokens.add(token);
        }
      }
      if (preTerminals.size() > 0) {
        text.setCharAt(text.length() - 1, '\n');
      }
      sentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset - 1);
      sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
      sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
      sentences.add(sentence);
    }


    String docID = docElem.getAttributeValue("id");
    Matcher matcher = datePattern.matcher(docID);

View Full Code Here


  public static Annotation makeAnnotation(String ... testText) {
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    for (String text : testText) {
      List<CoreLabel> labels = makeSentence(text);      
      CoreMap sentence = new ArrayCoreMap();
      sentence.set(CoreAnnotations.TokensAnnotation.class, labels);
      sentences.add(sentence);
    }
    Annotation annotation = new Annotation(StringUtils.join(testText));
    annotation.set(CoreAnnotations.SentencesAnnotation.class, sentences);
    return annotation;

View Full Code Here

    Annotation doc = new Annotation(text);
    pipe.annotate(doc);


    assertTrue(doc.get(CoreAnnotations.SentencesAnnotation.class) != null);
    assertTrue(doc.get(CoreAnnotations.SentencesAnnotation.class).size() > 0);
    CoreMap sent = doc.get(CoreAnnotations.SentencesAnnotation.class).get(0);
    assertTrue(sent.get(CoreAnnotations.TokensAnnotation.class) != null);
    List<CoreLabel> tokens = sent.get(CoreAnnotations.TokensAnnotation.class);
    if(VERBOSE){
      for(CoreLabel token: tokens) {
        System.out.println("\t" + token.word() + " " + 
            token.tag() + " " + 
            token.ner() + " " +

View Full Code Here

      }
    }
  }
  
  private static int beginOffset(Tree tree, List<CoreLabel> tokens) {
    CoreMap label = (CoreMap)tree.label();
    int beginToken = label.get(CoreAnnotations.BeginIndexAnnotation.class);
    return beginOffset(tokens.get(beginToken));
  }

View Full Code Here

        baseCharOffset = 0;
      }


      chunkOffsets = ChunkAnnotationUtils.getChunkOffsetsUsingCharOffsets((List<? extends CoreMap>) sourceAnnotation.get(tokensAnnotationKey),
              charOffsets.getBegin() + baseCharOffset, charOffsets.getEnd()  + baseCharOffset);
      CoreMap annotation2 = ChunkAnnotationUtils.getMergedChunk((List<? extends CoreMap>) sourceAnnotation.get(tokensAnnotationKey),
              chunkOffsets.getBegin(), chunkOffsets.getEnd(), aggregators );


      annotation = ChunkAnnotationUtils.getAnnotatedChunkUsingCharOffsets(sourceAnnotation, charOffsets.getBegin(), charOffsets.getEnd());
      tokenOffsets = Interval.toInterval(annotation.get(CoreAnnotations.TokenBeginAnnotation.class),
              annotation.get(CoreAnnotations.TokenEndAnnotation.class), Interval.INTERVAL_OPEN_END);
      annotation.set(tokensAnnotationKey, annotation2.get(tokensAnnotationKey));
    }
    text = annotation.get(CoreAnnotations.TextAnnotation.class);
    extractFunc.annotate(this, (List<? extends CoreMap>) annotation.get(tokensAnnotationKey));
    return true;
  }

View Full Code Here

    int beginToken = label.get(CoreAnnotations.BeginIndexAnnotation.class);
    return beginOffset(tokens.get(beginToken));
  }
  
  private static int endOffset(Tree tree, List<CoreLabel> tokens) {
    CoreMap label = (CoreMap)tree.label();
    int endToken = label.get(CoreAnnotations.EndIndexAnnotation.class);
    if (endToken > tokens.size()) {
      String msg = "no token %d in tree:\n%s\ntokens:\n%s";
      throw new RuntimeException(String.format(msg, endToken - 1, tree, tokens));
    }
    return endOffset(tokens.get(endToken - 1));

View Full Code Here

    for (MatchedExpression expr:matchedExprs) {
      int start = expr.chunkOffsets.first();
      int end = expr.chunkOffsets.second();
      if (start >= last) {
        merged.addAll(list.subList(last,start));
        CoreMap m = expr.getAnnotation();
        merged.add(m);
        last = end;
      }
    }
    // Add rest of elements

View Full Code Here

  {
    if (matchedExprs == null) return list;
    Map<Integer, Integer> tokenBeginToListIndexMap = new HashMap<Integer, Integer>();//Generics.newHashMap();
    Map<Integer, Integer> tokenEndToListIndexMap = new HashMap<Integer, Integer>();//Generics.newHashMap();
    for (int i = 0; i < list.size(); i++) {
      CoreMap cm = list.get(i);
      if (cm.has(CoreAnnotations.TokenBeginAnnotation.class) && cm.has(CoreAnnotations.TokenEndAnnotation.class)) {
        tokenBeginToListIndexMap.put(cm.get(CoreAnnotations.TokenBeginAnnotation.class), i);
        tokenEndToListIndexMap.put(cm.get(CoreAnnotations.TokenEndAnnotation.class), i+1);
      } else {
        tokenBeginToListIndexMap.put(i, i);
        tokenEndToListIndexMap.put(i+1, i+1);
      }
    }
    Collections.sort(matchedExprs, EXPR_TOKEN_OFFSET_COMPARATOR);
    List<CoreMap> merged = new ArrayList<CoreMap>(list.size());   // Approximate size
    int last = 0;
    for (MatchedExpression expr:matchedExprs) {
      int start = expr.tokenOffsets.first();
      int end = expr.tokenOffsets.second();
      Integer istart = tokenBeginToListIndexMap.get(start);
      Integer iend = tokenEndToListIndexMap.get(end);
      if (istart != null && iend != null) {
        if (istart >= last) {
          merged.addAll(list.subList(last,istart));
          CoreMap m = expr.getAnnotation();
          merged.add(m);
          last = iend;
        }
      }
    }

View Full Code Here

        .extractExpressions(sentence);
      for (MatchedExpression matched:matchedExpressions) {
        // Print out matched text and value
        out.println("matched: " + matched.getText() + " with value " + matched.getValue());
        // Print out token information
        CoreMap cm = matched.getAnnotation();
        for (CoreLabel token : cm.get(CoreAnnotations.TokensAnnotation.class)) {
          String word = token.get(CoreAnnotations.TextAnnotation.class);
          String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
          String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
          String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
          out.println("matched token: " + "word="+word + ", lemma="+lemma + ", pos=" + pos + ", ne=" + ne);

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of edu.stanford.nlp.util.CoreMap

edu.stanford.nlp.dcoref.ACEMentionExtractor

edu.stanford.nlp.dcoref.CoNLLMentionExtractor

edu.stanford.nlp.dcoref.Document

edu.stanford.nlp.dcoref.MUCMentionExtractor

edu.stanford.nlp.dcoref.RuleBasedCorefMentionFinder

edu.stanford.nlp.dcoref.SieveCoreferenceSystem

edu.stanford.nlp.ie.crf.TestSequenceModel

edu.stanford.nlp.ie.machinereading.BasicRelationFeatureFactory

edu.stanford.nlp.ie.machinereading.domains.ace.AceReader

edu.stanford.nlp.ie.machinereading.EntityExtractorResultsPrinter

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.