Package edu.stanford.nlp.util

Examples of edu.stanford.nlp.util.CoreMap


   */
  public void testMultipleSentencesAnnotation() {
    List<CoreLabel> firstLabels = makeSentence(testSentences[0]);
    List<CoreLabel> secondLabels = makeSentence(testSentences[1]);

    CoreMap firstSentence = new ArrayCoreMap();
    firstSentence.set(CoreAnnotations.TokensAnnotation.class, firstLabels);
    CoreMap secondSentence = new ArrayCoreMap();
    secondSentence.set(CoreAnnotations.TokensAnnotation.class, secondLabels);
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    sentences.add(firstSentence);
    sentences.add(secondSentence);

    Annotation annotation = new Annotation(longText);
View Full Code Here


    int offset = 0;
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    Elements sentenceElements = textElem.getChildElements("SENT");
    for (int crtsent = 0; crtsent < sentenceElements.size(); crtsent ++){
      Element sentElem = sentenceElements.get(crtsent);
      CoreMap sentence = new ArrayCoreMap();
      sentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
      Tree tree = Tree.valueOf(sentElem.getChild(0).getValue()); // XXX ms: is this the same as sentElem.getText() in JDOM?
      List<CoreLabel> tokens = new ArrayList<CoreLabel>();
      List<Tree> preTerminals = preTerminals(tree);
      for (Tree preTerminal: preTerminals) {
        String posTag = preTerminal.value();
        for (Tree wordTree: preTerminal.children()) {
          String word = wordTree.value();
          CoreLabel token = new CoreLabel();
          token.set(CoreAnnotations.TextAnnotation.class, word);
          token.set(CoreAnnotations.TextAnnotation.class, word);
          token.set(CoreAnnotations.PartOfSpeechAnnotation.class, posTag);
          token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
          offset += word.length();
          token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset);
          text.append(word);
          text.append(' ');
          offset += 1;
          tokens.add(token);
        }
      }
      if (preTerminals.size() > 0) {
        text.setCharAt(text.length() - 1, '\n');
      }
      sentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset - 1);
      sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
      sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
      sentences.add(sentence);
    }

    String docID = docElem.getAttributeValue("id");
    Matcher matcher = datePattern.matcher(docID);
View Full Code Here

  public static Annotation makeAnnotation(String ... testText) {
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    for (String text : testText) {
      List<CoreLabel> labels = makeSentence(text);     
      CoreMap sentence = new ArrayCoreMap();
      sentence.set(CoreAnnotations.TokensAnnotation.class, labels);
      sentences.add(sentence);
    }
    Annotation annotation = new Annotation(StringUtils.join(testText));
    annotation.set(CoreAnnotations.SentencesAnnotation.class, sentences);
    return annotation;
View Full Code Here

    Annotation doc = new Annotation(text);
    pipe.annotate(doc);

    assertTrue(doc.get(CoreAnnotations.SentencesAnnotation.class) != null);
    assertTrue(doc.get(CoreAnnotations.SentencesAnnotation.class).size() > 0);
    CoreMap sent = doc.get(CoreAnnotations.SentencesAnnotation.class).get(0);
    assertTrue(sent.get(CoreAnnotations.TokensAnnotation.class) != null);
    List<CoreLabel> tokens = sent.get(CoreAnnotations.TokensAnnotation.class);
    if(VERBOSE){
      for(CoreLabel token: tokens) {
        System.out.println("\t" + token.word() + " " +
            token.tag() + " " +
            token.ner() + " " +
View Full Code Here

      }
    }
  }
 
  private static int beginOffset(Tree tree, List<CoreLabel> tokens) {
    CoreMap label = (CoreMap)tree.label();
    int beginToken = label.get(CoreAnnotations.BeginIndexAnnotation.class);
    return beginOffset(tokens.get(beginToken));
  }
View Full Code Here

        baseCharOffset = 0;
      }

      chunkOffsets = ChunkAnnotationUtils.getChunkOffsetsUsingCharOffsets((List<? extends CoreMap>) sourceAnnotation.get(tokensAnnotationKey),
              charOffsets.getBegin() + baseCharOffset, charOffsets.getEnd()  + baseCharOffset);
      CoreMap annotation2 = ChunkAnnotationUtils.getMergedChunk((List<? extends CoreMap>) sourceAnnotation.get(tokensAnnotationKey),
              chunkOffsets.getBegin(), chunkOffsets.getEnd(), aggregators );

      annotation = ChunkAnnotationUtils.getAnnotatedChunkUsingCharOffsets(sourceAnnotation, charOffsets.getBegin(), charOffsets.getEnd());
      tokenOffsets = Interval.toInterval(annotation.get(CoreAnnotations.TokenBeginAnnotation.class),
              annotation.get(CoreAnnotations.TokenEndAnnotation.class), Interval.INTERVAL_OPEN_END);
      annotation.set(tokensAnnotationKey, annotation2.get(tokensAnnotationKey));
    }
    text = annotation.get(CoreAnnotations.TextAnnotation.class);
    extractFunc.annotate(this, (List<? extends CoreMap>) annotation.get(tokensAnnotationKey));
    return true;
  }
View Full Code Here

    int beginToken = label.get(CoreAnnotations.BeginIndexAnnotation.class);
    return beginOffset(tokens.get(beginToken));
  }
 
  private static int endOffset(Tree tree, List<CoreLabel> tokens) {
    CoreMap label = (CoreMap)tree.label();
    int endToken = label.get(CoreAnnotations.EndIndexAnnotation.class);
    if (endToken > tokens.size()) {
      String msg = "no token %d in tree:\n%s\ntokens:\n%s";
      throw new RuntimeException(String.format(msg, endToken - 1, tree, tokens));
    }
    return endOffset(tokens.get(endToken - 1));
View Full Code Here

    for (MatchedExpression expr:matchedExprs) {
      int start = expr.chunkOffsets.first();
      int end = expr.chunkOffsets.second();
      if (start >= last) {
        merged.addAll(list.subList(last,start));
        CoreMap m = expr.getAnnotation();
        merged.add(m);
        last = end;
      }
    }
    // Add rest of elements
View Full Code Here

  {
    if (matchedExprs == null) return list;
    Map<Integer, Integer> tokenBeginToListIndexMap = new HashMap<Integer, Integer>();//Generics.newHashMap();
    Map<Integer, Integer> tokenEndToListIndexMap = new HashMap<Integer, Integer>();//Generics.newHashMap();
    for (int i = 0; i < list.size(); i++) {
      CoreMap cm = list.get(i);
      if (cm.has(CoreAnnotations.TokenBeginAnnotation.class) && cm.has(CoreAnnotations.TokenEndAnnotation.class)) {
        tokenBeginToListIndexMap.put(cm.get(CoreAnnotations.TokenBeginAnnotation.class), i);
        tokenEndToListIndexMap.put(cm.get(CoreAnnotations.TokenEndAnnotation.class), i+1);
      } else {
        tokenBeginToListIndexMap.put(i, i);
        tokenEndToListIndexMap.put(i+1, i+1);
      }
    }
    Collections.sort(matchedExprs, EXPR_TOKEN_OFFSET_COMPARATOR);
    List<CoreMap> merged = new ArrayList<CoreMap>(list.size());   // Approximate size
    int last = 0;
    for (MatchedExpression expr:matchedExprs) {
      int start = expr.tokenOffsets.first();
      int end = expr.tokenOffsets.second();
      Integer istart = tokenBeginToListIndexMap.get(start);
      Integer iend = tokenEndToListIndexMap.get(end);
      if (istart != null && iend != null) {
        if (istart >= last) {
          merged.addAll(list.subList(last,istart));
          CoreMap m = expr.getAnnotation();
          merged.add(m);
          last = iend;
        }
      }
    }
View Full Code Here

        .extractExpressions(sentence);
      for (MatchedExpression matched:matchedExpressions) {
        // Print out matched text and value
        out.println("matched: " + matched.getText() + " with value " + matched.getValue());
        // Print out token information
        CoreMap cm = matched.getAnnotation();
        for (CoreLabel token : cm.get(CoreAnnotations.TokensAnnotation.class)) {
          String word = token.get(CoreAnnotations.TextAnnotation.class);
          String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
          String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
          String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
          out.println("matched token: " + "word="+word + ", lemma="+lemma + ", pos=" + pos + ", ne=" + ne);
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.util.CoreMap

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.