Package edu.stanford.nlp.util

Examples of edu.stanford.nlp.util.CoreMap


    }

    // display each sentence in this annotation
    if (sentences != null) {
      for(int i = 0, sz = sentences.size(); i < sz; i ++) {
        CoreMap sentence = sentences.get(i);
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        os.printf("Sentence #%d (%d tokens):%n", (i + 1), tokens.size());

        String text = sentence.get(CoreAnnotations.TextAnnotation.class);
        os.println(text);

        // display the token-level annotations
        String[] tokenAnnotations = {
            "Text", "PartOfSpeech", "Lemma", "Answer", "NamedEntityTag", "CharacterOffsetBegin", "CharacterOffsetEnd", "NormalizedNamedEntityTag", "Timex", "TrueCase", "TrueCaseText" };
        for (CoreLabel token: tokens) {
          os.print(token.toShorterString(tokenAnnotations));
          os.print(' ');
        }
        os.println();

        // display the parse tree for this sentence
        Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        if (tree != null) {
          options.constituentTreePrinter.printTree(tree, os);
        }

        // It is possible to turn off the semantic graphs, in which
        // case we don't want to recreate them using the dependency
        // printer.  This might be relevant if using corenlp for a
        // language which doesn't have dependencies, for example.
        if (sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class) != null) {
          os.print(sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class).toList());
          os.printf("%n");
        }

        // display MachineReading entities and relations
        List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
        if (entities != null) {
          os.println("Extracted the following MachineReading entity mentions:");
          for (EntityMention e : entities) {
            os.println("\t" + e);
          }
        }
        List<RelationMention> relations = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
        if(relations != null){
          os.println("Extracted the following MachineReading relation mentions:");
          for(RelationMention r: relations){
            if(r.printableObject(beam)){
              os.println(r);
View Full Code Here


              sent.add(c);
            }
          }
        }
        CoreMap sentcm = new ArrayCoreMap();
        sentcm.set(CoreAnnotations.TextAnnotation.class, sentStr.trim());
        sentcm.set(CoreAnnotations.TokensAnnotation.class, sent);
        sentcm.set(CoreAnnotations.DocIDAnnotation.class, id + "-" + sentNum);
        sentences.add(sentcm);
      }
    }
    return sentences;
  }
View Full Code Here

          Timex timex = new Timex(child);
          if (child.getChildNodes().getLength() != 1) {
            throw new RuntimeException("TIMEX3 should only contain text " + child);
          }
          String timexText = child.getTextContent();
          CoreMap timexMap = new ArrayCoreMap();
          //(timex)
          timexMap.set(TimeAnnotations.TimexAnnotation.class, timex);
          //(text)
          timexMap.set(CoreAnnotations.TextAnnotation.class, timexText);
          //(characters)
          int charBegin = offset;
          timexMap.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, charBegin);
          offset += timexText.length();
          int charEnd = offset;
          timexMap.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, charEnd);
          //(tokens)
          if(haveTokenOffsets){
            Integer tokBegin = beginMap.get(charBegin);
            int searchStep = 1;          //if no exact match, search around the character offset
            while(tokBegin == null){
              tokBegin = beginMap.get(charBegin - searchStep);
              if(tokBegin == null){
                tokBegin = beginMap.get(charBegin + searchStep);
              }
              searchStep += 1;
            }
            searchStep = 1;
            Integer tokEnd = endMap.get(charEnd);
            while(tokEnd == null){
              tokEnd = endMap.get(charEnd - searchStep);
              if(tokEnd == null){
                tokEnd = endMap.get(charEnd + searchStep);
              }
              searchStep += 1;
            }
            timexMap.set(CoreAnnotations.TokenBeginAnnotation.class, tokBegin);
            timexMap.set(CoreAnnotations.TokenEndAnnotation.class, tokEnd);
          }
          //(add)
          timexMaps.add(timexMap);
        } else {
          throw new RuntimeException("unexpected element " + child);
View Full Code Here

  private List<CoreMap> toCoreMaps(CoreMap annotation, List<TimeExpression> timeExpressions, SUTime.TimeIndex timeIndex)
  {
    if (timeExpressions == null) return null;
    List<CoreMap> coreMaps = new ArrayList<CoreMap>(timeExpressions.size());
    for (TimeExpression te:timeExpressions) {
      CoreMap cm = te.getAnnotation();
      SUTime.Temporal temporal = te.getTemporal();
      if (temporal != null) {
        String origText = annotation.get(CoreAnnotations.TextAnnotation.class);
        String text = cm.get(CoreAnnotations.TextAnnotation.class);
        if (origText != null) {
          // Make sure the text is from original (and not from concatenated tokens)
          ChunkAnnotationUtils.annotateChunkText(cm, annotation);
          text = cm.get(CoreAnnotations.TextAnnotation.class);
        }
        Map<String,String> timexAttributes;
        try {
          timexAttributes = temporal.getTimexAttributes(timeIndex);
          if (options.includeRange) {
            SUTime.Temporal rangeTemporal = temporal.getRange();
            if (rangeTemporal != null) {
              timexAttributes.put("range", rangeTemporal.toString());
            }
          }
        } catch (Exception e) {
          logger.log(Level.WARNING, "Failed to get attributes from " + text + ", timeIndex " + timeIndex, e);
          continue;
        }
        Timex timex;
        try {
          timex = Timex.fromMap(text, timexAttributes);
        } catch (Exception e) {
          logger.log(Level.WARNING, "Failed to process timex " + text + " with attributes " + timexAttributes, e);
          continue;
        }
        assert timex != null// Timex.fromMap never returns null and if it exceptions, we've already done a continue
        cm.set(TimeAnnotations.TimexAnnotation.class, timex);
        coreMaps.add(cm);
      }
    }
    return coreMaps;
  }
View Full Code Here

   * TODO: remove this (listToString does the same thing)
   */
  public static String toSentence(List<? extends CoreMap> sentence) {
    StringBuilder text = new StringBuilder();
    for (int i = 0, sz = sentence.size(); i < sz; i++) {
      CoreMap iw = sentence.get(i);
      text.append(iw.get(CoreAnnotations.TextAnnotation.class));
      if (i < sz - 1) {
        text.append(" ");
      }
    }
    return text.toString();
View Full Code Here

   */
  @SuppressWarnings("unchecked")
  public CoreLabel(Label label) {
    super(0);
    if (label instanceof CoreMap) {
      CoreMap cl = (CoreMap) label;
      setCapacity(cl.size());
      for (Class key : cl.keySet()) {
        set(key, cl.get(key));
      }
    } else {
      if (label instanceof HasWord) {
         setWord(((HasWord)label).word());
      }
View Full Code Here

        }
      }

      List<Annotation> annotations = Generics.newArrayList();
      for (Tree tree : trees) {
        CoreMap sentence = new Annotation(Sentence.listToString(tree.yield()));
        sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
        List<CoreMap> sentences = Collections.singletonList(sentence);
        Annotation annotation = new Annotation("");
        annotation.set(CoreAnnotations.SentencesAnnotation.class, sentences);
        annotations.add(annotation);
      }
View Full Code Here

      boolean inMultiTokenExpr = false;
      boolean discardToken = false;
      if (o instanceof CoreMap) {
        // Hacky stuff to ensure sentence breaks do not happen in certain cases
        CoreMap cm = (CoreMap) o;
        Boolean forcedUntilEndValue = cm.get(CoreAnnotations.ForcedSentenceUntilEndAnnotation.class);
        if (!forcedEnd) {
          if (forcedUntilEndValue != null && forcedUntilEndValue)
            inWaitForForcedEnd = true;
          else {
            MultiTokenTag mt = cm.get(CoreAnnotations.MentionTokenAnnotation.class);
            if (mt != null && !mt.isEnd()) {
              // In the middle of a multi token mention, make sure sentence is not ended here
              inMultiTokenExpr = true;
            }
          }
View Full Code Here

    int maxEnd = nodes.size();
    if (maxNodes >= 0 && maxNodes + start < nodes.size()) {
      maxEnd = maxNodes + start;
    }
    for (int end = minEnd; end <= maxEnd; end++) {
      CoreMap chunk = ChunkAnnotationUtils.getMergedChunk(nodes, start, end, aggregators);
      if (nodePattern.match(chunk)) {
        matched.add(Interval.toInterval(start, end));
      }
    }
    return matched;
View Full Code Here

    return new SequencePattern.NodePatternExpr(CoreMapNodePattern.valueOf(textRegex));
  }

  private static final String testText = "the number were one, two and fifty.";
  public void testTokenSequenceMatcherValue() throws IOException {
    CoreMap doc = createDocument(testText);

    // Test simple sequence with value
    TokenSequencePattern p = TokenSequencePattern.compile(getOrPatternExpr(
            new Pair<String,Object>("one", 1), new Pair<String,Object>("two", null), new Pair<String,Object>("fifty", 50)));
    TokenSequenceMatcher m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));

    boolean match = m.find();
    assertTrue(match);
    assertEquals("one", m.group());
    assertEquals(1, m.groupValue());
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.util.CoreMap

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.