// Cache the parse-tree leaves and the sentence tokens up front; several feature
// groups below index into these by syntactic-head token position.
List<Tree> leaves = tree.getLeaves();
List<CoreLabel> tokens = rel.getSentence().get(TokensAnnotation.class);
// this assumes that both args are in the same sentence as the relation object
// let's check for this to be safe
CoreMap relSentence = rel.getSentence();
CoreMap arg0Sentence = arg0.getSentence();
CoreMap arg1Sentence = arg1.getSentence();
// NOTE: reference (==) comparison is intentional — the args must share the exact
// same sentence object as the relation, otherwise their token offsets would be
// meaningless against this sentence's tokens/tree.
if(arg0Sentence != relSentence){
System.err.println("WARNING: Found relation with arg0 in a different sentence: " + rel);
System.err.println("Relation sentence: " + relSentence.get(TextAnnotation.class));
System.err.println("Arg0 sentence: " + arg0Sentence.get(TextAnnotation.class));
// Bail out; no features can be safely extracted for this relation.
return false;
}
if(arg1Sentence != relSentence){
System.err.println("WARNING: Found relation with arg1 in a different sentence: " + rel);
System.err.println("Relation sentence: " + relSentence.get(TextAnnotation.class));
System.err.println("Arg1 sentence: " + arg1Sentence.get(TextAnnotation.class));
return false;
}
// Checklist keeps track of which features have been handled by an if clause
// Should be empty after all the clauses have been gone through.
// (usingFeature presumably removes the handled type from checklist — confirm in its definition.)
List<String> checklist = new ArrayList<String>(types);
// arg_type: concatenation of the entity types of the args, e.g.
// "arg1type=Loc_and_arg2type=Org"
// arg_subtype: similar, for entity subtypes
if (usingFeature(types, checklist, "arg_type")) {
features.setCount("arg1type=" + arg0.getType() + "_and_arg2type=" + arg1.getType(), 1.0);
}
if (usingFeature(types,checklist,"arg_subtype")) {
features.setCount("arg1subtype="+arg0.getSubType()+"_and_arg2subtype="+arg1.getSubType(),1.0);
}
// arg_order: which arg comes first in the sentence.
// Only fires when arg0's head precedes arg1's head; the reverse order is encoded
// by the absence of the feature.
if (usingFeature(types, checklist, "arg_order")) {
if (arg0.getSyntacticHeadTokenPosition() < arg1.getSyntacticHeadTokenPosition())
features.setCount("arg1BeforeArg2", 1.0);
}
// same_head: whether the two args share the same syntactic head token
if (usingFeature(types, checklist, "same_head")) {
if (arg0.getSyntacticHeadTokenPosition() == arg1.getSyntacticHeadTokenPosition())
features.setCount("arguments_have_same_head",1.0);
}
// full_tree_path: Path from one arg to the other in the phrase structure tree,
// e.g., NNP -> PP -> NN <- NNP
if (usingFeature(types, checklist, "full_tree_path")) {
//System.err.println("ARG0: " + arg0);
//System.err.println("ARG0 HEAD: " + arg0.getSyntacticHeadTokenPosition());
//System.err.println("TREE: " + tree);
//System.err.println("SENTENCE: " + sentToString(arg0.getSentence()));
// Guard: head token positions must be valid leaf indices into this tree.
if(arg0.getSyntacticHeadTokenPosition() < leaves.size() && arg1.getSyntacticHeadTokenPosition() < leaves.size()){
// Preterminals (POS nodes) directly above each arg's head leaf.
Tree arg0preterm = leaves.get(arg0.getSyntacticHeadTokenPosition()).parent(tree);
Tree arg1preterm = leaves.get(arg1.getSyntacticHeadTokenPosition()).parent(tree);
// Lowest common ancestor of the two preterminals.
Tree join = tree.joinNode(arg0preterm, arg1preterm);
StringBuilder pathStringBuilder = new StringBuilder();
// Upward half of the path: arg0preterm up to (but excluding) the join node,
// rendered bottom-up with "<-" arrows.
List<Tree> pathUp = join.dominationPath(arg0preterm);
Collections.reverse(pathUp);
for (Tree node : pathUp) {
if (node != join) {
pathStringBuilder.append(node.label().value() + " <- ");
}
}
// Downward half: join (printed without an arrow) down to arg1preterm with "->".
for (Tree node : join.dominationPath(arg1preterm)) {
pathStringBuilder.append(((node == join) ? "" : " -> ") + node.label().value());
}
String pathString = pathStringBuilder.toString();
if(logger != null && ! rel.getType().equals(RelationMention.UNRELATED)) logger.info("full_tree_path: " + pathString);
features.setCount("treepath:"+pathString, 1.0);
} else {
// Diagnostic dump; the feature is simply skipped for this relation.
System.err.println("WARNING: found weird argument offsets. Most likely because arguments appear in different sentences than the relation:");
System.err.println("ARG0: " + arg0);
System.err.println("ARG0 HEAD: " + arg0.getSyntacticHeadTokenPosition());
System.err.println("ARG0 SENTENCE: " + sentToString(arg0.getSentence()));
System.err.println("ARG1: " + arg1);
System.err.println("ARG1 HEAD: " + arg1.getSyntacticHeadTokenPosition());
System.err.println("ARG1 SENTENCE: " + sentToString(arg1.getSentence()));
System.err.println("RELATION TREE: " + tree);
}
}
// path_length / path_length_binary: length of the path between the two arg head
// leaves in the phrase-structure parse tree.
// FIX: previously pathLength was computed unconditionally and without a bounds
// check, so (a) the tree walk ran even when neither feature was requested, and
// (b) an out-of-range head position threw IndexOutOfBoundsException — the very
// case the full_tree_path block above guards against and merely warns about.
// Both usingFeature calls still run unconditionally so checklist bookkeeping is
// unchanged.
boolean usePathLength = usingFeature(types, checklist, "path_length");
boolean usePathLengthBinary = usingFeature(types, checklist, "path_length_binary");
if (usePathLength || usePathLengthBinary) {
  if (arg0.getSyntacticHeadTokenPosition() < leaves.size() && arg1.getSyntacticHeadTokenPosition() < leaves.size()) {
    int pathLength = tree.pathNodeToNode(leaves.get(arg0.getSyntacticHeadTokenPosition()),
        leaves.get(arg1.getSyntacticHeadTokenPosition())).size();
    // path_length: integer-valued feature (the count IS the length)
    if (usePathLength) {
      features.setCount("path_length", pathLength);
    }
    // path_length_binary: one indicator feature per observed length
    if (usePathLengthBinary) {
      features.setCount("path_length_" + pathLength, 1.0);
    }
  } else {
    System.err.println("WARNING: found weird argument offsets. Most likely because arguments appear in different sentences than the relation:");
    System.err.println("Skipping path_length features for relation: " + rel);
  }
}
/* entity_order
 * This tells you for each of the two args
 * whether there are other entities before or after that arg.
 * In particular, it can tell whether an arg is the first entity of its type in the sentence
 * (which can be useful for example for telling the gameWinner and gameLoser in NFL).
 * TODO: restrict this feature so that it only looks for
 * entities of the same type?
 * */
if (usingFeature(types, checklist, "entity_order")) {
for (int i = 0; i < rel.getArgs().size(); i++) {
// We already checked the class of the args at the beginning of the method
EntityMention arg = (EntityMention) rel.getArgs().get(i);
if(rel.getSentence().get(MachineReadingAnnotations.EntityMentionsAnnotation.class) != null) { // may be null due to annotation error
// Compare this arg's head position against every entity mention in the sentence
// (including, incidentally, the other relation arg).
for (EntityMention otherArg : rel.getSentence().get(MachineReadingAnnotations.EntityMentionsAnnotation.class)) {
String feature;
// "argN_before_T": some entity of type T occurs after argN in the sentence.
if (otherArg.getSyntacticHeadTokenPosition() > arg.getSyntacticHeadTokenPosition()) {
feature = "arg" + i + "_before_" + otherArg.getType();
features.setCount(feature, 1.0);
}
// "argN_after_T": some entity of type T occurs before argN in the sentence.
if (otherArg.getSyntacticHeadTokenPosition() < arg.getSyntacticHeadTokenPosition()) {
feature = "arg" + i + "_after_" + otherArg.getType();
features.setCount(feature, 1.0);
}
}
}
}
}
// surface_distance: Number of tokens in the sentence between the two words, integer-valued feature
int surfaceDistance = Math.abs(arg0.getSyntacticHeadTokenPosition() - arg1.getSyntacticHeadTokenPosition());
if (usingFeature(types, checklist, "surface_distance")) {
features.setCount("surface_distance", surfaceDistance);
}
// surface_distance_binary: Number of tokens in the sentence between the two words, binary features
if (usingFeature(types, checklist, "surface_distance_binary")) {
features.setCount("surface_distance_" + surfaceDistance, 1.0);
}
// surface_distance_bins: number of tokens between the two args, binned to several intervals
// Bins: exact distance for 0-3, then [4,6), [6,10), and >= 10.
if(usingFeature(types, checklist, "surface_distance_bins")) {
if(surfaceDistance < 4){
// Small distances keep their exact value as the bin name.
features.setCount("surface_distance_bin" + surfaceDistance, 1.0);
} else if(surfaceDistance < 6){
features.setCount("surface_distance_bin_lt6", 1.0);
} else if(surfaceDistance < 10) {
features.setCount("surface_distance_bin_lt10", 1.0);
} else {
features.setCount("surface_distance_bin_ge10", 1.0);
}
}
// separate_surface_windows: windows of 1,2,3 tokens before and after args, for each arg separately
// Separate features are generated for windows to the left and to the right of the args.
// Features are concatenations of words in the window (or NULL for sentence boundary).
//
// conjunction_surface_windows: concatenation of the windows of the two args
//
// separate_surface_windows_POS: windows of POS tags of size 1,2,3 for each arg
//
// conjunction_surface_windows_POS: concatenation of windows of the args
//
// NOTE(review): the window strings accumulate onto an initially-null array slot,
// so every feature string contains a literal "null" at the far end (e.g.
// "the_null"). This matches the original behavior and is kept so feature names
// stay compatible with previously trained models.
List<EntityMention> args = new ArrayList<EntityMention>();
args.add(arg0); args.add(arg1);
for (int windowSize = 1; windowSize <= 3; windowSize++) {
  String[] leftWindow, rightWindow, leftWindowPOS, rightWindowPOS;
  leftWindow = new String[2];
  rightWindow = new String[2];
  leftWindowPOS = new String[2];
  rightWindowPOS = new String[2];
  for (int argn = 0; argn <= 1; argn++) {
    int ind = args.get(argn).getSyntacticHeadTokenPosition();
    for (int winnum = 1; winnum <= windowSize; winnum++) {
      int windex = ind - winnum;
      // FIX: was (windex > 0), which wrongly treated the sentence's first token
      // (index 0) as out of bounds; >= 0 mirrors the right-side boundary check
      // (windex < leaves.size()) below.
      if (windex >= 0) {
        leftWindow[argn] = leaves.get(windex).label().value() + "_" + leftWindow[argn];
        leftWindowPOS[argn] = leaves.get(windex).parent(tree).label().value() + "_" + leftWindowPOS[argn];
      } else {
        leftWindow[argn] = "NULL_" + leftWindow[argn];
        leftWindowPOS[argn] = "NULL_" + leftWindowPOS[argn];
      }
      windex = ind + winnum;
      if (windex < leaves.size()) {
        rightWindow[argn] = rightWindow[argn] + "_" + leaves.get(windex).label().value();
        rightWindowPOS[argn] = rightWindowPOS[argn] + "_" + leaves.get(windex).parent(tree).label().value();
      } else {
        rightWindow[argn] = rightWindow[argn] + "_NULL";
        rightWindowPOS[argn] = rightWindowPOS[argn] + "_NULL";
      }
    }
    // FIX: word and POS features were previously swapped across these two flags
    // (the word flag emitted left word + left POS; the POS flag emitted right
    // word + right POS), contradicting the documentation above. Now the word
    // flag emits both word windows and the POS flag emits both POS windows.
    if (usingFeature(types, checklist, "separate_surface_windows")) {
      features.setCount("left_window_"+windowSize+"_arg_" + argn + ": " + leftWindow[argn], 1.0);
      features.setCount("right_window_"+windowSize+"_arg_" + argn + ": " + rightWindow[argn], 1.0);
    }
    if (usingFeature(types, checklist, "separate_surface_windows_POS")) {
      features.setCount("left_window_"+windowSize+"_POS_arg_" + argn + ": " + leftWindowPOS[argn], 1.0);
      features.setCount("right_window_"+windowSize+"_POS_arg_" + argn + ": " + rightWindowPOS[argn], 1.0);
    }
  }
  if (usingFeature(types, checklist, "conjunction_surface_windows")) {
    features.setCount("left_windows_"+windowSize+": " + leftWindow[0] + "__" + leftWindow[1], 1.0);
    features.setCount("right_windows_"+windowSize+": " + rightWindow[0] + "__" + rightWindow[1], 1.0);
  }
  if (usingFeature(types, checklist, "conjunction_surface_windows_POS")) {
    features.setCount("left_windows_"+windowSize+"_POS: " + leftWindowPOS[0] + "__" + leftWindowPOS[1], 1.0);
    features.setCount("right_windows_"+windowSize+"_POS: " + rightWindowPOS[0] + "__" + rightWindowPOS[1], 1.0);
  }
}
// arg_words: the head word of each arg as separate features, and concatenated.
// When doNotLexicalizeFirstArg is set, features involving arg0's word are
// withheld (only the arg1 word feature is emitted).
String word0 = leaves.get(arg0.getSyntacticHeadTokenPosition()).label().value();
String word1 = leaves.get(arg1.getSyntacticHeadTokenPosition()).label().value();
if (usingFeature(types, checklist, "arg_words")) {
  // Idiom fix: "!flag" instead of "flag == false"; behavior unchanged.
  if (!doNotLexicalizeFirstArg) {
    features.setCount("word_arg0: " + word0, 1.0);
  }
  features.setCount("word_arg1: " + word1, 1.0);
  if (!doNotLexicalizeFirstArg) {
    features.setCount("words: " + word0 + "__" + word1, 1.0);
  }
}
// arg_POS: POS tags of the arg head words (the preterminal labels above the
// head leaves), as separate features and concatenated.
String pos0 = leaves.get(arg0.getSyntacticHeadTokenPosition()).parent(tree).label().value();
String pos1 = leaves.get(arg1.getSyntacticHeadTokenPosition()).parent(tree).label().value();
if (usingFeature(types, checklist, "arg_POS")) {
  features.setCount("POS_arg0: " + pos0, 1.0);
  features.setCount("POS_arg1: " + pos1, 1.0);
  features.setCount("POSs: " + pos0 + "__" + pos1, 1.0);
}
// adjacent_words: words immediately to the left and right of the args
// Uses the head Span of each arg (token character of the mention head), so a
// multi-token head contributes the token before its start and after its end.
if(usingFeature(types, checklist, "adjacent_words")){
for(int i = 0; i < rel.getArgs().size(); i ++){
Span s = ((EntityMention) rel.getArg(i)).getHead();
// Token immediately before the head span, if the span does not start the sentence.
if(s.start() > 0){
String v = tokens.get(s.start() - 1).word();
features.setCount("leftarg" + i + "-" + v, 1.0);
}
// Token immediately after the head span (Span.end() is exclusive —
// presumably, given the direct use as a token index; confirm in Span's docs).
if(s.end() < tokens.size()){
String v = tokens.get(s.end()).word();
features.setCount("rightarg" + i + "-" + v, 1.0);
}
}
}
// entities_between_args: binary feature for each type specifying whether there is an entity of that type in the sentence
// between the two args.
// e.g. "entity_between_args: Loc" means there is at least one entity of type Loc between the two args
// (NOTE: this block continues beyond the visible region of the file.)
if (usingFeature(types, checklist, "entities_between_args")) {
CoreMap sent = rel.getSentence();
if(sent == null) throw new RuntimeException("NULL sentence for relation " + rel);
List<EntityMention> relArgs = sent.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
if(relArgs != null) { // may be null due to annotation errors!
for (EntityMention arg : relArgs) {
// Strictly between the two arg heads, in either order; the args themselves
// are excluded by the strict inequalities.
if ((arg.getSyntacticHeadTokenPosition() > arg0.getSyntacticHeadTokenPosition() && arg.getSyntacticHeadTokenPosition() < arg1.getSyntacticHeadTokenPosition())
|| (arg.getSyntacticHeadTokenPosition() > arg1.getSyntacticHeadTokenPosition() && arg.getSyntacticHeadTokenPosition() < arg0.getSyntacticHeadTokenPosition())) {
features.setCount("entity_between_args: " + arg.getType(), 1.0);