Package edu.stanford.nlp.util

Examples of edu.stanford.nlp.util.CoreMap


    }
    return findNextParagraphSpeaker(paragraph, paragraphOffset, dict);
  }

  private String findNextParagraphSpeaker(List<CoreMap> paragraph, int paragraphOffset, Dictionaries dict) {
    CoreMap lastSent = paragraph.get(paragraph.size()-1);
    String speaker = "";
    for(CoreLabel w : lastSent.get(CoreAnnotations.TokensAnnotation.class)) {
      if(w.get(CoreAnnotations.LemmaAnnotation.class).equals("report") || w.get(CoreAnnotations.LemmaAnnotation.class).equals("say")) {
        String word = w.get(CoreAnnotations.TextAnnotation.class);
        SemanticGraph dependency = lastSent.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        IndexedWord t = dependency.getNodeByWordPattern(word);

        for(Pair<GrammaticalRelation,IndexedWord> child : dependency.childPairs(t)){
          if(child.first().getShortName().equals("nsubj")) {
            int subjectIndex = child.second().index()// start from 1
View Full Code Here


    // Update some fields in original document
    if (doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
      for (int i = 0; i < doc.get(CoreAnnotations.SentencesAnnotation.class).size(); ++i) {
        // Set docid and sentence index
        CoreMap sentence = doc.get(CoreAnnotations.SentencesAnnotation.class).get(i);
        for (int k = 0; k < sentence.get(CoreAnnotations.TokensAnnotation.class).size(); ++k) {
          CoreLabel token = sentence.get(CoreAnnotations.TokensAnnotation.class).get(k);
          // Set docID
          if (doc.containsKey(CoreAnnotations.DocIDAnnotation.class)) { token.setDocID(doc.get(CoreAnnotations.DocIDAnnotation.class)); }
          // Set sentence index if not already there
          token.setSentIndex(i);
          // Set index annotation if not already there
          if (!token.containsKey(CoreAnnotations.IndexAnnotation.class)) {
            token.set(CoreAnnotations.IndexAnnotation.class, k + 1);
          }
        }
        // Set normalized named entity for chunked tokens
        if (sentence.containsKey(CoreAnnotations.NumerizedTokensAnnotation.class)) {
          for (CoreMap numerizedToken : sentence.get(CoreAnnotations.NumerizedTokensAnnotation.class)) {
            if (numerizedToken.containsKey(CoreAnnotations.TokensAnnotation.class)) {
              // The normalized named entity got deleted?
              numerizedToken.set(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class,
                  numerizedToken.get(CoreAnnotations.TokensAnnotation.class)
                      .get(0).get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
              // The named entity type got deleted?
              numerizedToken.set(CoreAnnotations.NamedEntityTagAnnotation.class,
                  numerizedToken.get(CoreAnnotations.TokensAnnotation.class)
                      .get(0).get(CoreAnnotations.NamedEntityTagAnnotation.class));
            }
          }
        }
      }
    }
    if (doc.containsKey(CoreAnnotations.TokensAnnotation.class)) {
      for (int i = 0; i < doc.get(CoreAnnotations.TokensAnnotation.class).size(); i++) {
        CoreLabel token = doc.get(CoreAnnotations.TokensAnnotation.class).get(i);
        // Remove null gender
        if (token.get(MachineReadingAnnotations.GenderAnnotation.class) == null) {
          token.remove(MachineReadingAnnotations.GenderAnnotation.class);
        }
      }
    }

    // Find out what doesn't match
    if (!doc.equals(readDoc)) {
      if (doc.containsKey(CorefCoreAnnotations.CorefChainAnnotation.class) && !doc.get(CorefCoreAnnotations.CorefChainAnnotation.class).equals(doc.get(CorefCoreAnnotations.CorefChainAnnotation.class))) {
        assertTrue("Coref chain differs between documents", false);
      } else if (doc.containsKey(CoreAnnotations.TokensAnnotation.class) && !doc.get(CoreAnnotations.TokensAnnotation.class).equals(readDoc.get(CoreAnnotations.TokensAnnotation.class))) {
        for (int i = 0; i < doc.get(CoreAnnotations.TokensAnnotation.class).size(); ++i) {
          CoreLabel tokA = doc.get(CoreAnnotations.TokensAnnotation.class).get(i);
          CoreLabel tokB = readDoc.get(CoreAnnotations.TokensAnnotation.class).get(i);
          if (!tokA.equals(tokB)) {
            for (Class keyA : tokA.keySet()) {
              if (!tokB.containsKey(keyA)) {
                assertTrue("Read document doesn't have key: " + keyA, false);
              } else if (tokA.get(keyA) != null && !tokA.get(keyA).equals(tokB.get(keyA))) {
                assertTrue("Documents disagree on key: " + keyA, false);
              }
            }
            for (Class keyB : tokB.keySet()) {
              if (!tokA.containsKey(keyB)) {
                assertTrue("Read document doesn't have key: " + keyB, false);
              } else if (tokB.get(keyB) != null && !tokB.get(keyB).equals(tokA.get(keyB))) {
                assertTrue("Documents disagree on key: " + keyB, false);
              }
            }
            assertTrue("Token " + i + " doesn't match", false);
          }
        }
      } else if (doc.containsKey(CoreAnnotations.SentencesAnnotation.class) && !doc.get(CoreAnnotations.SentencesAnnotation.class).equals(readDoc.get(CoreAnnotations.SentencesAnnotation.class))) {
        for (int i = 0; i < doc.get(CoreAnnotations.SentencesAnnotation.class).size(); ++i) {
          CoreMap sentA = doc.get(CoreAnnotations.SentencesAnnotation.class).get(i);
          CoreMap sentB = readDoc.get(CoreAnnotations.SentencesAnnotation.class).get(i);
          if (!sentA.equals(sentB)) {
            if (sentA.containsKey(TreeCoreAnnotations.TreeAnnotation.class) && !sentA.get(TreeCoreAnnotations.TreeAnnotation.class).equals(sentB.get(TreeCoreAnnotations.TreeAnnotation.class))) {
              assertTrue("Tree for sentence " + i + " doesn't match", false);
            } else if (sentA.containsKey(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class) && !sentA.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class).equals(sentB.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class))) {
              System.err.println("Graph A:");
              System.err.println("========");
              System.err.println(sentA.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class));
              System.err.println("Graph B:");
              System.err.println("========");
              System.err.println(sentB.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class));
              assertTrue("Basic graph for sentence " + i + " doesn't match", false);
            } else if (sentA.containsKey(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class) && !sentA.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class).equals(sentB.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class))) {
              assertTrue("Collapsed CC processed graph for sentence " + i + " doesn't match", false);
            } else if (sentA.containsKey(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class) && !sentA.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class).equals(sentB.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class))) {
              assertTrue("Collapsed graph for sentence " + i + " doesn't match", false);
            } else {
              for (Class x : sentA.keySet()) {
                if (!sentA.get(x).equals(sentB.get(x))) {
                  assertTrue("" + x.getSimpleName() + " for sentence " + i + " does not match", false);
                }
              }
              for (Class x : sentB.keySet()) {
                if (!sentB.get(x).equals(sentA.get(x))) {
                  assertTrue("" + x.getSimpleName() + " for sentence " + i + " does not match", false);
                }
              }
              assertTrue("Sentence " + i + " doesn't match (don't know why?)", false);
            }
View Full Code Here

          Timex timex = new Timex(child);
          if (child.getChildNodes().getLength() != 1) {
            throw new RuntimeException("TIMEX3 should only contain text " + child);
          }
          String timexText = child.getTextContent();
          CoreMap timexMap = new ArrayCoreMap();
          timexMap.set(TimeAnnotations.TimexAnnotation.class, timex);
          timexMap.set(CoreAnnotations.TextAnnotation.class, timexText);
          int charBegin = offset;
          timexMap.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, offset);
          offset += timexText.length();
          timexMap.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, offset);
          int charEnd = offset;
          //(tokens)
          if(haveTokenOffsets){
            Integer tokBegin = beginMap.get(charBegin);
            int searchStep = 1;          //if no exact match, search around the character offset
            while(tokBegin == null){
              tokBegin = beginMap.get(charBegin - searchStep);
              if(tokBegin == null){
                tokBegin = beginMap.get(charBegin + searchStep);
              }
              searchStep += 1;
            }
            searchStep = 1;
            Integer tokEnd = endMap.get(charEnd);
            while(tokEnd == null){
              tokEnd = endMap.get(charEnd - searchStep);
              if(tokEnd == null){
                tokEnd = endMap.get(charEnd + searchStep);
              }
              searchStep += 1;
            }
            timexMap.set(CoreAnnotations.TokenBeginAnnotation.class, tokBegin);
            timexMap.set(CoreAnnotations.TokenEndAnnotation.class, tokEnd);
          }
          timexMaps.add(timexMap);
        } else {
          throw new RuntimeException("unexpected element " + child);
        }
View Full Code Here

    Assert.assertNotNull(sentences);
    Assert.assertEquals(2, sentences.size());
   
    // sentence 1
    String text1 = "Dan Ramage is working for\nMicrosoft.";
    CoreMap sentence1 = sentences.get(0);
    Assert.assertEquals(text1, sentence1.toString());
    Assert.assertEquals(text1, sentence1.get(CoreAnnotations.TextAnnotation.class));
    Assert.assertEquals(0, (int)sentence1.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
    Assert.assertEquals(36, (int)sentence1.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
    Assert.assertEquals(0, (int)sentence1.get(CoreAnnotations.TokenBeginAnnotation.class));
    Assert.assertEquals(7, (int)sentence1.get(CoreAnnotations.TokenEndAnnotation.class));
   
    // sentence 1 tree
    Tree tree1 = Tree.valueOf("(ROOT (S (NP (NNP Dan) (NNP Ramage)) (VP (VBZ is) " +
        "(VP (VBG working) (PP (IN for) (NP (NNP Microsoft))))) (. .)))");
    Assert.assertEquals(tree1, sentence1.get(TreeCoreAnnotations.TreeAnnotation.class));
   
    // sentence 1 tokens
    String tokenText1 = "Dan Ramage is working for Microsoft .";
    List<CoreLabel> tokens1 = sentence1.get(CoreAnnotations.TokensAnnotation.class);
    Assert.assertNotNull(tokens1);
    Assert.assertEquals(7, tokens1.size());
    Assert.assertEquals(tokenText1, join(tokens1));
    Assert.assertEquals(4, (int)tokens1.get(1).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
    Assert.assertEquals(10, (int)tokens1.get(1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
    Assert.assertEquals("IN", tokens1.get(4).get(CoreAnnotations.PartOfSpeechAnnotation.class));
    Assert.assertEquals("NNP", tokens1.get(5).get(CoreAnnotations.PartOfSpeechAnnotation.class));
    Assert.assertEquals("work", tokens1.get(3).get(CoreAnnotations.LemmaAnnotation.class));
    Assert.assertEquals(".", tokens1.get(6).get(CoreAnnotations.LemmaAnnotation.class));
    Assert.assertEquals("ORGANIZATION", tokens1.get(5).get(CoreAnnotations.NamedEntityTagAnnotation.class));
   
    // sentence 2
    String text2 = "He's in Seattle!";
    CoreMap sentence2 = sentences.get(1);
    Assert.assertEquals(text2, sentence2.toString());
    Assert.assertEquals(text2, sentence2.get(CoreAnnotations.TextAnnotation.class));
    Assert.assertEquals(37, (int)sentence2.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
    Assert.assertEquals(53, (int)sentence2.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
    Assert.assertEquals(7, (int)sentence2.get(CoreAnnotations.TokenBeginAnnotation.class));
    Assert.assertEquals(12, (int)sentence2.get(CoreAnnotations.TokenEndAnnotation.class));

    // sentence 2 tree (note error on Seattle, caused by part of speech tagger)
    Tree tree2 = Tree.valueOf("(ROOT (S (NP (PRP He)) (VP (VBZ 's) (PP (IN in) " +
        "(NP (NNP Seattle)))) (. !)))");
    Assert.assertEquals(tree2, sentence2.get(TreeCoreAnnotations.TreeAnnotation.class));
   
    // sentence 2 tokens
    String tokenText2 = "He 's in Seattle !";
    List<CoreLabel> tokens2 = sentence2.get(CoreAnnotations.TokensAnnotation.class);
    Assert.assertNotNull(tokens2);
    Assert.assertEquals(5, tokens2.size());
    Assert.assertEquals(tokenText2, join(tokens2));
    Assert.assertEquals(39, (int)tokens2.get(1).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
    Assert.assertEquals(41, (int)tokens2.get(1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
View Full Code Here

            }
          }
          // Get merged element
          int groupEnd = matchResult.end(group);
          if (groupEnd - groupStart >= 1) {
            CoreMap merged = aggregator.merge(elements, groupStart, groupEnd);
            mergedElements.add(merged);
            last = groupEnd;

            // Fiddle with matched group indices
            res.matchedGroups[mergedGroup].matchBegin = mergedElements.size()-1;
View Full Code Here

    List<Tree> leaves = tree.getLeaves();
    List<CoreLabel> tokens = rel.getSentence().get(TokensAnnotation.class);

    // this assumes that both args are in the same sentence as the relation object
    // let's check for this to be safe
    CoreMap relSentence = rel.getSentence();
    CoreMap arg0Sentence = arg0.getSentence();
    CoreMap arg1Sentence = arg1.getSentence();
    if(arg0Sentence != relSentence){
      System.err.println("WARNING: Found relation with arg0 in a different sentence: " + rel);
      System.err.println("Relation sentence: " + relSentence.get(TextAnnotation.class));
      System.err.println("Arg0 sentence: " + arg0Sentence.get(TextAnnotation.class));
      return false;
    }
    if(arg1Sentence != relSentence){
      System.err.println("WARNING: Found relation with arg1 in a different sentence: " + rel);
      System.err.println("Relation sentence: " + relSentence.get(TextAnnotation.class));
      System.err.println("Arg1 sentence: " + arg1Sentence.get(TextAnnotation.class));
      return false;
    }

    // Checklist keeps track of which features have been handled by an if clause
    // Should be empty after all the clauses have been gone through.
    List<String> checklist = new ArrayList<String>(types);

    // arg_type: concatenation of the entity types of the args, e.g.
    // "arg1type=Loc_and_arg2type=Org"
    // arg_subtype: similar, for entity subtypes
    if (usingFeature(types, checklist, "arg_type")) {
      features.setCount("arg1type=" + arg0.getType() + "_and_arg2type=" + arg1.getType(), 1.0);
    }
    if (usingFeature(types,checklist,"arg_subtype")) {
      features.setCount("arg1subtype="+arg0.getSubType()+"_and_arg2subtype="+arg1.getSubType(),1.0);
    }

    // arg_order: which arg comes first in the sentence
    if (usingFeature(types, checklist, "arg_order")) {
      if (arg0.getSyntacticHeadTokenPosition() < arg1.getSyntacticHeadTokenPosition())
        features.setCount("arg1BeforeArg2", 1.0);
    }
    // same_head: whether the two args share the same syntactic head token
    if (usingFeature(types, checklist, "same_head")) {
      if (arg0.getSyntacticHeadTokenPosition() == arg1.getSyntacticHeadTokenPosition())
        features.setCount("arguments_have_same_head",1.0);
    }

    // full_tree_path: Path from one arg to the other in the phrase structure tree,
    // e.g., NNP -> PP -> NN <- NNP
    if (usingFeature(types, checklist, "full_tree_path")) {
      //System.err.println("ARG0: " + arg0);
      //System.err.println("ARG0 HEAD: " + arg0.getSyntacticHeadTokenPosition());
      //System.err.println("TREE: " + tree);
      //System.err.println("SENTENCE: " + sentToString(arg0.getSentence()));
      if(arg0.getSyntacticHeadTokenPosition() < leaves.size() && arg1.getSyntacticHeadTokenPosition() < leaves.size()){
        Tree arg0preterm = leaves.get(arg0.getSyntacticHeadTokenPosition()).parent(tree);
        Tree arg1preterm = leaves.get(arg1.getSyntacticHeadTokenPosition()).parent(tree);
        Tree join = tree.joinNode(arg0preterm, arg1preterm);
        StringBuilder pathStringBuilder = new StringBuilder();
        List<Tree> pathUp = join.dominationPath(arg0preterm);
        Collections.reverse(pathUp);
        for (Tree node : pathUp) {
          if (node != join) {
            pathStringBuilder.append(node.label().value() + " <- ");
          }
        }

        for (Tree node : join.dominationPath(arg1preterm)) {
          pathStringBuilder.append(((node == join) ? "" : " -> ") + node.label().value());
        }
        String pathString = pathStringBuilder.toString();
        if(logger != null && ! rel.getType().equals(RelationMention.UNRELATED)) logger.info("full_tree_path: " + pathString);
        features.setCount("treepath:"+pathString, 1.0);
      } else {
        System.err.println("WARNING: found weird argument offsets. Most likely because arguments appear in different sentences than the relation:");
        System.err.println("ARG0: " + arg0);
        System.err.println("ARG0 HEAD: " + arg0.getSyntacticHeadTokenPosition());
        System.err.println("ARG0 SENTENCE: " + sentToString(arg0.getSentence()));
        System.err.println("ARG1: " + arg1);
        System.err.println("ARG1 HEAD: " + arg1.getSyntacticHeadTokenPosition());
        System.err.println("ARG1 SENTENCE: " + sentToString(arg1.getSentence()));
        System.err.println("RELATION TREE: " + tree);
      }
    }

    int pathLength = tree.pathNodeToNode(tree.getLeaves().get(arg0.getSyntacticHeadTokenPosition()),
            tree.getLeaves().get(arg1.getSyntacticHeadTokenPosition())).size();
    // path_length: Length of the path in the phrase structure parse tree, integer-valued feature
    if (usingFeature(types, checklist, "path_length")) {
      features.setCount("path_length", pathLength);
    }
    // path_length_binary: Length of the path in the phrase structure parse tree, binary features
    if (usingFeature(types, checklist, "path_length_binary")) {
      features.setCount("path_length_" + pathLength, 1.0);
    }

    /* entity_order
           * This tells you for each of the two args
           * whether there are other entities before or after that arg.
           * In particular, it can tell whether an arg is the first entity of its type in the sentence
           * (which can be useful for example for telling the gameWinner and gameLoser in NFL).
           * TODO: restrict this feature so that it only looks for
           * entities of the same type?
           * */
    if (usingFeature(types, checklist, "entity_order")) {
      for (int i = 0; i < rel.getArgs().size(); i++) {
        // We already checked the class of the args at the beginning of the method
        EntityMention arg = (EntityMention) rel.getArgs().get(i);
        if(rel.getSentence().get(MachineReadingAnnotations.EntityMentionsAnnotation.class) != null) { // may be null due to annotation error
          for (EntityMention otherArg : rel.getSentence().get(MachineReadingAnnotations.EntityMentionsAnnotation.class)) {
            String feature;
            if (otherArg.getSyntacticHeadTokenPosition() > arg.getSyntacticHeadTokenPosition()) {
              feature = "arg" + i + "_before_" + otherArg.getType();
              features.setCount(feature, 1.0);
            }
            if (otherArg.getSyntacticHeadTokenPosition() < arg.getSyntacticHeadTokenPosition()) {
              feature = "arg" + i + "_after_" + otherArg.getType();
              features.setCount(feature, 1.0);
            }
          }
        }
      }
    }

    // surface_distance: Number of tokens in the sentence between the two words, integer-valued feature
    int surfaceDistance = Math.abs(arg0.getSyntacticHeadTokenPosition() - arg1.getSyntacticHeadTokenPosition());
    if (usingFeature(types, checklist, "surface_distance")) {
      features.setCount("surface_distance", surfaceDistance);
    }
    // surface_distance_binary: Number of tokens in the sentence between the two words, binary features
    if (usingFeature(types, checklist, "surface_distance_binary")) {
      features.setCount("surface_distance_" + surfaceDistance, 1.0);
    }
    // surface_distance_bins: number of tokens between the two args, binned to several intervals
    if(usingFeature(types, checklist, "surface_distance_bins")) {
      if(surfaceDistance < 4){
        features.setCount("surface_distance_bin" + surfaceDistance, 1.0);
      } else if(surfaceDistance < 6){
        features.setCount("surface_distance_bin_lt6", 1.0);
      } else if(surfaceDistance < 10) {
        features.setCount("surface_distance_bin_lt10", 1.0);
      } else {
        features.setCount("surface_distance_bin_ge10", 1.0);
      }
    }

    // separate_surface_windows: windows of 1,2,3 tokens before and after args, for each arg separately
    // Separate features are generated for windows to the left and to the right of the args.
    // Features are concatenations of words in the window (or NULL for sentence boundary).
    //
    // conjunction_surface_windows: concatenation of the windows of the two args
    //
    // separate_surface_windows_POS: windows of POS tags of size 1,2,3 for each arg
    //
    // conjunction_surface_windows_POS: concatenation of windows of the args

    List<EntityMention> args = new ArrayList<EntityMention>();
    args.add(arg0); args.add(arg1);
    for (int windowSize = 1; windowSize <= 3; windowSize++) {

      String[] leftWindow, rightWindow, leftWindowPOS, rightWindowPOS;
      leftWindow = new String[2];
      rightWindow = new String[2];
      leftWindowPOS = new String[2];
      rightWindowPOS = new String[2];

      for (int argn = 0; argn <= 1; argn++) {
        int ind = args.get(argn).getSyntacticHeadTokenPosition();
        for (int winnum = 1; winnum <= windowSize; winnum++) {
          int windex = ind - winnum;
          if (windex > 0) {
            leftWindow[argn] = leaves.get(windex).label().value() + "_" + leftWindow[argn];
            leftWindowPOS[argn] = leaves.get(windex).parent(tree).label().value() + "_" + leftWindowPOS[argn];
          } else {
            leftWindow[argn] = "NULL_" + leftWindow[argn];
            leftWindowPOS[argn] = "NULL_" + leftWindowPOS[argn];
          }
          windex = ind + winnum;
          if (windex < leaves.size()) {
            rightWindow[argn] = rightWindow[argn] + "_" + leaves.get(windex).label().value();
            rightWindowPOS[argn] = rightWindowPOS[argn] + "_" + leaves.get(windex).parent(tree).label().value();
          } else {
            rightWindow[argn] = rightWindow[argn] + "_NULL";
            rightWindowPOS[argn] = rightWindowPOS[argn] + "_NULL";
          }
        }
        if (usingFeature(types, checklist, "separate_surface_windows")) {
          features.setCount("left_window_"+windowSize+"_arg_" + argn + ": " + leftWindow[argn], 1.0);
          features.setCount("left_window_"+windowSize+"_POS_arg_" + argn + ": " + leftWindowPOS[argn], 1.0);
        }
        if (usingFeature(types, checklist, "separate_surface_windows_POS")) {
          features.setCount("right_window_"+windowSize+"_arg_" + argn + ": " + rightWindow[argn], 1.0);
          features.setCount("right_window_"+windowSize+"_POS_arg_" + argn + ": " + rightWindowPOS[argn], 1.0);
        }

      }
      if (usingFeature(types, checklist, "conjunction_surface_windows")) {
        features.setCount("left_windows_"+windowSize+": " + leftWindow[0] + "__" + leftWindow[1], 1.0);
        features.setCount("right_windows_"+windowSize+": " + rightWindow[0] + "__" + rightWindow[1], 1.0);
      }
      if (usingFeature(types, checklist, "conjunction_surface_windows_POS")) {
        features.setCount("left_windows_"+windowSize+"_POS: " + leftWindowPOS[0] + "__" + leftWindowPOS[1], 1.0);
        features.setCount("right_windows_"+windowSize+"_POS: " + rightWindowPOS[0] + "__" + rightWindowPOS[1], 1.0);
      }
    }

    // arg_words:  The actual arg tokens as separate features, and concatenated
    String word0 = leaves.get(arg0.getSyntacticHeadTokenPosition()).label().value();
    String word1 = leaves.get(arg1.getSyntacticHeadTokenPosition()).label().value();
    if (usingFeature(types, checklist, "arg_words")) {
      if(doNotLexicalizeFirstArg == false)
        features.setCount("word_arg0: " + word0, 1.0);
      features.setCount("word_arg1: " + word1, 1.0);
      if(doNotLexicalizeFirstArg == false)
        features.setCount("words: " + word0 + "__" + word1, 1.0);
    }

    // arg_POS:  POS tags of the args, as separate features and concatenated
    String pos0 = leaves.get(arg0.getSyntacticHeadTokenPosition()).parent(tree).label().value();
    String pos1 = leaves.get(arg1.getSyntacticHeadTokenPosition()).parent(tree).label().value();
    if (usingFeature(types, checklist, "arg_POS")) {
      features.setCount("POS_arg0: " + pos0, 1.0);
      features.setCount("POS_arg1: " + pos1, 1.0);
      features.setCount("POSs: " + pos0 + "__" + pos1, 1.0);
    }

    // adjacent_words: words immediately to the left and right of the args
    if(usingFeature(types, checklist, "adjacent_words")){
      for(int i = 0; i < rel.getArgs().size(); i ++){
        Span s = ((EntityMention) rel.getArg(i)).getHead();
        if(s.start() > 0){
          String v = tokens.get(s.start() - 1).word();
          features.setCount("leftarg" + i + "-" + v, 1.0);
        }
        if(s.end() < tokens.size()){
          String v = tokens.get(s.end()).word();
          features.setCount("rightarg" + i + "-" + v, 1.0);
        }
      }
    }

    // entities_between_args:  binary feature for each type specifying whether there is an entity of that type in the sentence
    // between the two args.
    // e.g. "entity_between_args: Loc" means there is at least one entity of type Loc between the two args
    if (usingFeature(types, checklist, "entities_between_args")) {
      CoreMap sent = rel.getSentence();
      if(sent == null) throw new RuntimeException("NULL sentence for relation " + rel);
      List<EntityMention> relArgs = sent.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
      if(relArgs != null) { // may be null due to annotation errors!
        for (EntityMention arg : relArgs) {
          if ((arg.getSyntacticHeadTokenPosition() > arg0.getSyntacticHeadTokenPosition() && arg.getSyntacticHeadTokenPosition() < arg1.getSyntacticHeadTokenPosition())
                  || (arg.getSyntacticHeadTokenPosition() > arg1.getSyntacticHeadTokenPosition() && arg.getSyntacticHeadTokenPosition() < arg0.getSyntacticHeadTokenPosition())) {
            features.setCount("entity_between_args: " + arg.getType(), 1.0);
View Full Code Here

   * Test a bug a user reported where the text would wind up having the list toString used, adding extra []
   */
  public void testFromList() {
    List<CoreMap> sentences = Generics.newArrayList();

    CoreMap sentence = new ArrayCoreMap();
    List<CoreLabel> words = Sentence.toCoreLabelList("This", "is", "a", "test", ".");
    sentence.set(CoreAnnotations.TokensAnnotation.class, words);
    sentences.add(sentence);

    Annotation annotation = new Annotation(sentences);
    assertEquals("This is a test .", annotation.toString());

    sentence.set(CoreAnnotations.TextAnnotation.class, "This is a test.");
    annotation = new Annotation(sentences);
    assertEquals("This is a test.", annotation.toString());
  }
View Full Code Here

        isCompatible = checkTokensCompatible.apply(p);
      }
      if (isEndOfChunk(prevTagType, curTagType) || !isCompatible) {
        int tokenEnd = i;
        if (tokenBegin >= 0 && tokenEnd > tokenBegin) {
          CoreMap chunk = ChunkAnnotationUtils.getAnnotatedChunk(tokens, tokenBegin, tokenEnd, totalTokensOffset,
              tokenChunkKey, textKey, tokenLabelKey);
          chunk.set(labelKey, prevTagType.type);
          chunks.add(chunk);
          tokenBegin = -1;
        }
      }
      if (isStartOfChunk(prevTagType, curTagType) || (!isCompatible && isChunk(curTagType))) {
        if (tokenBegin >= 0) {
          throw new RuntimeException("New chunk started, prev chunk not ended yet!");
        }
        tokenBegin = i;
      }
      prevTagType = curTagType;
    }
    if (tokenBegin >= 0) {
      CoreMap chunk = ChunkAnnotationUtils.getAnnotatedChunk(tokens, tokenBegin, tokens.size(), totalTokensOffset,
          tokenChunkKey, textKey, tokenLabelKey);
      chunk.set(labelKey, prevTagType.type);
      chunks.add(chunk);
    }
//    System.out.println("number of chunks " +  chunks.size());
    return chunks;
  }
View Full Code Here

  }

  private static String getIndexStrOrEmpty(Label lab) {
    String ans = "";
    if (lab instanceof CoreMap) {
      CoreMap aml = (CoreMap) lab;
      int idx = aml.<Integer, IndexAnnotation>get(IndexAnnotation.class);
      if (idx >= 0) {
        ans = " idx=\"" + idx + "\"";
      }
    }
    return ans;
View Full Code Here

    private ComparatorHolder() {}  // private constructor: holder class is never instantiated

    private static class DependencyIdxComparator implements Comparator<Dependency> {

      public int compare(Dependency dep1, Dependency dep2) {
        CoreMap dep1lab = (CoreMap) dep1.dependent();
        CoreMap dep2lab = (CoreMap) dep2.dependent();
        int dep1idx = dep1lab.<Integer, IndexAnnotation>get(IndexAnnotation.class);
        int dep2idx = dep2lab.<Integer, IndexAnnotation>get(IndexAnnotation.class);
        return dep1idx - dep2idx;
      }
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.util.CoreMap

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.