Package edu.stanford.nlp.trees

Examples of edu.stanford.nlp.trees.Tree


      final TregexPattern pMWE = TregexPattern.compile("/^MW/");
      for(Tree t; (t = tr.readTree()) != null;) {
        //Count MWE statistics
        TregexMatcher m = pMWE.matcher(t);
        while(m.findNextMatchingNode()) {
          Tree match = m.getMatch();
          String label = match.value();
          List<CoreLabel> yield = match.taggedLabeledYield();
          StringBuilder termYield = new StringBuilder();
          StringBuilder posYield = new StringBuilder();
          for(CoreLabel cl : yield) {
            termYield.append(cl.word()).append(" ");
            posYield.append(cl.tag()).append(" ");
View Full Code Here


  /** Tregex pattern matching any tree node whose label begins with {@code NP} or {@code PRP}. */
  private static final TregexPattern npOrPrpMentionPattern = TregexPattern.compile("/^(?:NP|PRP)/");

  protected static void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    tree.indexLeaves();
    SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);

    TregexPattern tgrepPattern = npOrPrpMentionPattern;
    TregexMatcher matcher = tgrepPattern.matcher(tree);
    while (matcher.find()) {
      Tree t = matcher.getMatch();
      List<Tree> mLeaves = t.getLeaves();
      int beginIdx = ((CoreLabel)mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class)-1;
      int endIdx = ((CoreLabel)mLeaves.get(mLeaves.size()-1).label()).get(CoreAnnotations.IndexAnnotation.class);
      if (",".equals(sent.get(endIdx-1).word())) { endIdx--; } // try not to have span that ends with ,
      IntPair mSpan = new IntPair(beginIdx, endIdx);
      if(!mentionSpanSet.contains(mSpan) && !insideNE(mSpan, namedEntitySpanSet)) {
View Full Code Here

  /**
   * Extract enumerations (A, B, and C).
   * <p>
   * Matches an {@code NP} with a child named {@code m1} (label starting with
   * {@code NP}, {@code NNP} or {@code NML}) that is followed, among its sisters,
   * by a node matching {@code /^CC|,/}, which is in turn followed by another
   * sister named {@code m2} with the same kind of label.
   * Note that the alternation in {@code /^CC|,/} anchors only the {@code CC}
   * branch; the comma branch matches a comma anywhere in the label.
   */
  private static final TregexPattern enumerationsMentionPattern = TregexPattern.compile("NP < (/^(?:NP|NNP|NML)/=m1 $.. (/^CC|,/ $.. /^(?:NP|NNP|NML)/=m2))");

  protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
    List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
    Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);

    TregexPattern tgrepPattern = enumerationsMentionPattern;
    TregexMatcher matcher = tgrepPattern.matcher(tree);
    Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap();
    while (matcher.find()) {
      matcher.getMatch();
      Tree m1 = matcher.getNode("m1");
      Tree m2 = matcher.getNode("m2");

      List<Tree> mLeaves = m1.getLeaves();
      int beginIdx = ((CoreLabel)mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class)-1;
      int endIdx = ((CoreLabel)mLeaves.get(mLeaves.size()-1).label()).get(CoreAnnotations.IndexAnnotation.class);
      spanToMentionSubTree.put(new IntPair(beginIdx, endIdx), m1);

      mLeaves = m2.getLeaves();
      beginIdx = ((CoreLabel)mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class)-1;
      endIdx = ((CoreLabel)mLeaves.get(mLeaves.size()-1).label()).get(CoreAnnotations.IndexAnnotation.class);
      spanToMentionSubTree.put(new IntPair(beginIdx, endIdx), m2);
    }

View Full Code Here

    }
    return false;
  }

  protected void findHead(CoreMap s, List<Mention> mentions) {
    Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
    tree.indexSpans(0);
    for (Mention m : mentions){
      Tree head = findSyntacticHead(m, tree, sent);
      m.headIndex = ((CoreLabel) head.label()).get(CoreAnnotations.IndexAnnotation.class)-1;
      m.headWord = sent.get(m.headIndex);
      m.headString = m.headWord.get(CoreAnnotations.TextAnnotation.class).toLowerCase(Locale.ENGLISH);
      int start = m.headIndex - m.startIndex;
      if (start < 0 || start >= m.originalSpan.size()) {
        SieveCoreferenceSystem.logger.warning("Invalid index for head " + start + "=" + m.headIndex + "-" + m.startIndex
View Full Code Here

      String lastWord = m.originalSpan.get(m.originalSpan.size()-1).get(CoreAnnotations.TextAnnotation.class);
        if((lastWord.equals("'s") || lastWord.equals("'"))
            && m.originalSpan.size() != 1 ) endIdx--;
    }

    Tree exactMatch = findTreeWithSpan(root, m.startIndex, endIdx);
    //
    // found an exact match
    //
    if (exactMatch != null) {
      return safeHead(exactMatch, endIdx);
    }

    // no exact match found
    // in this case, we parse the actual extent of the mention, embedded in a sentence
    // context, so as to make the parser work better :-)
    if (allowReparsing) {
      int approximateness = 0;
      List<CoreLabel> extentTokens = new ArrayList<CoreLabel>();
      extentTokens.add(initCoreLabel("It"));
      extentTokens.add(initCoreLabel("was"));
      final int ADDED_WORDS = 2;
      for (int i = m.startIndex; i < endIdx; i++) {
        // Add everything except separated dashes! The separated dashes mess with the parser too badly.
        CoreLabel label = tokens.get(i);
        if ( ! "-".equals(label.word())) {
          extentTokens.add(tokens.get(i));
        } else {
          approximateness++;
        }
      }
      extentTokens.add(initCoreLabel("."));

      // constrain the parse to the part we're interested in.
      // Starting from ADDED_WORDS comes from skipping "It was".
      // -1 to exclude the period.
      // We now let it be any kind of nominal constituent, since there
      // are VP and S ones
      ParserConstraint constraint = new ParserConstraint(ADDED_WORDS, extentTokens.size() - 1, Pattern.compile(".*"));
      List<ParserConstraint> constraints = Collections.singletonList(constraint);
      Tree tree = parse(extentTokens, constraints);
      convertToCoreLabels(tree)// now unnecessary, as parser uses CoreLabels?
      tree.indexSpans(m.startIndex - ADDED_WORDS)// remember it has ADDED_WORDS extra words at the beginning
      Tree subtree = findPartialSpan(tree, m.startIndex);
      // There was a possible problem that with a crazy parse, extentHead could be one of the added words, not a real word!
      // Now we make sure in findPartialSpan that it can't be before the real start, and in safeHead, we disallow something
      // past the right end (that is, just that final period).
      Tree extentHead = safeHead(subtree, endIdx);
      assert(extentHead != null);
      // extentHead is a child in the local extent parse tree. we need to find the corresponding node in the main tree
      // Because we deleted dashes, its index will be >= the index in the extent parse tree
      CoreLabel l = (CoreLabel) extentHead.label();
      Tree realHead = funkyFindLeafWithApproximateSpan(root, l.value(), l.get(CoreAnnotations.BeginIndexAnnotation.class), approximateness);
      assert(realHead != null);
      return realHead;
    }

    // If reparsing wasn't allowed, try to find a span in the tree
    // which happens to have the head
    Tree wordMatch = findTreeWithSmallestSpan(root, m.startIndex, endIdx);
    if (wordMatch != null) {
      Tree head = safeHead(wordMatch, endIdx);
      if (head != null) {
        int index = ((CoreLabel) head.label()).get(CoreAnnotations.IndexAnnotation.class)-1;
        if (index >= m.startIndex && index < endIdx) {
          return head;
        }
      }
    }

    // If that didn't work, guess that it's the last word

    int lastNounIdx = endIdx-1;
    for(int i=m.startIndex ; i < m.endIndex ; i++) {
      if(tokens.get(i).tag().startsWith("N")) lastNounIdx = i;
      else if(tokens.get(i).tag().startsWith("W")) break;
    }

    List<Tree> leaves = root.getLeaves();
    Tree endLeaf = leaves.get(lastNounIdx);
    return endLeaf;
  }
View Full Code Here

  private Tree safeHead(Tree top, int endIndex) {
    // The trees passed in do not have the CoordinationTransformer
    // applied, but that just means the SemanticHeadFinder results are
    // slightly worse.
    Tree head = top.headTerminal(headFinder);
    // One obscure failure case is that the added period becomes the head. Disallow this.
    if (head != null) {
      Integer headIndexInteger = ((CoreLabel) head.label()).get(CoreAnnotations.IndexAnnotation.class);
      if (headIndexInteger != null) {
        int headIndex = headIndexInteger - 1;
        if (headIndex < endIndex) {
          return head;
        }
      }
    }
    // if no head found return the right-most leaf
    List<Tree> leaves = top.getLeaves();
    int candidate = leaves.size() - 1;
    while (candidate >= 0) {
      head = leaves.get(candidate);
      Integer headIndexInteger = ((CoreLabel) head.label()).get(CoreAnnotations.IndexAnnotation.class);
      if (headIndexInteger != null) {
        int headIndex = headIndexInteger - 1;
        if (headIndex < endIndex) {
          return head;
        }
View Full Code Here

    return top;
  }

  static Tree findTreeWithSmallestSpan(Tree tree, int start, int end) {
    List<Tree> leaves = tree.getLeaves();
    Tree startLeaf = leaves.get(start);
    Tree endLeaf = leaves.get(end - 1);
    return Trees.getLowestCommonAncestor(Arrays.asList(startLeaf, endLeaf), tree);
  }
View Full Code Here

    }

    // otherwise, check inside children - a match is possible
    for (Tree kid : tree.children()) {
      if (kid == null) continue;
      Tree ret = findTreeWithSpan(kid, start, end);
      // found matching child
      if (ret != null) return ret;
    }

    // no match
View Full Code Here

    return null;
  }

  /** Filter out all spurious mentions */
  protected static void removeSpuriousMentions(CoreMap s, List<Mention> mentions, Dictionaries dict) {
    Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
    List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
    Set<Mention> remove = Generics.newHashSet();


    for(Mention m : mentions){
View Full Code Here

  private static boolean checkPleonastic(Mention m, Tree tree, TregexPattern tgrepPattern) {
    try {
      TregexMatcher matcher = tgrepPattern.matcher(tree);
      while (matcher.find()) {
        Tree np1 = matcher.getNode("m1");
        if (((CoreLabel)np1.label()).get(CoreAnnotations.BeginIndexAnnotation.class)+1 == m.headWord.get(CoreAnnotations.IndexAnnotation.class)) {
          return true;
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.trees.Tree

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.