Examples of edu.stanford.nlp.ling.CategoryWordTag

edu.stanford.nlp.ling.CategoryWordTag
A CategoryWordTag object acts as a complex Label which contains a category, a head word, and a tag. The category label is the primary value @author Christopher Manning

    StringBuilder catSB = new StringBuilder(cat);
    for (String annotation : annotations) {
      catSB.append(annotation);
    }


    t.setLabel(new CategoryWordTag(catSB.toString(), word, tag));
    return t;
  }

View Full Code Here

    if (DEBUG) {
      System.err.println("Span "+start+" to "+end+" word "+wordIndex.get(words[hWord])+"/"+hWord+" tag "+tagIndex.get(hTag)+"/"+hTag+" score "+iScore(start, end, hWord, hTag));
    }
    String headWordStr = wordIndex.get(words[hWord]);
    String headTagStr = tagIndex.get(hTag);
    Label headLabel = new CategoryWordTag(headWordStr, headWordStr, headTagStr);
    int numTags = tagIndex.size();


    // deal with span 1
    if (end - start == 1) {
      Tree leaf = tf.newLeaf(new Word(headWordStr));

View Full Code Here

  public void addRoot(Tree t) {
    if (t.isLeaf()) {
      System.err.println("Warning: tree is leaf: " + t);
      t = tf.newTreeNode(tlp.startSymbol(), Collections.singletonList(t));
    }
    t.setLabel(new CategoryWordTag(tlp.startSymbol(), Lexicon.BOUNDARY, Lexicon.BOUNDARY_TAG));
    List<Tree> preTermChildList = new ArrayList<Tree>();
    Tree boundaryTerm = tf.newLeaf(new Word(Lexicon.BOUNDARY));//CategoryWordTag(Lexicon.BOUNDARY,Lexicon.BOUNDARY,""));
    preTermChildList.add(boundaryTerm);
    Tree boundaryPreTerm = tf.newTreeNode(new CategoryWordTag(Lexicon.BOUNDARY_TAG, Lexicon.BOUNDARY, Lexicon.BOUNDARY_TAG), preTermChildList);
    List<Tree> childList = t.getChildrenAsList();
    childList.add(boundaryPreTerm);
    t.setChildren(childList);
  }

View Full Code Here

            tag = null;
          } else if (headChild.isLeaf()) {
            tag = cat;
            word = headChild.label().value();
          } else {
            CategoryWordTag headLabel = (CategoryWordTag) headChild.label();
            word = headLabel.word();
            tag = headLabel.tag();
          }
          Label label = new CategoryWordTag(cat, word, tag);
          t.setLabel(label);
        }
      }
      return t;
    }

View Full Code Here

        // }
      }
      // if (children.isEmpty()) {
      //   return null;
      // }
      CategoryWordTag newLabel = new CategoryWordTag(lab);
      newLabel.setCategory(s);
      if (lab instanceof HasTag) {
        String tag = ((HasTag) lab).tag();
        tag = treebankLanguagePack().basicCategory(tag);
        newLabel.setTag(tag);
      }
      Tree node = tf.newTreeNode(newLabel, children);
      node.setScore(tree.score());
      return node;
    }

View Full Code Here

      for (int cNum = 0; cNum < numKids; cNum++) {
        Tree child = tree.getChild(cNum);
        Tree newChild = transformTree(child);
        children.add(newChild);
      }
      CategoryWordTag newLabel = new CategoryWordTag(lab);
      newLabel.setCategory(s);
      if (lab instanceof HasTag) {
        String tag = ((HasTag) lab).tag();
        tag = treebankLanguagePack().basicCategory(tag);
        tag = treebankLanguagePack().stripGF(tag);


        newLabel.setTag(tag);
      }
      Tree node = tf.newTreeNode(newLabel, children);
      node.setScore(tree.score());
      return node;
    }

View Full Code Here

        tag += "^=lVV";
      }


      // end Chinese-specific tag splits


      Label label = new CategoryWordTag(tag, word, tag);
      t.setLabel(label);
    } else {
      // it's a phrasal category
      Tree[] kids = t.children();


      // Chinese-specific category splits
      List<String> leftSis = listBasicCategories(SisterAnnotationStats.leftSisterLabels(t, parent));
      List<String> rightSis = listBasicCategories(SisterAnnotationStats.rightSisterLabels(t, parent));


      if (paRootDtr && baseParentStr.equals("ROOT")) {
        category += "^ROOT";
      }


      if (markIPsisterBA && baseCategory.equals("IP")) {
        if (leftSis.contains("BA")) {
          category += "=BA";
          //System.out.println("Found IP sister of BA");
        }
      }


      if (dominatesV && hasV(t.preTerminalYield())) {
        // mark categories containing a verb
        category += "-v";
      }


      if (markIPsisterVVorP && baseCategory.equals("IP")) {
        // todo: cdm: is just looking for "P" here selective enough??
        if (leftSis.contains("VV") || leftSis.contains("P")) {
          category += "=VVP";
        }
      }


      if (markIPsisDEC && baseCategory.equals("IP")) {
        if (rightSis.contains("DEC")) {
          category += "=DEC";
          //System.out.println("Found prenominal IP");
        }
      }


      if (baseCategory.equals("VP")) {
        // cdm 2008: this used to just check that it startsWith("VP"), but
        // I think that was bad because it also matched VPT verb compounds
        if (chineseSplitVP == 3) {
          boolean hasCC = false;
          boolean hasPU = false;
          boolean hasLexV = false;
          for (Tree kid : kids) {
            if (kid.label().value().startsWith("CC")) {
              hasCC = true;
            } else if (kid.label().value().startsWith("PU")) {
              hasPU = true;
            } else if (StringUtils.lookingAt(kid.label().value(), "(V[ACEV]|VCD|VCP|VNV|VPT|VRD|VSB)")) {
              hasLexV = true;
            }
          }
          if (hasCC || (hasPU && ! hasLexV)) {
            category += "-CRD";
            //System.out.println("Found coordinate VP"); // testing
          } else if (hasLexV) {
            category += "-COMP";
            //System.out.println("Found complementing VP"); // testing
          } else {
            category += "-ADJT";
            //System.out.println("Found adjoining VP"); // testing
          }
        } else if (chineseSplitVP >= 1) {
          boolean hasBA = false;
          for (Tree kid : kids) {
            if (kid.label().value().startsWith("BA")) {
              hasBA = true;
            } else if (chineseSplitVP == 2 && tlp.basicCategory(kid.label().value()).equals("VP")) {
              for (Tree kidkid : kid.children()) {
                if (kidkid.label().value().startsWith("BA")) {
                  hasBA = true;
                }
              }
            }
          }
          if (hasBA) {
            category += "-BA";
          }
        }
      }


      if (markVPadjunct && baseParentStr.equals("VP")) {
        // cdm 2008: This used to use startsWith("VP") but changed to baseCat
        Tree[] sisters = parent.children();
        boolean hasVPsister = false;
        boolean hasCC = false;
        boolean hasPU = false;
        boolean hasLexV = false;
        for (Tree sister : sisters) {
          if (tlp.basicCategory(sister.label().value()).equals("VP")) {
            hasVPsister = true;
          }
          if (sister.label().value().startsWith("CC")) {
            hasCC = true;
          }
          if (sister.label().value().startsWith("PU")) {
            hasPU = true;
          }
          if (StringUtils.lookingAt(sister.label().value(), "(V[ACEV]|VCD|VCP|VNV|VPT|VRD|VSB)")) {
            hasLexV = true;
          }
        }
        if (hasVPsister && !(hasCC || hasPU || hasLexV)) {
          category += "-VPADJ";
          //System.out.println("Found adjunct of VP"); // testing
        }
      }


      if (markNPmodNP && baseCategory.equals("NP") && baseParentStr.equals("NP")) {
        if (rightSis.contains("NP")) {
          category += "=MODIFIERNP";
          //System.out.println("Found NP modifier of NP"); // testing
        }
      }


      if (markModifiedNP && baseCategory.equals("NP") && baseParentStr.equals("NP")) {
        if (rightSis.isEmpty() && (leftSis.contains("ADJP") || leftSis.contains("NP") || leftSis.contains("DNP") || leftSis.contains("QP") || leftSis.contains("CP") || leftSis.contains("PP"))) {
          category += "=MODIFIEDNP";
          //System.out.println("Found modified NP"); // testing
        }
      }


      if (markNPconj && baseCategory.equals("NP") && baseParentStr.equals("NP")) {
        if (rightSis.contains("CC") || rightSis.contains("PU") || leftSis.contains("CC") || leftSis.contains("PU")) {
          category += "=CONJ";
          //System.out.println("Found NP conjunct"); // testing
        }
      }


      if (markIPconj && baseCategory.equals("IP") && baseParentStr.equals("IP")) {
        Tree[] sisters = parent.children();
        boolean hasCommaSis = false;
        boolean hasIPSis = false;
        for (Tree sister : sisters) {
          if (ctlp.basicCategory(sister.label().value()).equals("PU") && ChineseTreebankLanguagePack.chineseCommaAcceptFilter().test(sister.children()[0].label().toString())) {
            hasCommaSis = true;
            //System.out.println("Found CommaSis"); // testing
          }
          if (ctlp.basicCategory(sister.label().value()).equals("IP") && sister != t) {
            hasIPSis = true;
          }
        }
        if (hasCommaSis && hasIPSis) {
          category += "-CONJ";
          //System.out.println("Found IP conjunct"); // testing
        }
      }


      if (unaryIP && baseCategory.equals("IP") && t.numChildren() == 1) {
        category += "-U";
        //System.out.println("Found unary IP"); //testing
      }
      if (unaryCP && baseCategory.equals("CP") && t.numChildren() == 1) {
        category += "-U";
        //System.out.println("Found unary CP"); //testing
      }


      if (splitBaseNP && baseCategory.equals("NP")) {
        if (t.isPrePreTerminal()) {
          category = category + "-B";
        }
      }


      //if (Test.verbose) printlnErr(baseCategory + " " + leftSis.toString()); //debugging


      if (markPostverbalPP && leftSis.contains("VV") && baseCategory.equals("PP")) {
        //System.out.println("Found post-verbal PP");
        category += "=lVV";
      }


      if ((markADgrandchildOfIP || gpaAD) && listBasicCategories(SisterAnnotationStats.kidLabels(t)).contains("AD")) {
        category += "^ADVP";
      }


      if (markCC) {
        // was: for (int i = 0; i < kids.length; i++) {
        // This second version takes an idea from Collins: don't count
        // marginal conjunctions which don't conjoin 2 things.
        for (int i = 1; i < kids.length - 1; i++) {
          String cat2 = kids[i].label().value();
          if (cat2.startsWith("CC")) {
            category += "-CC";
          }
        }
      }


      Label label = new CategoryWordTag(category, word, tag);
      t.setLabel(label);
    }
    return t;
  }

View Full Code Here


  protected Tree extractParse(Edge edge) {
    String head = wordIndex.get(words[edge.head]);
    String tag = tagIndex.get(edge.tag);
    String state = stateIndex.get(edge.state);
    Label label = new CategoryWordTag(state, head, tag);
    if (edge.backEdge == null && edge.backHook == null) {
      // leaf, but needs word terminal
      Tree leaf;
      if (originalLabels[edge.head] != null) {
        leaf = tf.newLeaf(originalLabels[edge.head]);

View Full Code Here

    StringBuilder catSB = new StringBuilder(cat);
    for (String annotation : annotations) {
      catSB.append(annotation);
    }


    t.setLabel(new CategoryWordTag(catSB.toString(), word, tag));
    return t;
  }

View Full Code Here

        } else {
          cat = cat2;
        }
      }
    }
    result = tf.newTreeNode(new CategoryWordTag(cat, word, cat), Collections.<Tree>emptyList());
    ArrayList<Tree> newKids = new ArrayList<Tree>();
    Tree[] kids = t.children();
    for (Tree kid : kids) {
      newKids.add(transformTreeHelper(kid, root, tf));
    }

View Full Code Here

0 1

TOP

Related Classes of edu.stanford.nlp.ling.CategoryWordTag

edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams$RemoveGFSubcategoryStripper

edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams$SubcategoryStripper

edu.stanford.nlp.parser.lexparser.BiLexPCFGParser

edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams

edu.stanford.nlp.parser.lexparser.ExhaustiveDependencyParser

edu.stanford.nlp.parser.lexparser.NegraPennTreebankParserParams

edu.stanford.nlp.parser.lexparser.PostSplitter

edu.stanford.nlp.parser.lexparser.TreeAnnotatorAndBinarizer

edu.stanford.nlp.parser.lexparser.TreeAnnotatorAndBinarizer$TreeNullAnnotator

edu.stanford.nlp.parser.lexparser.TueBaDZParserParams

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.