Package edu.stanford.nlp.trees

Examples of edu.stanford.nlp.trees.Tree$TreeIterator


  public List<TaggedWord> next() {
    if (next == null) {
      throw new NoSuchElementException("Iterator exhausted.");
    }
    Tree t = next;
    if (normalizer != null) {
      t = normalizer.normalizeWholeTree(t, t.treeFactory());
    }
    if (transformer != null) {
      t = t.transform(transformer);
    }
    findNext();
    return t.taggedYield();
  }
View Full Code Here


      headFinder = hf;
      this.lowerCase = lowerCase;
    }

    public String apply(TregexMatcher matcher) {
      Tree matchedTree = matcher.getMatch();

      Tree head = headFinder.determineHead(matchedTree);
      if (!head.isPrePreTerminal())
        return "";

      Tree lexicalHead = head.firstChild().firstChild();
      String headValue = lexicalHead.value();

      if (headValue != null) {
        if (lowerCase) headValue = headValue.toLowerCase();
        return '[' + headValue + ']';
      } else {
View Full Code Here

    new Pair("(a (b entonces,_yo))", "(a (MW_PHRASE?_b (MW? entonces) (MW? ,) (MW? yo)))"),
  };

  public void testMultiWordNormalization() {
    for (Pair<String, String> testCase : multiWordTestCases) {
      Tree head = readTree(testCase.first());
      for (Tree t : head) {
        if (t.isPrePreTerminal())
          tn.normalizeForMultiWord(t, tf);
      }

      assertEquals(testCase.second(), head.toString());
    }
  }
View Full Code Here

        MemoryTreebank trainTreebank;
        if (argMap.containsKey("-annotate")) {
          trainTreebank = new MemoryTreebank();
          TreeAnnotator annotator = new TreeAnnotator(ctpp.headFinder(), ctpp, op);
          for (Iterator iter = rawTrainTreebank.iterator(); iter.hasNext();) {
            Tree tree = (Tree) iter.next();
            tree = annotator.transformTree(tree);
            trainTreebank.add(tree);
          }
          System.err.println("Done annotating trees.");
        } else {
          trainTreebank = rawTrainTreebank;
        }
        lex.initializeTraining(trainTreebank.size());
        lex.train(trainTreebank);
        lex.finishTraining();
        System.err.println("Done training lexicon.");
        if (lexArgs.length == 3) {
          String filename = lexArgs.length == 3 ? lexArgs[2] : "parsers/chineseCharLex.ser.gz";
          System.err.println("Writing lexicon in serialized format to file " + filename + " ");
          System.err.flush();
          ObjectOutputStream out = IOUtils.writeStreamFromString(filename);
          out.writeObject(lex);
          out.close();
          System.err.println("done.");
        }
      } else {
        String lexFile = lexArgs.length == 1 ? lexArgs[0] : "parsers/chineseCharLex.ser.gz";
        System.err.println("Reading Lexicon from file " + lexFile);
        ObjectInputStream in = IOUtils.readStreamFromString(lexFile);
        try {
          lex = (Lexicon) in.readObject();
        } catch (ClassNotFoundException e) {
          throw new RuntimeException("Bad serialized file: " + lexFile);
        }
        in.close();
      }
    }

    if (argMap.containsKey("-test")) {
      boolean segmentWords = ctpp.segment;
      boolean parse = lp != null;
      assert (parse || segmentWords);
      //      WordCatConstituent.collinizeWords = argMap.containsKey("-collinizeWords");
      //      WordCatConstituent.collinizeTags = argMap.containsKey("-collinizeTags");
      WordSegmenter seg = null;
      if (segmentWords) {
        seg = (WordSegmenter) lex;
      }
      String[] testArgs = (argMap.get("-test"));
      MemoryTreebank testTreebank = op.tlpParams.memoryTreebank();
      FileFilter testFilt = new NumberRangesFileFilter(testArgs[1], false);
      testTreebank.loadPath(new File(testArgs[0]), testFilt);
      TreeTransformer subcategoryStripper = op.tlpParams.subcategoryStripper();
      TreeTransformer collinizer = ctpp.collinizer();

      WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
      WordCatEqualityChecker eqcheck = new WordCatEqualityChecker();
      EquivalenceClassEval basicEval = new EquivalenceClassEval(eqclass, eqcheck, "basic");
      EquivalenceClassEval collinsEval = new EquivalenceClassEval(eqclass, eqcheck, "collinized");
      List<String> evalTypes = new ArrayList<String>(3);
      boolean goodPOS = false;
      if (segmentWords) {
        evalTypes.add(WordCatConstituent.wordType);
        if (ctpp.segmentMarkov && !parse) {
          evalTypes.add(WordCatConstituent.tagType);
          goodPOS = true;
        }
      }
      if (parse) {
        evalTypes.add(WordCatConstituent.tagType);
        evalTypes.add(WordCatConstituent.catType);
        if (combo) {
          evalTypes.add(WordCatConstituent.wordType);
          goodPOS = true;
        }
      }
      TreeToBracketProcessor proc = new TreeToBracketProcessor(evalTypes);

      System.err.println("Testing...");
      for (Tree goldTop : testTreebank) {
        Tree gold = goldTop.firstChild();
        List<HasWord> goldSentence = gold.yieldHasWord();
        if (goldSentence.size() > maxLength) {
          System.err.println("Skipping sentence; too long: " + goldSentence.size());
          continue;
        } else {
          System.err.println("Processing sentence; length: " + goldSentence.size());
        }
        List<HasWord> s;
        if (segmentWords) {
          StringBuilder goldCharBuf = new StringBuilder();
          for (Iterator<HasWord> wordIter = goldSentence.iterator(); wordIter.hasNext();) {
            StringLabel word = (StringLabel) wordIter.next();
            goldCharBuf.append(word.value());
          }
          String goldChars = goldCharBuf.toString();
          s = seg.segment(goldChars);
        } else {
          s = goldSentence;
        }
        Tree tree;
        if (parse) {
          tree = lp.parseTree(s);
          if (tree == null) {
            throw new RuntimeException("PARSER RETURNED NULL!!!");
          }
        } else {
          tree = Trees.toFlatTree(s);
          tree = subcategoryStripper.transformTree(tree);
        }

        if (pw != null) {
          if (parse) {
            tree.pennPrint(pw);
          } else {
            Iterator sentIter = s.iterator();
            for (; ;) {
              Word word = (Word) sentIter.next();
              pw.print(word.word());
              if (sentIter.hasNext()) {
                pw.print(" ");
              } else {
                break;
              }
            }
          }
          pw.println();
        }

        if (eval) {
          Collection ourBrackets, goldBrackets;
          ourBrackets = proc.allBrackets(tree);
          goldBrackets = proc.allBrackets(gold);
          if (goodPOS) {
            ourBrackets.addAll(proc.commonWordTagTypeBrackets(tree, gold));
            goldBrackets.addAll(proc.commonWordTagTypeBrackets(gold, tree));
          }

          basicEval.eval(ourBrackets, goldBrackets);
          System.out.println("\nScores:");
          basicEval.displayLast();

          Tree collinsTree = collinizer.transformTree(tree);
          Tree collinsGold = collinizer.transformTree(gold);
          ourBrackets = proc.allBrackets(collinsTree);
          goldBrackets = proc.allBrackets(collinsGold);
          if (goodPOS) {
            ourBrackets.addAll(proc.commonWordTagTypeBrackets(collinsTree, collinsGold));
            goldBrackets.addAll(proc.commonWordTagTypeBrackets(collinsGold, collinsTree));
View Full Code Here

    if (tree.isLeaf()) {
      return;
    }

    if (tree.isPreTerminal()) {
      Tree wordNode = tree.children()[0];
      String word = wordNode.label().value();
      SimpleMatrix wordVector = dvModel.getWordVector(word);
      wordVector = NeuralUtils.elementwiseApplyTanh(wordVector);
      nodeVectors.put(tree, wordVector);
      return;
    }
View Full Code Here

    List<Tree> hypotheses = topParses.get(tree);
    if (hypotheses == null || hypotheses.size() == 0) {
      throw new AssertionError("Failed to get any hypothesis trees for " + tree);
    }
    double bestScore = Double.NEGATIVE_INFINITY;
    Tree bestTree = null;
    IdentityHashMap<Tree, SimpleMatrix> bestVectors = null;
    for (Tree hypothesis : hypotheses) {
      IdentityHashMap<Tree, SimpleMatrix> nodeVectors = new IdentityHashMap<Tree, SimpleMatrix>();
      double scoreHyp = score(hypothesis, nodeVectors);
      double deltaMargin =0;
View Full Code Here

    Iterator<Tree> treeItr = trees.iterator();

    // Train factored lexicon on lemmas and morph tags
    int treeId = 0;
    while (treeItr.hasNext()) {
      Tree tree = treeItr.next();
      // CoreLabels, with morph analysis in the originalText annotation
      List<Label> yield = rawTrees == null ? tree.yield() : rawTreesItr.next().yield();
      // Annotated, binarized tree for the tags (labels are usually CategoryWordTag)
      List<Label> pretermYield = tree.preTerminalYield();

      int yieldLen = yield.size();
      for (int i = 0; i < yieldLen; ++i) {
        String word = yield.get(i).value();
        int wordId = wordIndex.addToIndex(word); // Don't do anything with words
View Full Code Here

  protected void doOneSentence(Annotation annotation, CoreMap sentence) {
    final List<CoreLabel> words = sentence.get(CoreAnnotations.TokensAnnotation.class);
    if (VERBOSE) {
      System.err.println("Parsing: " + words);
    }
    Tree tree = null;
    // generate the constituent tree
    if (maxSentenceLength <= 0 || words.size() <= maxSentenceLength) {
      try {
        final List<ParserConstraint> constraints = sentence.get(ParserAnnotations.ConstraintAnnotation.class);
        tree = doOneSentence(constraints, words);
View Full Code Here

  }

  @Override
  public void doOneFailedSentence(Annotation annotation, CoreMap sentence) {
    final List<CoreLabel> words = sentence.get(CoreAnnotations.TokensAnnotation.class);
    Tree tree = ParserUtils.xTree(words);
    for (CoreLabel word : words) {
      if (word.tag() == null) {
        word.setTag("XX");
      }
    }
View Full Code Here

    ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, BUILD_GRAPHS, gsf, sentence, tree);

    if (saveBinaryTrees) {
      TreeBinarizer binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(), parser.treebankLanguagePack());
      Tree binarized = binarizer.transformTree(tree);
      Trees.convertToCoreLabels(binarized);
      sentence.set(TreeCoreAnnotations.BinarizedTreeAnnotation.class, binarized);
    }
  }
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.trees.Tree$TreeIterator

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.