Package edu.stanford.nlp.parser.lexparser

Examples of edu.stanford.nlp.parser.lexparser.TreeBinarizer


    System.err.println("Converted trees to binarized format");
    return binarized;
  }

  public static List<Tree> binarizeTreebank(Treebank treebank, Options op) {
    TreeBinarizer binarizer = TreeBinarizer.simpleTreeBinarizer(op.tlpParams.headFinder(), op.tlpParams.treebankLanguagePack());
    BasicCategoryTreeTransformer basicTransformer = new BasicCategoryTreeTransformer(op.langpack());
    CompositeTreeTransformer transformer = new CompositeTreeTransformer();
    transformer.addTransformer(binarizer);
    transformer.addTransformer(basicTransformer);
View Full Code Here


    }

    ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, BUILD_GRAPHS, gsf, sentence, tree);

    if (saveBinaryTrees) {
      TreeBinarizer binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(), parser.treebankLanguagePack());
      Tree binarized = binarizer.transformTree(tree);
      Trees.convertToCoreLabels(binarized);
      sentence.set(TreeCoreAnnotations.BinarizedTreeAnnotation.class, binarized);
    }
  }
View Full Code Here

    if (inputPath == null) {
      throw new IllegalArgumentException("Must specify input file with -input");
    }

    LexicalizedParser parser = LexicalizedParser.loadModel(parserModel);
    TreeBinarizer binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(), parser.treebankLanguagePack());

    if (sentimentModelPath != null) {
      sentimentModel = SentimentModel.loadSerialized(sentimentModelPath);
    }

    String text = IOUtils.slurpFileNoExceptions(inputPath);
    String[] chunks = text.split("\\n\\s*\\n+"); // need blank line to make a new chunk

    for (String chunk : chunks) {
      if (chunk.trim() == "") {
        continue;
      }
      // The expected format is that line 0 will be the text of the
      // sentence, and each subsequent line, if any, will be a value
      // followed by the sequence of tokens that get that value.

      // Here we take the first line and tokenize it as one sentence.
      String[] lines = chunk.trim().split("\\n");
      String sentence = lines[0];
      StringReader sin = new StringReader(sentence);
      DocumentPreprocessor document = new DocumentPreprocessor(sin);
      document.setSentenceFinalPuncWords(new String[] {"\n"});
      List<HasWord> tokens = document.iterator().next();
      Integer mainLabel = new Integer(tokens.get(0).word());
      //System.out.print("Main Sentence Label: " + mainLabel.toString() + "; ");
      tokens = tokens.subList(1, tokens.size());
      //System.err.println(tokens);

      Map<Pair<Integer, Integer>, String> spanToLabels = Generics.newHashMap();
      for (int i = 1; i < lines.length; ++i) {
        extractLabels(spanToLabels, tokens, lines[i]);
      }

      // TODO: add an option which treats the spans as constraints when parsing

      Tree tree = parser.apply(tokens);
      Tree binarized = binarizer.transformTree(tree);
      Tree collapsedUnary = transformer.transformTree(binarized);

      // if there is a sentiment model for use in prelabeling, we
      // label here and then use the user given labels to adjust
      if (sentimentModel != null) {
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.parser.lexparser.TreeBinarizer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.