Package edu.stanford.nlp.pipeline

Source Code of edu.stanford.nlp.pipeline.CharniakParserAnnotator

package edu.stanford.nlp.pipeline;

import java.util.List;
import java.util.Set;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.parser.common.ParserUtils;
import edu.stanford.nlp.parser.charniak.CharniakParser;
import edu.stanford.nlp.trees.EnglishGrammaticalStructureFactory;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.CoreMap;

/**
* This class will add parse information to an Annotation from the BLLIP parser.
* It allows you to use the Charniak parser or Charniak and Johnson reranking parser
* along with any existing parser and reranking model.
*
* It assumes that the Annotation already contains the tokenized words
* as a {@code List<List<CoreLabel>>} under
* {@code CoreAnnotations.SentencesAnnotation.class}.
* If the words have POS tags, they will not be used.
*
* @author David McClosky
*/
public class CharniakParserAnnotator implements Annotator {

  // TODO: make this an option?
  private static final boolean BUILD_GRAPHS = true;

  private final GrammaticalStructureFactory gsf = new EnglishGrammaticalStructureFactory();

  private final boolean VERBOSE;
  private final CharniakParser parser;

  public CharniakParserAnnotator(String parserModel, String parserExecutable, boolean verbose, int maxSentenceLength) {
    VERBOSE = verbose;
    parser = new CharniakParser(parserExecutable, parserModel);
    parser.setMaxSentenceLength(maxSentenceLength);
  }

  public CharniakParserAnnotator() {
    VERBOSE = false;
    parser = new CharniakParser();
  }

  @Override
  public void annotate(Annotation annotation) {
    if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
      // parse a tree for each sentence
      for (CoreMap sentence: annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        List<CoreLabel> words = sentence.get(CoreAnnotations.TokensAnnotation.class);
        if (VERBOSE) {
          System.err.println("Parsing: " + words);
        }
        int maxSentenceLength = parser.getMaxSentenceLength();
        // generate the constituent tree
        Tree tree; // initialized below
        if (maxSentenceLength <= 0 || words.size() < maxSentenceLength) {
          tree = parser.getBestParse(words);
        }
        else {
          tree = ParserUtils.xTree(words);
        }

        ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, BUILD_GRAPHS, gsf, sentence, tree);
      }
    } else {
      throw new RuntimeException("unable to find sentences in: " + annotation);
    }
  }

  @Override
  public Set<Requirement> requires() {
    return TOKENIZE_AND_SSPLIT;
  }

  @Override
  public Set<Requirement> requirementsSatisfied() {
    return PARSE_AND_TAG;
  }

}
TOP

Related Classes of edu.stanford.nlp.pipeline.CharniakParserAnnotator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.