Examples of edu.stanford.nlp.parser.lexparser.LexicalizedParser

Package edu.stanford.nlp.parser.lexparser

Examples of edu.stanford.nlp.parser.lexparser.LexicalizedParser

edu.stanford.nlp.parser.lexparser.LexicalizedParser
This class provides the top-level API and command-line interface to a set of reasonably good treebank-trained parsers. The name reflects the main factored parsing model, which provides a lexicalized PCFG parser implemented as a product model of a plain PCFG parser and a lexicalized dependency parser. But you can also run either component parser alone. In particular, it is often useful to do unlexicalized PCFG parsing by using just that component parser.
See the package documentation for more details and examples of use.
For information on invoking the parser from the command line, and for a more detailed list of options, see the {@link #main} method.
Note that training on a 1 million word treebank requires a fair amount of memory to run. Try -mx1500m to increase the memory allocated by the JVM. @author Dan Klein (original version) @author Christopher Manning (better features, ParserParams, serialization) @author Roger Levy (internationalization) @author Teg Grenager (grammar compaction, tokenization, etc.) @author Galen Andrew (considerable refactoring) @author John Bauer (made threadsafe)

    CompositeTreeTransformer transformer = LexicalizedParser.buildTrainTransformer(op);
    return transformer;
  }


  public LexicalizedParser attachModelToLexicalizedParser() {
    LexicalizedParser newParser = LexicalizedParser.copyLexicalizedParser(parser);
    DVModelReranker reranker = new DVModelReranker(dvModel);
    newParser.reranker = reranker;
    return newParser;
  }

View Full Code Here

    return newParser;
  }


  public void saveModel(String filename) {
    System.err.println("Saving serialized model to " + filename);
    LexicalizedParser newParser = attachModelToLexicalizedParser();
    newParser.saveParserToSerialized(filename);
    System.err.println("... done");
  }

View Full Code Here

      throw new IllegalArgumentException("Need to either train a new model, run the gradient check or specify a model to load with -model");
    }


    String[] newArgs = unusedArgs.toArray(new String[unusedArgs.size()]);
    DVParser dvparser = null;
    LexicalizedParser lexparser = null;
    if (initialModelPath != null) {
      lexparser = LexicalizedParser.loadModel(initialModelPath, newArgs);
      DVModel model = getModelFromLexicalizedParser(lexparser);
      dvparser = new DVParser(model, lexparser);
    } else if (runTraining || runGradientCheck) {

View Full Code Here

   *  Usage: ParserDemo2 [grammar [textFile]]
   */
  public static void main(String[] args) throws IOException {
    String grammar = args.length > 0 ? args[0] : "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
    String[] options = { "-maxLength", "80", "-retainTmpSubcategories" };
    LexicalizedParser lp = LexicalizedParser.loadModel(grammar, options);
    TreebankLanguagePack tlp = lp.getOp().langpack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();


    Iterable<List<? extends HasWord>> sentences;
    if (args.length > 1) {
      DocumentPreprocessor dp = new DocumentPreprocessor(args[1]);
      List<List<? extends HasWord>> tmp =
        new ArrayList<List<? extends HasWord>>();
      for (List<HasWord> sentence : dp) {
        tmp.add(sentence);
      }
      sentences = tmp;
    } else {
      // Showing tokenization and parsing in code a couple of different ways.
      String[] sent = { "This", "is", "an", "easy", "sentence", "." };
      List<HasWord> sentence = new ArrayList<HasWord>();
      for (String word : sent) {
        sentence.add(new Word(word));
      }


      String sent2 = ("This is a slightly longer and more complex " +
                      "sentence requiring tokenization.");
      // Use the default tokenizer for this TreebankLanguagePack
      Tokenizer<? extends HasWord> toke =
        tlp.getTokenizerFactory().getTokenizer(new StringReader(sent2));
      List<? extends HasWord> sentence2 = toke.tokenize();


      String[] sent3 = { "It", "can", "can", "it", "." };
      String[] tag3 = { "PRP", "MD", "VB", "PRP", "." }; // Parser gets second "can" wrong without help
      List<TaggedWord> sentence3 = new ArrayList<TaggedWord>();
      for (int i = 0; i < sent3.length; i++) {
        sentence3.add(new TaggedWord(sent3[i], tag3[i]));
      }
      Tree parse = lp.parse(sentence3);
      parse.pennPrint();


      List<List<? extends HasWord>> tmp =
        new ArrayList<List<? extends HasWord>>();
      tmp.add(sentence);
      tmp.add(sentence2);
      tmp.add(sentence3);
      sentences = tmp;
    }


    for (List<? extends HasWord> sentence : sentences) {
      Tree parse = lp.parse(sentence);
      parse.pennPrint();
      System.out.println();
      GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
      List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
      System.out.println(tdl);
      System.out.println();


      System.out.println("The words of the sentence:");
      for (Label lab : parse.yield()) {
        if (lab instanceof CoreLabel) {
          System.out.println(((CoreLabel) lab).toString(CoreLabel.OutputFormat.VALUE_MAP));
        } else {
          System.out.println(lab);
        }
      }
      System.out.println();
      System.out.println(parse.taggedYield());
      System.out.println();


    }


    // This method turns the String into a single sentence using the
    // default tokenizer for the TreebankLanguagePack.
    String sent3 = "This is one last test!";
    lp.parse(sent3).pennPrint();
  }

View Full Code Here

  public static void main(String[] args) {
    String parserModel = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
    if (args.length > 0) {
      parserModel = args[0];
    }
    LexicalizedParser lp = LexicalizedParser.loadModel(parserModel);


    if (args.length == 0) {
      demoAPI(lp);
    } else {
      String textFile = (args.length > 1) ? args[1] : args[0];

View Full Code Here


    System.err.println("Writing output to " + output);
    System.err.println("Loading parser model " + parserModel);
    System.err.println("Writing " + dvKBest + " hypothesis trees for each tree");


    LexicalizedParser parser = LexicalizedParser.loadModel(parserModel, "-dvKBest", Integer.toString(dvKBest));
    CacheParseHypotheses cacher = new CacheParseHypotheses(parser);
    TreeTransformer transformer = DVParser.buildTrainTransformer(parser.getOp());
    List<Tree> sentences = new ArrayList<Tree>();
    for (Pair<String, FileFilter> description : treebanks) {
      System.err.println("Reading trees from " + description.first);
      Treebank treebank = parser.getOp().tlpParams.memoryTreebank();
      treebank.loadPath(description.first, description.second);


      treebank = treebank.transform(transformer);
      sentences.addAll(treebank);
    }

View Full Code Here

        unusedArgs.add(args[argIndex++]);
      }
    }


    String[] newArgs = unusedArgs.toArray(new String[unusedArgs.size()]);
    LexicalizedParser underlyingParser = null;
    Options options = null;
    LexicalizedParser combinedParser = null;
    if (baseModelPaths != null) {
      List<DVModel> dvparsers = new ArrayList<DVModel>();
      for (String baseModelPath : baseModelPaths) {
        System.err.println("Loading serialized DVParser from " + baseModelPath);
        LexicalizedParser dvparser = LexicalizedParser.loadModel(baseModelPath);
        Reranker reranker = dvparser.reranker;
        if (!(reranker instanceof DVModelReranker)) {
          throw new IllegalArgumentException("Expected parsers with DVModel embedded");
        }
        dvparsers.add(((DVModelReranker) reranker).getModel());

View Full Code Here

        unusedArgs.add(args[argIndex++]);
      }
    }


    String[] newArgs = unusedArgs.toArray(new String[unusedArgs.size()]);
    LexicalizedParser parser = LexicalizedParser.loadModel(modelPath, newArgs);
    DVModel model = DVParser.getModelFromLexicalizedParser(parser);


    File outputFile = new File(outputPath);
    FileSystem.checkNotExistsOrFail(outputFile);
    FileSystem.mkdirOrFail(outputFile);


    int count = 0;
    if (inputPath != null) {
      Reader input = new BufferedReader(new FileReader(inputPath));
      DocumentPreprocessor processor = new DocumentPreprocessor(input);
      for (List<HasWord> sentence : processor) {
        count++; // index from 1
        ParserQuery pq = parser.parserQuery();
        if (!(pq instanceof RerankingParserQuery)) {
          throw new IllegalArgumentException("Expected a RerankingParserQuery");
        }
        RerankingParserQuery rpq = (RerankingParserQuery) pq;
        if (!rpq.parse(sentence)) {

View Full Code Here


    if (inputPath == null) {
      throw new IllegalArgumentException("Must specify input file with -input");
    }


    LexicalizedParser parser = LexicalizedParser.loadModel(parserModel);
    TreeBinarizer binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(), parser.treebankLanguagePack());


    if (sentimentModelPath != null) {
      sentimentModel = SentimentModel.loadSerialized(sentimentModelPath);
    }


    String text = IOUtils.slurpFileNoExceptions(inputPath);
    String[] chunks = text.split("\\n\\s*\\n+"); // need blank line to make a new chunk


    for (String chunk : chunks) {
      if (chunk.trim() == "") {
        continue;
      }
      // The expected format is that line 0 will be the text of the
      // sentence, and each subsequent line, if any, will be a value
      // followed by the sequence of tokens that get that value.


      // Here we take the first line and tokenize it as one sentence.
      String[] lines = chunk.trim().split("\\n");
      String sentence = lines[0];
      StringReader sin = new StringReader(sentence);
      DocumentPreprocessor document = new DocumentPreprocessor(sin);
      document.setSentenceFinalPuncWords(new String[] {"\n"});
      List<HasWord> tokens = document.iterator().next();
      Integer mainLabel = new Integer(tokens.get(0).word());
      //System.out.print("Main Sentence Label: " + mainLabel.toString() + "; ");
      tokens = tokens.subList(1, tokens.size());
      //System.err.println(tokens);


      Map<Pair<Integer, Integer>, String> spanToLabels = Generics.newHashMap();
      for (int i = 1; i < lines.length; ++i) {
        extractLabels(spanToLabels, tokens, lines[i]);
      }


      // TODO: add an option which treats the spans as constraints when parsing


      Tree tree = parser.apply(tokens);
      Tree binarized = binarizer.transformTree(tree);
      Tree collapsedUnary = transformer.transformTree(binarized);


      // if there is a sentiment model for use in prelabeling, we
      // label here and then use the user given labels to adjust

View Full Code Here

    "edu/stanford/nlp/models/lexparser/englishFactored.ser.gz",
  };




  public void testEnglishTagSet() {
    LexicalizedParser lp = LexicalizedParser.loadModel(englishParsers[0]);
    Set<String> tagSet = lp.getLexicon().tagSet(lp.treebankLanguagePack().getBasicCategoryFunction());
    for (String name : englishTaggers) {
      MaxentTagger tagger = new MaxentTagger(name);
      assertEquals("English PCFG parser/" + name + " tag set mismatch", tagSet, tagger.tagSet());
    }
    for (String name : englishParsers) {
      LexicalizedParser lp2 = LexicalizedParser.loadModel(name);
      assertEquals("English PCFG parser/" + name + " tag set mismatch",
                   tagSet, lp2.getLexicon().tagSet(lp.treebankLanguagePack().getBasicCategoryFunction()));
    }
  }

View Full Code Here

0 1

TOP

Related Classes of edu.stanford.nlp.parser.lexparser.LexicalizedParser

edu.stanford.nlp.ling.Word

edu.stanford.nlp.parser.common.ParserQuery

edu.stanford.nlp.parser.DependencyIndexITest

edu.stanford.nlp.parser.dvparser.AverageDVModels

edu.stanford.nlp.parser.dvparser.CacheParseHypotheses

edu.stanford.nlp.parser.dvparser.CombineDVModels

edu.stanford.nlp.parser.dvparser.CrossValidateTestOptions

edu.stanford.nlp.parser.dvparser.DumpMatrices

edu.stanford.nlp.parser.dvparser.DVParser

edu.stanford.nlp.parser.dvparser.FindNearestNeighbors

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.