Package edu.stanford.nlp.parser.lexparser

Source Code of edu.stanford.nlp.parser.lexparser.ThreadedParserSlowITest$ParserThread

package edu.stanford.nlp.parser.lexparser;

import junit.framework.TestCase;

import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.trees.TreeReaderFactory;

/**
* Tests that the parser doesn't crash and comes up with the same
* result when run in a variety of multithreaded situations
*/
public class ThreadedParserSlowITest extends TestCase {
  public static List<Tree> readTrees(String filename, String encoding) {
    ArrayList<Tree> trees = new ArrayList<Tree>();
    try {
      TreeReaderFactory trf = new LabeledScoredTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(new InputStreamReader(
                        new FileInputStream(filename), encoding));
      Tree next;
      while ((next = tr.readTree()) != null) {
        trees.add(next);
      }
      System.out.println("Read " + trees.size() + " trees from " + filename);
      return trees;
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  public static List<Tree> processFile(LexicalizedParser parser,
                                       List<Tree> input) {
    List<Tree> results = new ArrayList<Tree>();
    for (Tree tree : input) {
      List<HasWord> sentence = tree.yieldHasWord();
      Tree output = parser.parseTree(sentence);
      results.add(output);
      if (results.size() % 10 == 0 || results.size() == input.size()) {
        System.out.println("Processed " + results.size() + " trees");
      }
    }
    return results;
  }

  public static class ParserThread extends Thread {
    private final LexicalizedParser parser;
    private final List<Tree> input;
    private List<Tree> results;
    private List<Tree> expectedResults;

    public ParserThread(String parserFilename, List<Tree> input,
                        List<Tree> expectedResults) {
      parser = LexicalizedParser.loadModel(parserFilename);
      this.input = input;
      this.expectedResults = expectedResults;
    }

    public ParserThread(LexicalizedParser parser, List<Tree> input,
                        List<Tree> expectedResults) {
      this.parser = parser;
      this.input = input;
      this.expectedResults = expectedResults;
    }

    public void compareResults() {
      assertEquals(expectedResults.size(), results.size());
      for (int i = 0; i < expectedResults.size(); ++i) {
        assertEquals(expectedResults.get(i), results.get(i));
      }
    }

    @Override
    public void run() {
      results = processFile(parser, input);
    }
  }

  // For each language: the tree file handed to readTrees, the character
  // encoding used to decode it, and the model paths handed to
  // LexicalizedParser.loadModel.

  // English
  public static final String englishTrees = "/u/nlp/data/lexparser/testtrees/engwsj160.mrg";
  public static final String englishEncoding = "utf-8";
  public static final String englishPCFG = "/u/nlp/data/lexparser/englishPCFG.ser.gz";
  public static final String englishFactored = "/u/nlp/data/lexparser/englishFactored.ser.gz";

  // German
  public static final String germanTrees = "/u/nlp/data/lexparser/testtrees/german133.mrg";
  public static final String germanEncoding = "ISO-8859-1";
  public static final String germanPCFG = "/u/nlp/data/lexparser/germanPCFG.ser.gz";
  public static final String germanFactored = "/u/nlp/data/lexparser/germanFactored.ser.gz";

  // French (factored model only)
  public static final String frenchTrees = "/u/nlp/data/lexparser/testtrees/french99.mrg";
  public static final String frenchEncoding = "UTF-8";
  public static final String frenchFactored = "/u/nlp/data/lexparser/frenchFactored.ser.gz";

  // Arabic (factored model only)
  public static final String arabicTrees = "/u/nlp/data/lexparser/testtrees/arabic99.mrg";
  public static final String arabicEncoding = "UTF-8";
  public static final String arabicFactored = "/u/nlp/data/lexparser/arabicFactored.ser.gz";

  // Chinese
  public static final String chineseTrees = "/u/nlp/data/lexparser/testtrees/chinese100.mrg";
  public static final String chineseEncoding = "utf-8";
  public static final String chinesePCFG = "/u/nlp/data/lexparser/chinesePCFG.ser.gz";
  public static final String chineseFactored = "/u/nlp/data/lexparser/chineseFactored.ser.gz";

  // Both maps are keyed by parser model path and filled in by
  // setupExpectedResults(). setUp() treats a null expectedResults as
  // "not computed yet".

  // Parser output recorded by setupExpectedResults(), per model
  public static Map<String, List<Tree>> expectedResults;
  // Trees read from the language's tree file, per model
  public static Map<String, List<Tree>> inputTrees;

  public static void setupExpectedResults() {
    expectedResults = new HashMap<String, List<Tree>>();
    inputTrees = new HashMap<String, List<Tree>>();

    List<Tree> input = readTrees(englishTrees, englishEncoding);
    inputTrees.put(englishPCFG, input);
    inputTrees.put(englishFactored, input);
    input = readTrees(germanTrees, germanEncoding);
    inputTrees.put(germanPCFG, input);
    inputTrees.put(germanFactored, input);
    input = readTrees(frenchTrees, frenchEncoding);
    inputTrees.put(frenchFactored, input);
    input = readTrees(arabicTrees, arabicEncoding);
    inputTrees.put(arabicFactored, input);
    input = readTrees(chineseTrees, chineseEncoding);
    inputTrees.put(chinesePCFG, input);
    inputTrees.put(chineseFactored, input);

    LexicalizedParser parser = LexicalizedParser.loadModel(englishPCFG);
    List<Tree> results = processFile(parser, inputTrees.get(englishPCFG));
    expectedResults.put(englishPCFG, results);

    parser = LexicalizedParser.loadModel(englishFactored);
    results = processFile(parser, inputTrees.get(englishFactored));
    expectedResults.put(englishFactored, results);

    parser = LexicalizedParser.loadModel(germanPCFG);
    results = processFile(parser, inputTrees.get(germanPCFG));
    expectedResults.put(germanPCFG, results);

    parser = LexicalizedParser.loadModel(germanFactored);
    results = processFile(parser, inputTrees.get(germanFactored));
    expectedResults.put(germanFactored, results);

    // TODO: Problem: too slow
    parser = LexicalizedParser.loadModel(frenchFactored);
    // results = processFile(parser, inputTrees.get(frenchFactored));
    // expectedResults.put(frenchFactored, results);

    parser = LexicalizedParser.loadModel(arabicFactored);
    //results = processFile(parser, inputTrees.get(arabicFactored));
    //expectedResults.put(arabicFactored, results);

    parser = LexicalizedParser.loadModel(chinesePCFG);
    results = processFile(parser, inputTrees.get(chinesePCFG));
    expectedResults.put(chinesePCFG, results);

    parser = LexicalizedParser.loadModel(chineseFactored);
    // results = processFile(parser, inputTrees.get(chineseFactored));
    // expectedResults.put(chineseFactored, results);
  }

  @Override
  public void setUp() {
    synchronized(ThreadedParserSlowITest.class) {
      if (expectedResults == null) {
        setupExpectedResults();
      }
    }
  }

  public static void runFourTests(String pcfg, String factored)
    throws Exception
  {
    List<Tree> pcfgInput = inputTrees.get(pcfg);
    List<Tree> factoredInput = inputTrees.get(factored);

    List<Tree> pcfgResults = expectedResults.get(pcfg);
    List<Tree> factoredResults = expectedResults.get(factored);

    // Test two of the same PCFG
    LexicalizedParser parser = LexicalizedParser.loadModel(pcfg);
    runTest(new ParserThread(parser, pcfgInput, pcfgResults),
            new ParserThread(parser, pcfgInput, pcfgResults));

    // test two of the same factored
    parser = LexicalizedParser.loadModel(factored);
    runTest(new ParserThread(parser, factoredInput, factoredResults),
            new ParserThread(parser, factoredInput, factoredResults));

    // test two different instantiations of the same pcfg
    runTest(new ParserThread(pcfg, pcfgInput, pcfgResults),
            new ParserThread(pcfg, pcfgInput, pcfgResults));

    // test one of each
    runTest(new ParserThread(pcfg, pcfgInput, pcfgResults),
            new ParserThread(factored, factoredInput, factoredResults));
  }

  public static void runTwoTests(String parserPath)
    throws Exception
  {
    List<Tree> input = inputTrees.get(parserPath);
    List<Tree> results = expectedResults.get(parserPath);

    // Test two of the same
    LexicalizedParser parser = LexicalizedParser.loadModel(parserPath);
    runTest(new ParserThread(parser, input, results),
            new ParserThread(parser, input, results));

    // test two different instantiations of the same model
    runTest(new ParserThread(parserPath, input, results),
            new ParserThread(parserPath, input, results));
  }

  public void testEnglish()
    throws Exception
  {
    runFourTests(englishPCFG, englishFactored);
  }

  public void testGerman()
    throws Exception
  {
    runFourTests(germanPCFG, germanFactored);
  }

  public void testChinese()
    throws Exception
  {
    runTwoTests(chinesePCFG);
  }

  // TODO: problem: very slow
  // public void testFrench()
  //   throws Exception
  // {
  //   runTwoTests(frenchFactored);
  // }

  // TODO: problem: very slow
  // public void testArabic()
  //   throws Exception
  // {
  //   runTwoTests(arabicFactored);
  // }

  public static void runTest(ParserThread ... threads) throws Exception {
    for (ParserThread thread : threads) {
      thread.start();
    }
    for (ParserThread thread : threads) {
      thread.join();
      thread.compareResults();
    }
  }



}
TOP

Related Classes of edu.stanford.nlp.parser.lexparser.ThreadedParserSlowITest$ParserThread

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.