package edu.stanford.nlp.parser.lexparser;
import junit.framework.TestCase;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.trees.TreeReaderFactory;
/**
* Tests that the parser doesn't crash and comes up with the same
* result when run in a variety of multithreaded situations
*/
public class ThreadedParserSlowITest extends TestCase {
/**
 * Reads every tree from the given file.
 *
 * @param filename path of the tree file to read
 * @param encoding name of the charset used to decode the file
 * @return the trees, in the order they were read from the file
 * @throws RuntimeException wrapping any IOException raised while opening
 *         or reading the file
 */
public static List<Tree> readTrees(String filename, String encoding) {
  List<Tree> trees = new ArrayList<Tree>();
  try {
    FileInputStream in = new FileInputStream(filename);
    try {
      TreeReaderFactory trf = new LabeledScoredTreeReaderFactory();
      TreeReader tr = trf.newTreeReader(new InputStreamReader(in, encoding));
      Tree next;
      while ((next = tr.readTree()) != null) {
        trees.add(next);
      }
    } finally {
      // Close the underlying stream so the file handle is released even
      // when the encoding is unsupported or reading fails midway.
      in.close();
    }
    System.out.println("Read " + trees.size() + " trees from " + filename);
    return trees;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
/**
 * Parses the word yield of each input tree with the given parser,
 * printing a progress line after every tenth tree and after the last one.
 *
 * @param parser parser used for every sentence
 * @param input trees whose yields are parsed
 * @return the parser output for each input tree, in input order
 */
public static List<Tree> processFile(LexicalizedParser parser,
                                     List<Tree> input) {
  List<Tree> parsed = new ArrayList<Tree>();
  for (Tree source : input) {
    List<HasWord> words = source.yieldHasWord();
    parsed.add(parser.parseTree(words));

    int done = parsed.size();
    boolean reportProgress = (done % 10 == 0) || (done == input.size());
    if (reportProgress) {
      System.out.println("Processed " + done + " trees");
    }
  }
  return parsed;
}
public static class ParserThread extends Thread {
private final LexicalizedParser parser;
private final List<Tree> input;
private List<Tree> results;
private List<Tree> expectedResults;
public ParserThread(String parserFilename, List<Tree> input,
List<Tree> expectedResults) {
parser = LexicalizedParser.loadModel(parserFilename);
this.input = input;
this.expectedResults = expectedResults;
}
public ParserThread(LexicalizedParser parser, List<Tree> input,
List<Tree> expectedResults) {
this.parser = parser;
this.input = input;
this.expectedResults = expectedResults;
}
public void compareResults() {
assertEquals(expectedResults.size(), results.size());
for (int i = 0; i < expectedResults.size(); ++i) {
assertEquals(expectedResults.get(i), results.get(i));
}
}
@Override
public void run() {
results = processFile(parser, input);
}
}
// Paths and encodings used by this test. The *Trees files are read with
// readTrees() using the matching *Encoding; the *PCFG and *Factored paths
// are handed to LexicalizedParser.loadModel().

// English
public static final String englishTrees = "/u/nlp/data/lexparser/testtrees/engwsj160.mrg";
public static final String englishEncoding = "utf-8";
public static final String englishPCFG = "/u/nlp/data/lexparser/englishPCFG.ser.gz";
public static final String englishFactored = "/u/nlp/data/lexparser/englishFactored.ser.gz";
// German
public static final String germanTrees = "/u/nlp/data/lexparser/testtrees/german133.mrg";
public static final String germanEncoding = "ISO-8859-1";
public static final String germanPCFG = "/u/nlp/data/lexparser/germanPCFG.ser.gz";
public static final String germanFactored = "/u/nlp/data/lexparser/germanFactored.ser.gz";
// French (factored model only)
public static final String frenchTrees = "/u/nlp/data/lexparser/testtrees/french99.mrg";
public static final String frenchEncoding = "UTF-8";
public static final String frenchFactored = "/u/nlp/data/lexparser/frenchFactored.ser.gz";
// Arabic (factored model only)
public static final String arabicTrees = "/u/nlp/data/lexparser/testtrees/arabic99.mrg";
public static final String arabicEncoding = "UTF-8";
public static final String arabicFactored = "/u/nlp/data/lexparser/arabicFactored.ser.gz";
// Chinese
public static final String chineseTrees = "/u/nlp/data/lexparser/testtrees/chinese100.mrg";
public static final String chineseEncoding = "utf-8";
public static final String chinesePCFG = "/u/nlp/data/lexparser/chinesePCFG.ser.gz";
public static final String chineseFactored = "/u/nlp/data/lexparser/chineseFactored.ser.gz";
// Parser output for each model, keyed by the model path; filled in by
// setupExpectedResults(). setUp() treats a null value as "not yet computed".
public static Map<String, List<Tree>> expectedResults;
// Trees read from the test files, keyed by the path of the model they are
// parsed with; filled in by setupExpectedResults().
public static Map<String, List<Tree>> inputTrees;
/**
 * Reads the test trees for every language and computes the reference
 * parser output for each model that is fast enough to run here.
 *
 * The maps are built locally and assigned to the static fields only after
 * everything succeeded, so a failure partway through leaves
 * {@code expectedResults} unset and the next {@code setUp()} retries
 * instead of running tests against half-filled maps.
 */
public static void setupExpectedResults() {
  Map<String, List<Tree>> inputs = new HashMap<String, List<Tree>>();
  Map<String, List<Tree>> expected = new HashMap<String, List<Tree>>();

  List<Tree> input = readTrees(englishTrees, englishEncoding);
  inputs.put(englishPCFG, input);
  inputs.put(englishFactored, input);
  input = readTrees(germanTrees, germanEncoding);
  inputs.put(germanPCFG, input);
  inputs.put(germanFactored, input);
  input = readTrees(frenchTrees, frenchEncoding);
  inputs.put(frenchFactored, input);
  input = readTrees(arabicTrees, arabicEncoding);
  inputs.put(arabicFactored, input);
  input = readTrees(chineseTrees, chineseEncoding);
  inputs.put(chinesePCFG, input);
  inputs.put(chineseFactored, input);

  expected.put(englishPCFG, parseWithModel(englishPCFG, inputs));
  expected.put(englishFactored, parseWithModel(englishFactored, inputs));
  expected.put(germanPCFG, parseWithModel(germanPCFG, inputs));
  expected.put(germanFactored, parseWithModel(germanFactored, inputs));

  // TODO: Problem: too slow
  // These models are still loaded but not run.
  // NOTE(review): presumably kept as a load check; confirm before removing.
  LexicalizedParser.loadModel(frenchFactored);
  // expected.put(frenchFactored, parseWithModel(frenchFactored, inputs));
  LexicalizedParser.loadModel(arabicFactored);
  // expected.put(arabicFactored, parseWithModel(arabicFactored, inputs));

  expected.put(chinesePCFG, parseWithModel(chinesePCFG, inputs));

  LexicalizedParser.loadModel(chineseFactored);
  // expected.put(chineseFactored, parseWithModel(chineseFactored, inputs));

  // Publish last; setUp() uses expectedResults != null as the "done" flag.
  inputTrees = inputs;
  expectedResults = expected;
}

/** Loads the model at {@code modelPath} and parses the trees stored under that key. */
private static List<Tree> parseWithModel(String modelPath,
                                         Map<String, List<Tree>> inputs) {
  LexicalizedParser parser = LexicalizedParser.loadModel(modelPath);
  return processFile(parser, inputs.get(modelPath));
}
/**
 * Computes the shared inputs and expected results the first time any test
 * runs; later calls find them already present and do nothing.
 */
@Override
public void setUp() {
  synchronized (ThreadedParserSlowITest.class) {
    if (expectedResults != null) {
      return;
    }
    setupExpectedResults();
  }
}
/**
 * Runs four two-thread scenarios for a PCFG model and a factored model:
 * a shared PCFG parser, a shared factored parser, two separately loaded
 * PCFG parsers, and one PCFG parser next to one factored parser.
 *
 * @param pcfg path of the PCFG model
 * @param factored path of the factored model
 */
public static void runFourTests(String pcfg, String factored) throws Exception {
  final List<Tree> pcfgTrees = inputTrees.get(pcfg);
  final List<Tree> factoredTrees = inputTrees.get(factored);
  final List<Tree> pcfgExpected = expectedResults.get(pcfg);
  final List<Tree> factoredExpected = expectedResults.get(factored);

  // Test two of the same PCFG
  LexicalizedParser sharedPcfg = LexicalizedParser.loadModel(pcfg);
  ParserThread pcfgA = new ParserThread(sharedPcfg, pcfgTrees, pcfgExpected);
  ParserThread pcfgB = new ParserThread(sharedPcfg, pcfgTrees, pcfgExpected);
  runTest(pcfgA, pcfgB);

  // test two of the same factored
  LexicalizedParser sharedFactored = LexicalizedParser.loadModel(factored);
  ParserThread factoredA = new ParserThread(sharedFactored, factoredTrees, factoredExpected);
  ParserThread factoredB = new ParserThread(sharedFactored, factoredTrees, factoredExpected);
  runTest(factoredA, factoredB);

  // test two different instantiations of the same pcfg
  ParserThread ownPcfgA = new ParserThread(pcfg, pcfgTrees, pcfgExpected);
  ParserThread ownPcfgB = new ParserThread(pcfg, pcfgTrees, pcfgExpected);
  runTest(ownPcfgA, ownPcfgB);

  // test one of each
  ParserThread mixedPcfg = new ParserThread(pcfg, pcfgTrees, pcfgExpected);
  ParserThread mixedFactored = new ParserThread(factored, factoredTrees, factoredExpected);
  runTest(mixedPcfg, mixedFactored);
}
/**
 * Runs two two-thread scenarios for a single model: both threads sharing
 * one loaded parser, then each thread loading its own copy.
 *
 * @param parserPath path of the model to test
 */
public static void runTwoTests(String parserPath) throws Exception {
  final List<Tree> trees = inputTrees.get(parserPath);
  final List<Tree> expected = expectedResults.get(parserPath);

  // Test two of the same
  LexicalizedParser shared = LexicalizedParser.loadModel(parserPath);
  ParserThread sharedA = new ParserThread(shared, trees, expected);
  ParserThread sharedB = new ParserThread(shared, trees, expected);
  runTest(sharedA, sharedB);

  // test two different instantiations of the same model
  ParserThread ownA = new ParserThread(parserPath, trees, expected);
  ParserThread ownB = new ParserThread(parserPath, trees, expected);
  runTest(ownA, ownB);
}
/** Runs the four threaded scenarios with the English PCFG and factored models. */
public void testEnglish() throws Exception {
  final String pcfgModel = englishPCFG;
  final String factoredModel = englishFactored;
  runFourTests(pcfgModel, factoredModel);
}
/** Runs the four threaded scenarios with the German PCFG and factored models. */
public void testGerman() throws Exception {
  final String pcfgModel = germanPCFG;
  final String factoredModel = germanFactored;
  runFourTests(pcfgModel, factoredModel);
}
/** Runs the two threaded scenarios with the Chinese PCFG model. */
public void testChinese() throws Exception {
  final String pcfgModel = chinesePCFG;
  runTwoTests(pcfgModel);
}
// TODO: problem: very slow
// public void testFrench()
// throws Exception
// {
// runTwoTests(frenchFactored);
// }
// TODO: problem: very slow
// public void testArabic()
// throws Exception
// {
// runTwoTests(arabicFactored);
// }
/**
 * Starts all the given threads, waits for every one of them to finish and
 * then checks each thread's output against its expected results.
 *
 * All threads are joined before any comparison is made, so a failed
 * assertion for one thread cannot leave the others still parsing in the
 * background when the test method returns.
 *
 * @param threads parser threads to run concurrently
 * @throws Exception if the calling thread is interrupted while waiting
 */
public static void runTest(ParserThread ... threads) throws Exception {
  for (ParserThread thread : threads) {
    thread.start();
  }
  for (ParserThread thread : threads) {
    thread.join();
  }
  for (ParserThread thread : threads) {
    thread.compareResults();
  }
}
}