* double advancePercentage =
* opennlp.tools.parser.chunking.Parser.defaultAdvancePercentage;
* opennlp.tools.parser.Parser parser = TreebankParser.getParser(
* "models/parser", useTagDict, useCaseInsensitiveTagDict, beamSize,
* advancePercentage);
*/Parser parser = ParserFactory.create(new ParserModel(
new FileInputStream("models/en-parser-chunking.bin")),
AbstractBottomUpParser.defaultBeamSize,
AbstractBottomUpParser.defaultAdvancePercentage);
// break a paragraph into sentences
String[] sents = sdetector.sentDetect(paragraph.toString());
// TODO handle paragraph (multiple sentences)
String sent = sents[0];
// tokenize brackets and parentheses by putting a space on either side.
// this makes sure it doesn't get confused with output from the parser
sent = untokenizedParenPattern1.matcher(sent).replaceAll("$1 $2");
sent = untokenizedParenPattern2.matcher(sent).replaceAll("$1 $2");
// get the tokenizer to break apart the sentence
String[] tokens = tokenizer.tokenize(sent);
// build a string to parse as well as a list of tokens
StringBuffer sb = new StringBuffer();
List<String> tokenList = new ArrayList<String>();
for (int j = 0; j < tokens.length; j++) {
String tok = convertToken(tokens[j]);
tokenList.add(tok);
sb.append(tok).append(" ");
}
String text = sb.substring(0, sb.length() - 1).toString();
// the parent parse instance spans the entire sentence
Parse p = new Parse(text, new Span(0, text.length()), "INC", 1.0, null);
// create a parse object for each token and add it to the parent
int start = 0;
for (final String tok : tokenList) {
p.insert(new Parse(text, new Span(start, start + tok.length()),
opennlp.tools.parser.treeinsert.Parser.TOK_NODE, 1.0, 0));
start += tok.length() + 1;
}
// fetch multiple possible parse trees
Parse[] parses = parser.parse(p, numParses);
Parse chosen_parse = parses[0 /* TODO handle other parse trees */];
for (Parse parse : parses) {
System.out.print("Prob[" + parse.getProb() + "] : ");
parse.show();
System.out.println(chosen_parse);