package kpi.asoiu.parsers;
import kpi.asoiu.factory.ModelFactory;
import kpi.asoiu.model.*;
import opennlp.tools.cmdline.parser.ParserTool;
import opennlp.tools.parser.Parse;
import opennlp.tools.parser.Parser;
import opennlp.tools.parser.ParserFactory;
import opennlp.tools.parser.ParserModel;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
/**
 * Parses a line of English text with the OpenNLP chunking parser and converts
 * the resulting parse tree into a {@link Sentence} by applying the project's
 * {@link Rule} set to every node of the tree.
 *
 * <p>The sentence under construction is kept in the {@link #result} field, which
 * is replaced on every call to {@link #parse(String)}; a single instance should
 * therefore not be used for overlapping {@code parse} calls.
 *
 * <p>Created by IntelliJ IDEA.
 * User: Dara
 * Date: 25.09.11
 * Time: 16:18
 */
public class ParseSentence {

    /** OpenNLP parser built once from the model file in the constructor. */
    Parser parser;

    /** Sentence being assembled by the current (or most recent) {@link #parse(String)} call. */
    Sentence result;

    /**
     * Loads the parser model from the file {@code en-parser-chunking.bin}
     * (resolved against the current working directory) and creates the parser.
     *
     * @throws IOException if the model file cannot be opened, read or closed
     */
    public ParseSentence() throws IOException {
        InputStream modelIO = new FileInputStream("en-parser-chunking.bin");
        try {
            ParserModel parserModel = new ParserModel(modelIO);
            parser = ParserFactory.create(parserModel);
        } finally {
            // The model is fully read by the ParserModel constructor, so the
            // stream must be released here; previously it was never closed.
            modelIO.close();
        }
    }

    /**
     * Parses one line of text into a {@link Sentence}.
     *
     * <p>The returned sentence keeps the original text of {@code inputLine}; for
     * parsing, every {@code '.'} is replaced by a space. Only the single best
     * parse is requested from the parser. Debug information is printed to
     * standard output, and {@link Sentence#postProcess()} is invoked before the
     * sentence is returned.
     *
     * @param inputLine the text to parse
     * @return the sentence populated with the words recognised by the rules
     */
    public Sentence parse(String inputLine) {
        result = new Sentence();
        result.setText(inputLine);

        // Periods are stripped before the text is handed to the parser.
        String withoutPeriods = inputLine.replace(".", " ");
        Parse[] parses = ParserTool.parseLine(withoutPeriods, parser, 1);
        collect(parses, "");

        System.out.println("result = " + result.getText());
        System.out.println("result.getWords() = " + result.getWords());
        System.out.println();
        System.out.println();

        result.postProcess();
        return result;
    }

    /**
     * Walks the given parse nodes depth-first and adds matching words to
     * {@link #result}.
     *
     * <p>For each node the rule set decides which part of the sentence it is:
     * <ul>
     *   <li>a node that is a phrase and matched a rule is delegated to the phrase
     *       parser; all of its words are added and its children are not visited;</li>
     *   <li>any other node that matched a rule is added as a single word;</li>
     *   <li>children of non-phrase nodes are then visited recursively.</li>
     * </ul>
     *
     * @param p   the nodes to process
     * @param tab indentation prefix used only for the debug output
     */
    private void collect(Parse[] p, String tab) {
        for (Parse parse : p) {
            PartsOfSentence part = useRules(parse);
            System.out.println(tab + parse.getType() + "_" + parse.getLabel() + "-" + parse.getHead() + " = " + part + " " + parse);

            if (ParsePhrase.isParseIsPhrase(parse) && part != null) {
                Phrase phrase = ModelFactory.getInstance().getParsePhrase().parse(parse, part);
                for (Word word : phrase.getWords()) {
                    result.addWord(word);
                }
                // The phrase parser has consumed this subtree; do not descend into it.
                continue;
            }

            if (part != null) {
                if (part.equals(PartsOfSentence.SUBJECT)) {
                    // A subject is represented by its head word, typed by the node's
                    // first child. Fall back to the node's own type when it has no
                    // children instead of failing with ArrayIndexOutOfBoundsException.
                    String subjectType = parse.getChildCount() != 0
                            ? parse.getChildren()[0].getType()
                            : parse.getType();
                    result.addWord(parse.getHead().toString(), part, subjectType);
                } else {
                    result.addWord(parse.toString(), part, parse.getType());
                }
            }

            if (parse.getChildCount() != 0) {
                collect(parse.getChildren(), tab + " \t");
            }
        }
    }

    /**
     * Finds the first rule that applies to the given parse node.
     *
     * <p>A rule applies when its part contains the node's type and its label
     * equals the node's label.
     *
     * @param parse the node to classify
     * @return the result of the first applicable rule, or {@code null} if no rule applies
     */
    private PartsOfSentence useRules(Parse parse) {
        for (Rule rule : Rule.getRules()) {
            boolean typeMatches = rule.getPart().contains(parse.getType());
            if (typeMatches && rule.getLabel().equals(parse.getLabel())) {
                return rule.getResult();
            }
        }
        return null;
    }
}