package net.sf.nlpshell;
import java.util.ArrayList;
import java.util.List;
import net.sf.nlpshell.domain.PartOfSpeech;
import net.sf.nlpshell.domain.clause.Sentence;
import net.sf.nlpshell.domain.phrase.Phrase;
import net.sf.nlpshell.domain.word.POSWord;
import net.sf.nlpshell.domain.word.POSWord.PennTreeBankWordTagset;
import opennlp.tools.parser.Parse;
/**
 * Converts an OpenNLP {@link Parse} tree into this project's
 * {@link PartOfSpeech} object model (words, phrases and clauses).
 */
public class OpenNlpConverter {

    /**
     * Recursively converts a parse node, and all of its children, into a
     * {@link PartOfSpeech}.
     *
     * <p>The node type returned by {@link Parse#getType()} is matched, in
     * order, against the word tagset, the clause tagset and the phrase
     * tagset. The checks are independent, so if a type were to match more
     * than one tagset the later match wins (phrase over clause over word).
     * Clause and phrase nodes receive the converted children of the parse
     * node; word nodes have no children added.
     *
     * @param parse the parse node to convert; must not be {@code null}
     * @return the converted node, never {@code null}
     * @throws NullPointerException if {@code parse} is {@code null}
     * @throws IllegalArgumentException if the type of {@code parse}, or of
     *         any node below it, matches none of the known tagsets
     */
    public PartOfSpeech buildPOS(Parse parse) {
        if (parse == null) {
            // Fail with a descriptive message instead of an anonymous NPE
            // raised by parse.getType() below.
            throw new NullPointerException("parse must not be null");
        }

        String type = parse.getType();
        PartOfSpeech partOfSpeech = null;

        // Word-level tag.
        PennTreeBankWordTagset posTag = POSWord.resolveTag(type);
        if (posTag != null) {
            partOfSpeech = POSWord.newWordInstance(posTag, parse.toString());
        }

        // Clause-level tag: overrides a word match, if any.
        if (Sentence.PennTreeBankClauseTagset.literals().contains(type)) {
            Sentence sentence = Sentence.newClauseInstance(type,
                    parse.toString());
            sentence.children.addAll(convertChildren(parse));
            partOfSpeech = sentence;
        }

        // Phrase-level tag: overrides a word or clause match, if any.
        if (Phrase.PennTreeBankPhraseTagset.literals().contains(type)) {
            Phrase phrase = Phrase.newPhraseInstance(type, parse.toString());
            phrase.children.addAll(convertChildren(parse));
            partOfSpeech = phrase;
        }

        if (partOfSpeech == null) {
            throw new IllegalArgumentException("Not yet supported type : ["
                    + type + "], text : [" + parse.toString() + "]");
        }
        return partOfSpeech;
    }

    /**
     * Converts every direct child of {@code parse} with
     * {@link #buildPOS(Parse)}, preserving the children's order.
     */
    private List<PartOfSpeech> convertChildren(Parse parse) {
        Parse[] parseChildren = parse.getChildren();
        List<PartOfSpeech> parts = new ArrayList<PartOfSpeech>(
                parseChildren.length);
        for (Parse parseChild : parseChildren) {
            parts.add(buildPOS(parseChild));
        }
        return parts;
    }
}