package kpi.asoiu.parsers;
import kpi.asoiu.factory.DAOFactory;
import kpi.asoiu.model.PartsOfSentence;
import kpi.asoiu.model.Phrase;
import kpi.asoiu.model.Word;
import kpi.asoiu.model.test.PhraseLevelEntity;
import opennlp.tools.parser.Parse;
import java.util.*;
/**
 * Walks an OpenNLP {@link Parse} tree and gathers, into a {@link Phrase} tree,
 * the words whose node type is accepted for a requested part of the sentence
 * (subject, predicate or object).
 *
 * <p>Child nodes whose type matches a known phrase-level entity (as reported by
 * the DAO) become nested {@link Phrase}s; other child nodes become {@link Word}s
 * when their type is in the accepted list for the requested sentence part.
 *
 * <p>NOTE(review): the phrase-level entity list is held in a static field that
 * is reassigned by every constructor call and is not synchronized; confirm
 * that instances are not created concurrently.
 *
 * @author Dara (created 01.04.12)
 */
public class ParsePhrase {
    /** Phrase-level entities obtained from the DAO; shared by all instances. */
    private static List<PhraseLevelEntity> phrases;

    // Node types accepted for each sentence part (presumably Penn Treebank tags).
    List<String> objectTypes = Arrays.asList("IN", "NP", "NN", "NS", "NNS");
    List<String> predicateTypes = Arrays.asList("VBZ", "VBP", "VBN", "VP");
    List<String> subjectTypes = Arrays.asList("NNP", "NN", "NP", "NS", "NNS", "PRP");

    /** Lookup from sentence part to the node types accepted for it. */
    Map<PartsOfSentence, List<String>> types = new HashMap<PartsOfSentence, List<String>>();

    /**
     * Reloads the phrase-level entities from the DAO and registers the accepted
     * node types for the object, subject and predicate sentence parts.
     */
    public ParsePhrase() {
        phrases = DAOFactory.getDao().getPhraseLevelEntities();
        types.put(PartsOfSentence.OBJECT, objectTypes);
        types.put(PartsOfSentence.SUBJECT, subjectTypes);
        types.put(PartsOfSentence.PREDICATE, predicateTypes);
    }

    /**
     * Builds a {@link Phrase} from the children of the given parse node.
     *
     * @param parse           parse node whose children are examined
     * @param partsOfSentence sentence part that selects which node types are
     *                        collected as words
     * @return a new phrase holding the collected words and nested phrases
     */
    public Phrase parse(Parse parse, PartsOfSentence partsOfSentence) {
        Phrase phrase = new Phrase();
        collect(parse, phrase, partsOfSentence);
        return phrase;
    }

    /**
     * Folds the words of every child phrase into one combined list and merges
     * that list with the phrase's own words.
     *
     * <p>The children are folded left to right, so all of them contribute. An
     * earlier pairwise loop reassigned its result on each iteration and thereby
     * kept only the merge of the last two children. For zero, one or two
     * children the fold yields the same words as that loop did.
     *
     * @param phrase phrase whose word list is replaced by the merged result
     */
    private void postProcess(Phrase phrase) {
        List<Word> childWords = new ArrayList<Word>();
        for (int i = 0; i < phrase.getChildren().size(); i++) {
            childWords = mergeWords(childWords, phrase.getChildren().get(i).getWords());
        }
        phrase.setWords(mergeWords(phrase.getWords(), childWords));
    }

    /**
     * Combines two word lists into a new list.
     *
     * <p>If either list is empty the result is a copy of the other. Otherwise
     * the result contains one new word per (first, second) pair: its text is
     * the two texts joined by a single space, and its part of sentence and
     * part of language are taken from the word of the first list.
     *
     * @param words1 first list; supplies the leading text and the attributes
     * @param words2 second list; supplies the trailing text
     * @return a newly allocated list, never {@code null}
     */
    private List<Word> mergeWords(List<Word> words1, List<Word> words2) {
        List<Word> merged = new ArrayList<Word>();
        if (words1.isEmpty()) {
            merged.addAll(words2);
            return merged;
        }
        if (words2.isEmpty()) {
            merged.addAll(words1);
            return merged;
        }
        for (Word first : words1) {
            for (Word second : words2) {
                Word joined = new Word();
                joined.setWord(first.getWord() + " " + second.getWord());
                joined.setPartOfSentence(first.getPartOfSentence());
                joined.setPartOfLanguage(first.getPartOfLanguage());
                merged.add(joined);
            }
        }
        return merged;
    }

    /**
     * Recursively fills {@code phrase} from the children of {@code p} and then
     * post-processes it.
     *
     * @param p               parse node whose children are visited
     * @param phrase          phrase receiving words and nested phrases
     * @param partsOfSentence sentence part selecting the accepted node types;
     *                        when no types are registered for it (or it is
     *                        {@code null}) no words are collected
     * @return the same {@code phrase} instance that was passed in
     */
    private Phrase collect(Parse p, Phrase phrase, PartsOfSentence partsOfSentence) {
        // Looked up once per call; an unmapped sentence part yields no words
        // instead of a NullPointerException.
        List<String> accepted = types.get(partsOfSentence);
        if (accepted == null) {
            accepted = Collections.emptyList();
        }
        for (Parse parse : p.getChildren()) {
            // Debug trace of every visited node, written to standard output.
            System.out.println(parse.getType() + "_" + parse.getLabel() + "-" + parse.getHead() + " = " + partsOfSentence + " " + parse);
            if (isParseIsPhrase(parse)) {
                phrase.getChildren().add(collect(parse, new Phrase(), partsOfSentence));
            } else if (accepted.contains(parse.getType())) {
                Word w = new Word();
                w.setWord(parse.toString());
                w.setPartOfLanguage(parse.getType());
                w.setPartOfSentence(partsOfSentence);
                phrase.getWords().add(w);
            }
        }
        postProcess(phrase);
        return phrase;
    }

    /**
     * Tells whether the type of the given parse node equals the name of one of
     * the phrase-level entities provided by the DAO.
     *
     * <p>The entities are loaded on first use when no {@code ParsePhrase}
     * instance has been constructed yet, so this static method no longer
     * depends on a prior constructor call.
     *
     * @param p parse node to test
     * @return {@code true} if the node type matches a phrase-level entity name;
     *         {@code false} if the type is {@code null}, no entities are
     *         available, or none matches
     */
    public static boolean isParseIsPhrase(Parse p) {
        String type = p.getType();
        if (type == null) {
            return false;
        }
        if (phrases == null) {
            phrases = DAOFactory.getDao().getPhraseLevelEntities();
        }
        if (phrases == null) {
            // NOTE(review): treats a null DAO result as "no phrase types" — confirm.
            return false;
        }
        for (PhraseLevelEntity phraseLevelEntity : phrases) {
            // Comparison is anchored on the non-null type so an entity without a name cannot throw.
            if (type.equals(phraseLevelEntity.getName())) {
                return true;
            }
        }
        return false;
    }
}