package kpi.asoiu.model;
import kpi.asoiu.dao.SimpleDAO;
import kpi.asoiu.factory.DAOFactory;
import kpi.asoiu.model.test.ArticleEntity;
import kpi.asoiu.model.test.SubjectsEntity;
import kpi.asoiu.model.test.TripletEntity;
import java.util.*;
/**
 * A parsed text: an ordered list of {@link Sentence}s plus a dependency tree
 * built from the triplets of those sentences.
 *
 * <p>Typical use, as far as this class shows: add sentences with
 * {@link #addSentence(Sentence)}, then call {@link #postProcess()} to substitute
 * pronoun subjects, build the triplet tree and print the results to standard output.
 *
 * Created by IntelliJ IDEA.
 * User: Dara
 * Date: 20.11.11
 * Time: 16:24
 */
public class Text {

    /**
     * Subject type that triggers subject substitution in {@link #replaceSubjects()}.
     * Presumably the part-of-speech tag for a personal pronoun - confirm against the tagger in use.
     */
    private static final String PRONOUN_TAG = "PRP";

    /** How many of the next pending triplets are inspected when looking for a child triplet. */
    private static final int LOOKAHEAD_WINDOW = 3;

    /**
     * Triplet tree: every key is a root triplet, every value is the (possibly empty) map of
     * its children, nested in the same way. Kept package-private and with its original
     * declared type so that any existing access from the same package keeps compiling.
     */
    Map<Triplet, Map> tree = new LinkedHashMap<Triplet, Map>();

    /** Sentences in the order they were added. */
    private final List<Sentence> sentences = new ArrayList<Sentence>();

    /**
     * Runs the whole post-processing pipeline: substitutes pronoun subjects, rebuilds the
     * triplet tree, then prints every sentence, the tree and the subject frequencies to
     * standard output.
     */
    public void postProcess() {
        replaceSubjects();
        buildTree();
        // Persistence is currently disabled; re-enable the call below to store the article.
        // saveArticle();
        for (Sentence sentence : sentences) {
            System.out.println(sentence);
        }
        printTree();
        System.out.println("\n Frequency : " + calculateSubjectFrequency());
    }

    /**
     * Stores the article, its subjects and its triplets through the DAO returned by
     * {@link DAOFactory#getDao()}.
     *
     * <p>Order matters: the article is added first so subjects can reference it, subjects are
     * added before triplets so triplets can reference them, and the article is updated last
     * once both collections are attached. The value returned by each {@code addObjects} call
     * is stored as the id of the entity that was just added.
     */
    private void saveArticle() {
        ArticleEntity article = new ArticleEntity();
        StringBuilder text = new StringBuilder();
        for (Sentence sentence : sentences) {
            text.append(sentence.getText());
        }
        article.setText(text.toString());

        SimpleDAO simpleDAO = DAOFactory.getDao();
        article.setId(simpleDAO.addObjects(article));

        // Keyed by normalized subject so triplets can look their subject entity up later.
        Map<String, SubjectsEntity> subjectsEntityMap = new HashMap<String, SubjectsEntity>();
        for (Map.Entry<String, Double> frequency : calculateSubjectFrequency().entrySet()) {
            SubjectsEntity subjectsEntity = new SubjectsEntity();
            subjectsEntity.setFreq(frequency.getValue().floatValue());
            subjectsEntity.setWord(frequency.getKey());
            subjectsEntity.setArticle(article);
            subjectsEntityMap.put(frequency.getKey(), subjectsEntity);
            subjectsEntity.setId(simpleDAO.addObjects(subjectsEntity));
        }
        article.setSubjects(new ArrayList<SubjectsEntity>(subjectsEntityMap.values()));

        // Flatten the tree so that every parent precedes its descendants in the list.
        List<TripletEntity> tripletEntities = new ArrayList<TripletEntity>();
        for (Map.Entry<Triplet, Map> root : tree.entrySet()) {
            TripletEntity rootEntity = createEntityFromTriplet(root.getKey(), subjectsEntityMap);
            tripletEntities.add(rootEntity);
            tripletEntities.addAll(treeToList(rootEntity, root.getValue(), subjectsEntityMap));
        }
        for (TripletEntity tripletEntity : tripletEntities) {
            tripletEntity.setId(simpleDAO.addObjects(tripletEntity));
        }
        article.setTriplets(tripletEntities);
        simpleDAO.updateObject(article);
    }

    /**
     * Builds a {@link TripletEntity} carrying the triplet's object, predicate and subject entity.
     *
     * @param triplet  source triplet
     * @param subjects subject entities keyed by normalized subject text
     * @return a new entity; its subject is {@code null} when the triplet has no subject or no
     *         entity is registered for it
     */
    private TripletEntity createEntityFromTriplet(Triplet triplet, Map<String, SubjectsEntity> subjects) {
        TripletEntity tripletEntity = new TripletEntity();
        tripletEntity.setObject(triplet.getObject());
        // A triplet may have no subject (calculateSubjectFrequency skips those), so guard the lookup.
        String subject = triplet.getSubject();
        tripletEntity.setSubject(subject == null ? null : subjects.get(normalizeSubject(subject)));
        tripletEntity.setPredicate(triplet.getPredicate());
        return tripletEntity;
    }

    /**
     * Flattens a sub-tree into a list of entities, depth first, each parent listed before its
     * descendants and linked to them through {@code setParentTriplet}.
     *
     * @param parentTriplet entity that owns the given sub-tree
     * @param tripletTree   children of {@code parentTriplet}; may be {@code null} or empty
     * @param subjects      subject entities keyed by normalized subject text
     * @return entities for every triplet in the sub-tree; empty when there are none
     */
    private List<TripletEntity> treeToList(TripletEntity parentTriplet,
                                           Map<Triplet, Map> tripletTree,
                                           Map<String, SubjectsEntity> subjects) {
        List<TripletEntity> entities = new ArrayList<TripletEntity>();
        if (tripletTree == null) {
            return entities;
        }
        for (Map.Entry<Triplet, Map> node : tripletTree.entrySet()) {
            TripletEntity tripletEntity = createEntityFromTriplet(node.getKey(), subjects);
            tripletEntity.setParentTriplet(parentTriplet);
            // The child itself has to be collected, not only its descendants.
            entities.add(tripletEntity);
            entities.addAll(treeToList(tripletEntity, node.getValue(), subjects));
        }
        return entities;
    }

    /**
     * For every sentence whose first triplet has subject type {@link #PRONOUN_TAG}, overwrites
     * the subject of all of its triplets with the subject of the previous sentence's first
     * triplet. Sentences without triplets, or following a sentence without triplets, are left alone.
     */
    private void replaceSubjects() {
        Sentence previous = null;
        for (Sentence current : sentences) {
            if (previous != null
                    && !current.getTriplets().isEmpty()
                    && !previous.getTriplets().isEmpty()
                    && PRONOUN_TAG.equals(current.getTriplets().get(0).getSubjectType())) {
                String newSubject = previous.getTriplets().get(0).getSubject();
                for (Triplet triplet : current.getTriplets()) {
                    triplet.setSubject(newSubject);
                }
            }
            previous = current;
        }
    }

    /**
     * Rebuilds {@link #tree} from the triplets of all sentences. Triplets are consumed in
     * order: the head of the queue becomes a root and {@link #findDependencies} pulls its
     * descendants out of the remaining queue.
     */
    private void buildTree() {
        // Start from scratch so a repeated postProcess() cannot leave stale roots behind.
        tree.clear();
        LinkedList<Triplet> pending = new LinkedList<Triplet>(getAllTriplets());
        while (!pending.isEmpty()) {
            Triplet root = pending.poll();
            tree.put(root, findDependencies(root, pending));
        }
    }

    /**
     * Collects the children of {@code triplet}: pending triplets, within the first
     * {@link #LOOKAHEAD_WINDOW} positions of the queue, whose subject equals the triplet's
     * object ignoring case. Each child is removed from the queue and its own children are
     * resolved recursively against what remains.
     *
     * @param triplet  triplet whose object is matched against following subjects
     * @param triplets pending triplets; matched ones are removed from this list
     * @return children in discovery order mapped to their own children; empty when the
     *         triplet has no object or nothing matches
     */
    private Map<Triplet, Map> findDependencies(Triplet triplet, LinkedList<Triplet> triplets) {
        Map<Triplet, Map> result = new LinkedHashMap<Triplet, Map>();
        String object = triplet.getObject();
        if (object == null) {
            return result;
        }
        int i = 0;
        // The bound is re-evaluated each pass because matches (here and in recursion) shrink the queue.
        while (i < LOOKAHEAD_WINDOW && i < triplets.size()) {
            if (object.equalsIgnoreCase(triplets.get(i).getSubject())) {
                // Do not advance: after removal the next candidate slides into position i.
                Triplet childTriplet = triplets.remove(i);
                result.put(childTriplet, findDependencies(childTriplet, triplets));
            } else {
                i++;
            }
        }
        return result;
    }

    /**
     * @return a new list with the triplets of every sentence, in sentence order
     */
    private List<Triplet> getAllTriplets() {
        List<Triplet> allTriplets = new ArrayList<Triplet>();
        for (Sentence sentence : sentences) {
            allTriplets.addAll(sentence.getTriplets());
        }
        return allTriplets;
    }

    /**
     * Prints the triplet tree to standard output: one triplet per line, each nesting level
     * indented by one additional tab.
     */
    public void printTree() {
        for (Map.Entry<Triplet, Map> root : tree.entrySet()) {
            System.out.println(root.getKey());
            printNode("\t", root.getValue());
        }
    }

    /**
     * Prints one level of the tree and recurses into its children.
     *
     * @param prefix indentation written before every triplet of this level
     * @param node   triplets of this level mapped to their children; {@code null} prints nothing
     */
    private void printNode(String prefix, Map<Triplet, Map> node) {
        if (node == null) {
            return;
        }
        for (Map.Entry<Triplet, Map> child : node.entrySet()) {
            System.out.println(prefix + child.getKey());
            printNode(prefix + "\t", child.getValue());
        }
    }

    /**
     * Computes, for every distinct subject, the share of triplets that use it. Subjects are
     * compared case-insensitively; triplets without a subject are ignored and do not count
     * towards the total.
     *
     * @return normalized subject mapped to its relative frequency; empty when no triplet has a subject
     */
    private Map<String, Double> calculateSubjectFrequency() {
        Map<String, Integer> subjectsCounts = new HashMap<String, Integer>();
        int subjectSize = 0;
        for (Triplet triplet : getAllTriplets()) {
            String subject = triplet.getSubject();
            if (subject == null) {
                continue;
            }
            String key = normalizeSubject(subject);
            Integer seen = subjectsCounts.get(key);
            subjectsCounts.put(key, seen == null ? 1 : seen + 1);
            subjectSize++;
        }

        Map<String, Double> subjectsFrequency = new HashMap<String, Double>();
        for (Map.Entry<String, Integer> count : subjectsCounts.entrySet()) {
            subjectsFrequency.put(count.getKey(), count.getValue().doubleValue() / subjectSize);
        }
        return subjectsFrequency;
    }

    /**
     * Lower-cases a subject with a fixed locale so the keys produced while counting and the
     * keys used for lookups agree on every platform, whatever the default locale is.
     *
     * @param subject non-null subject text
     * @return the subject in lower case
     */
    private static String normalizeSubject(String subject) {
        return subject.toLowerCase(Locale.ENGLISH);
    }

    /**
     * Appends a sentence to the text.
     *
     * @param sentence sentence to add
     */
    public void addSentence(Sentence sentence) {
        sentences.add(sentence);
    }

    /**
     * @return the live internal list of sentences (not a copy), in insertion order
     */
    public List<Sentence> getSentences() {
        return sentences;
    }
}