Package kpi.asoiu.model

Source Code of kpi.asoiu.model.Text

package kpi.asoiu.model;

import kpi.asoiu.dao.SimpleDAO;
import kpi.asoiu.factory.DAOFactory;
import kpi.asoiu.model.test.ArticleEntity;
import kpi.asoiu.model.test.SubjectsEntity;
import kpi.asoiu.model.test.TripletEntity;

import java.util.*;

/**
* Created by IntelliJ IDEA.
* User: Dara
* Date: 20.11.11
* Time: 16:24
*/
public class Text {

    Map<Triplet, Map> tree = new LinkedHashMap<Triplet, Map>();

    private List<Sentence> sentences = new ArrayList<Sentence>();

//    private Map<Triplet, List<Triplet>> tree = new HashMap<Triplet, List<Triplet>>();

    public void postProcess() {
        replaceSubjects();
        buildTree();
//        saveArticle();


        for (Sentence sentence : sentences) {
            System.out.println(sentence);
        }
        printTree();

        System.out.println("\n Frequency : " + calculateSubjectFrequency());
    }

    private void saveArticle() {
        ArticleEntity article = new ArticleEntity();
        StringBuffer text = new StringBuffer();
        for (Sentence sentence : sentences) {
            text.append(sentence.getText());
        }
        article.setText(text.toString());
        SimpleDAO simpleDAO = DAOFactory.getDao();
        article.setId(simpleDAO.addObjects(article));
        Map<String, SubjectsEntity> subjectsEntityMap = new HashMap<String, SubjectsEntity>();
        Map<String, Double> subjectsMap = calculateSubjectFrequency();
        for (String word : subjectsMap.keySet()) {
            SubjectsEntity subjectsEntity = new SubjectsEntity();
            subjectsEntity.setFreq(subjectsMap.get(word).floatValue());
            subjectsEntity.setWord(word);
            subjectsEntity.setArticle(article);
            subjectsEntityMap.put(word, subjectsEntity);
            subjectsEntity.setId(simpleDAO.addObjects(subjectsEntity));
        }

        article.setSubjects(new ArrayList<SubjectsEntity>(subjectsEntityMap.values()));
        List<TripletEntity> tripletEntities = new ArrayList<TripletEntity>();
        for (Triplet triplet : tree.keySet()) {
            TripletEntity tripletEntity = createEntityFromTriplet(triplet, subjectsEntityMap);
            tripletEntities.add(tripletEntity);
            tripletEntities.addAll(treeToList(tripletEntity, tree.get(triplet), subjectsEntityMap));
        }
        for (TripletEntity tripletEntity : tripletEntities) {
            tripletEntity.setId(simpleDAO.addObjects(tripletEntity));
        }
        article.setTriplets(tripletEntities);
        simpleDAO.updateObject(article);
    }

    private TripletEntity createEntityFromTriplet(Triplet triplet, Map<String, SubjectsEntity> subjects) {
        TripletEntity tripletEntity = new TripletEntity();
        tripletEntity.setObject(triplet.getObject());
        tripletEntity.setSubject(subjects.get(triplet.getSubject().toLowerCase()));
        tripletEntity.setPredicate(triplet.getPredicate());
        return tripletEntity;
    }

    private List<TripletEntity> treeToList(TripletEntity parentTriplet,
                                           Map<Triplet, Map> tripletTree, Map<String, SubjectsEntity> subjects) {
        List<TripletEntity> entities = new ArrayList<TripletEntity>();
        if (tripletTree == null || tripletTree.isEmpty()) {
            return entities;
        } else {
            for (Triplet triplet : tripletTree.keySet()) {
                TripletEntity tripletEntity = createEntityFromTriplet(triplet, subjects);
                tripletEntity.setParentTriplet(parentTriplet);
                entities.addAll(treeToList(tripletEntity, tripletTree.get(triplet), subjects));
            }
        }
        return entities;
    }


    private void replaceSubjects() {
        Iterator iterator = sentences.iterator();
        Sentence prevSentence = null;
        Sentence currSentence = null;
        if (iterator.hasNext())
            prevSentence = (Sentence) iterator.next();

        if (prevSentence != null) {
            while (iterator.hasNext()) {
                currSentence = (Sentence) iterator.next();
                if (!currSentence.getTriplets().isEmpty() &&
                        !prevSentence.getTriplets().isEmpty() &&
                        currSentence.getTriplets().get(0).getSubjectType() != null &&
                        currSentence.getTriplets().get(0).getSubjectType().equals("PRP")) {
                    String newSubject = prevSentence.getTriplets().get(0).getSubject();
                    for (Triplet triplet : currSentence.getTriplets()) {
                        triplet.setSubject(newSubject);
                    }
                }
                prevSentence = currSentence;
            }
        }
    }

    private void buildTree() {
        LinkedList<Triplet> triplets = new LinkedList<Triplet>();
        for (Sentence sentence : sentences) {
            triplets.addAll(sentence.getTriplets());
        }

        while (!triplets.isEmpty()) {
            Triplet triplet = triplets.poll();
            tree.put(triplet, findDependencies(triplet, triplets));
        }
    }

    private Map findDependencies(Triplet triplet, LinkedList<Triplet> triplets) {
        int size = triplets.size() > 3 ? 3 : triplets.size();
        int i = 0;
        Map<Triplet, Object> result = new LinkedHashMap<Triplet, Object>();
        while (i < size) {
            if (triplet.getObject() == null) {
                i++;
                continue;
            }
            if (triplets.size() < i + 1) break;
            if (triplet.getObject().equalsIgnoreCase(triplets.get(i).getSubject())) {
                Triplet childTriplet = triplets.get(i);
                triplets.remove(i);
                Map resMap = findDependencies(childTriplet, triplets);
                result.put(childTriplet, resMap);
                continue;
            }
            i++;
        }
        return result;
    }
//
//        Map<String, List<Triplet>> objects = new HashMap<String, List<Triplet>>();
//        for (Sentence sentence : sentences) {
//            for (Triplet triplet : sentence.getTriplets()) {
//                if (objects.containsKey(triplet.getSubject().toLowerCase())) {
//                    objects.get(triplet.getSubject().toLowerCase()).add(triplet);
//                } else {
//                    List<Triplet> triplets = new LinkedList<Triplet>();
//                    triplets.add(triplet);
//                    objects.put(triplet.getSubject().toLowerCase(), triplets);
//                }
//            }
//        }
//
//        for (Sentence sentence : sentences) {
//            for (Triplet triplet : sentence.getTriplets()) {
//                List<Triplet> node = new ArrayList<Triplet>();
//                if(triplet.getObject() != null)
//                    node.addAll(objects.get(triplet.getObject().toLowerCase()) == null ? Collections.EMPTY_LIST : objects.get(triplet.getObject().toLowerCase()));
//                tree.put(triplet, node);
//            }
//        }
//        Iterator iterator = sentences.iterator();
//        Sentence prevSentence = null;
//        Sentence currSentence = null;
//        if (iterator.hasNext())
//            prevSentence = (Sentence) iterator.next();
//
//        if (prevSentence != null) {
//            while (iterator.hasNext()) {
//                currSentence = (Sentence) iterator.next();
//
//                prevSentence = currSentence;
//            }
//        }

//        List<Triplet> allTriplets = getAllTriplets();
//
//        Iterator iterator = allTriplets.iterator();
//        Triplet prevTriplet = null;
//        Triplet currTriplet = null;
//        if (iterator.hasNext())
//            prevTriplet = (Triplet) iterator.next();
//
//        if (prevTriplet != null) {
//            while (iterator.hasNext()) {
//                currTriplet = (Triplet) iterator.next();
//                if (currTriplet.getSubject().equalsIgnoreCase(prevTriplet.getObject())) {
//                    if (tree.containsKey(prevTriplet)) {
//                        tree.get(prevTriplet).add(currTriplet);
//                    } else {
//                        List<Triplet> node = new ArrayList<Triplet>();
//                        node.add(currTriplet);
//                        tree.put(prevTriplet, node);
//                    }
//                    continue;
//                } else {
//                    tree.put(currTriplet, new ArrayList<Triplet>());
//                }
//                prevTriplet = currTriplet;
//            }
//        }
//    }

    private List<Triplet> getAllTriplets() {
        List<Triplet> allTriplets = new ArrayList<Triplet>();
        for (Sentence sentence : sentences) {
            allTriplets.addAll(sentence.getTriplets());
        }
        return allTriplets;
    }

    public void printTree() {
        for (Triplet triplet : tree.keySet()) {
            System.out.println(triplet);
            printNode("\t", tree.get(triplet));
//            System.out.println(tree.get(triplet));
        }
    }

    private void printNode(String prefix, Map<Triplet, Map> node) {
        if (node != null) {
            for (Triplet triplet : node.keySet()) {
                System.out.println(prefix + triplet);
                printNode(prefix + "\t", node.get(triplet));
            }
        }
    }

    private Map<String, Double> calculateSubjectFrequency() {
        Map<String, Integer> subjectsCounts = new HashMap<String, Integer>();
        Map<String, Double> subjectsFrequency = new HashMap<String, Double>();
        List<Triplet> allTriplets = getAllTriplets();
        int subjectSize = 0;
        for (Triplet triplet : allTriplets) {
            String subject = triplet.getSubject();
            if (subject == null) continue;
            if (subjectsCounts.containsKey(subject.toLowerCase())) {
                Integer count = subjectsCounts.get(subject.toLowerCase());
                count++;
                subjectsCounts.put(subject.toLowerCase(), count);
            } else {
                subjectsCounts.put(subject.toLowerCase(), 1);
            }
            subjectSize++;
        }

        for (String subject : subjectsCounts.keySet()) {
            subjectsFrequency.put(subject, Double.valueOf(subjectsCounts.get(subject)) / subjectSize);
        }
        return subjectsFrequency;
    }


    public void addSentence(Sentence sentence) {
        sentences.add(sentence);
    }

    public List<Sentence> getSentences() {
        return sentences;
    }
}
TOP

Related Classes of kpi.asoiu.model.Text

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.