Package seekfeel.miners.supervised

Source Code of seekfeel.miners.supervised.SupervisedPanel

/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package seekfeel.miners.supervised;

import seekfeel.miners.features.extractors.UniGramsExtractor;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map.Entry;
import seekfeel.dataholders.DataUnit;
import seekfeel.dataholders.Review;
import seekfeel.supervised.essentials.CorpusHolder;
import seekfeel.miners.features.Feature;
import seekfeel.miners.SentiMiner;
import seekfeel.miners.Sentiment;
import seekfeel.miners.features.computers.BinaryComputerTaggedStem;
import seekfeel.miners.features.computers.DeltaTFIDFComputerBi;
import seekfeel.miners.features.computers.DeltaTFIDFComputerUni;
import seekfeel.miners.features.computers.FeatureValueComputer;
import seekfeel.miners.features.computers.SentimentValueComputer;
import seekfeel.miners.features.extractors.BiGramsExtractor;
import seekfeel.miners.features.extractors.FeaturesExtractor;
import seekfeel.miners.features.extractors.SentimentFeaturesExtractor;
import seekfeel.miners.features.extractors.TaggedStemsExtractor;
import seekfeel.supervised.essentials.CorpusLoader;
import weka.classifiers.Evaluation;

/**
*
* @author Ahmed
*
* In this class it will take the file name and
*/
public class SupervisedPanel implements SentiMiner {

    private CorpusHolder theCorpus;
    private WekaWrapper weka;
    private ArrayList<ArrayList<Feature>> allFeatures;
    private ArrayList<SupportedFeature> neededFeats;
    private HashMap<SupportedFeature, FeatureValueComputer> featuresComputers;

    public SupervisedPanel(String corpusFile, ArrayList<SupportedFeature> _features,CorpusLoader theLoader) {
        neededFeats = _features;
        theCorpus = theLoader.loadCorpus(corpusFile);
        prepareFeatures(null);
        weka = new WekaWrapper();
        initializeFeatsComputers();
    }

    public SupervisedPanel(ArrayList<SupportedFeature> _features) {
        neededFeats = _features;
        weka = new WekaWrapper();
        initializeFeatsComputers();
    }

    private void initializeFeatsComputers() {
        featuresComputers = new HashMap<SupportedFeature, FeatureValueComputer>();
        featuresComputers.put(SupportedFeature.UniGrams, new DeltaTFIDFComputerUni());
        featuresComputers.put(SupportedFeature.BiGrams, new DeltaTFIDFComputerBi());
        featuresComputers.put(SupportedFeature.SentimentFeats, new SentimentValueComputer());
        featuresComputers.put(SupportedFeature.TaggedStems, new BinaryComputerTaggedStem());
    }

    public void setCorpus(CorpusHolder _Corpus) {
        theCorpus = _Corpus;
        prepareFeatures(null);
    }

    public void setCorpus(CorpusHolder _Corpus, ArrayList<Integer> indecies) {
        theCorpus = _Corpus;
        prepareFeatures(indecies);
    }

    public void train() {
        ArrayList<LinkedHashMap<Integer, Double>> trainingSamples = new ArrayList<LinkedHashMap<Integer, Double>>();
        ArrayList<Integer> labels = new ArrayList<Integer>();
        getSamples(trainingSamples, labels, null);
        // LibSVMWrapper.writeTrainingFile("test.txt",labels, trainingSamples);
        weka.train(trainingSamples, labels, getNumTotalFeats());
    }

    public void train(ArrayList<Integer> indecies) {
        ArrayList<LinkedHashMap<Integer, Double>> trainingSamples = new ArrayList<LinkedHashMap<Integer, Double>>();
        ArrayList<Integer> labels = new ArrayList<Integer>();
        getSamples(trainingSamples, labels, indecies);
        ArrayList<LinkedHashMap<Integer, Double>> negExs = new ArrayList(trainingSamples.subList(labels.lastIndexOf(0) + 1, trainingSamples.size()));
        ArrayList<LinkedHashMap<Integer, Double>> posExs = new ArrayList(trainingSamples.subList(0, labels.lastIndexOf(0) + 1));
        FeaturesSelector.filterFeatures(posExs, negExs, allFeatures, 300);
        trainingSamples.clear();
        trainingSamples.addAll(posExs);
        trainingSamples.addAll(negExs);

        weka.train(trainingSamples, labels, getNumTotalFeats());
    }

    public int testClassifier(ArrayList<Integer> testIndecies) {
        ArrayList<Integer> testLabels = new ArrayList<Integer>();
        ArrayList<LinkedHashMap<Integer, Double>> testSamples = new ArrayList<LinkedHashMap<Integer, Double>>();
        ArrayList<Integer> actualLabels = new ArrayList<Integer>();
        getSamples(testSamples, actualLabels, testIndecies);

        for (int i = 0; i < testSamples.size(); i++) {
            testLabels.add((int) weka.classify(testSamples.get(i)));
        }
        return compareLabels(testLabels, actualLabels);
    }

    private void getSamples(ArrayList<LinkedHashMap<Integer, Double>> trainingSamples, ArrayList<Integer> labels, ArrayList<Integer> theIndecies) {
        ArrayList<DataUnit> positiveExamples;
        ArrayList<DataUnit> negativeExamples;
        if (theIndecies == null) {
            positiveExamples = theCorpus.getPositiveExamples();
            negativeExamples = theCorpus.getNegativeExamples();
        } else {
            positiveExamples = theCorpus.getSpecificPositiveExamples(theIndecies);
            negativeExamples = theCorpus.getSpecificNegativeExamples(theIndecies);
        }
        for (DataUnit positiveExample : positiveExamples) {
            trainingSamples.add(computeSampleFeatures(positiveExample, theIndecies));
            labels.add(0);
        }
        for (DataUnit negativeExample : negativeExamples) {
            trainingSamples.add(computeSampleFeatures(negativeExample, theIndecies));
            labels.add(1);
        }
    }

    @Override
    public Sentiment classifyText(String text) {
        Review theText = new Review();
        theText.setDataBody(text);
        LinkedHashMap<Integer, Double> textFeats = computeSampleFeatures(theText, null);
        double result = weka.classify(textFeats);
        if (result == 0.0) {
            return Sentiment.Positive;
        } else {
            return Sentiment.Negative;
        }
    }

    private void prepareFeatures(ArrayList<Integer> indecies) {
        allFeatures = new ArrayList<ArrayList<Feature>>();
        FeaturesExtractor currentFeatExtractor;
        CorpusHolder tempCorpus = indecies == null ? theCorpus : theCorpus.getNeededCorpus(indecies);
        for (SupportedFeature currentFeat : neededFeats) {
            currentFeatExtractor = getExtractor(currentFeat);
            allFeatures.add(currentFeatExtractor.filter(2, currentFeatExtractor.extractFeatures(tempCorpus)));
        }
    }

    public Evaluation kFoldValidate() {
        ArrayList<LinkedHashMap<Integer, Double>> trainingSamples = new ArrayList<LinkedHashMap<Integer, Double>>();
        ArrayList<Integer> labels = new ArrayList<Integer>();
        getSamples(trainingSamples, labels, null);
        return weka.crossValidate(trainingSamples, labels, 10, getNumTotalFeats());
    }

    private FeaturesExtractor getExtractor(SupportedFeature currentFeature) {
        switch (currentFeature) {
            case BiGrams:
                return new BiGramsExtractor();
            case UniGrams:
                return new UniGramsExtractor();
            case SentimentFeats:
                return new SentimentFeaturesExtractor();
            case TaggedStems:
                return new TaggedStemsExtractor();
            default:
                return null;
        }
    }

    private FeatureValueComputer getFeatComputer(SupportedFeature currentFeature) {
        return featuresComputers.get(currentFeature);
    }

    private LinkedHashMap<Integer, Double> computeSampleFeatures(DataUnit sample, ArrayList<Integer> theIndecies) {
        FeatureValueComputer currentFeatComputer;
        ArrayList<Feature> currentFeats;
        LinkedHashMap<Integer, Double> sampleFeats = new LinkedHashMap<Integer, Double>();
        int appendStartIndex = 0;
        CorpusHolder tempCorpus = theIndecies == null ? theCorpus : theCorpus.getNeededCorpus(theIndecies);
        for (int i = 0; i < neededFeats.size(); i++) {
            currentFeatComputer = getFeatComputer(neededFeats.get(i));
            currentFeats = allFeatures.get(i);
            appendMap(sampleFeats, currentFeatComputer.computeFeatures(sample, currentFeats, tempCorpus), appendStartIndex);
            appendStartIndex += currentFeats.size();
        }
        return sampleFeats;
    }

    private void appendMap(LinkedHashMap<Integer, Double> original, LinkedHashMap<Integer, Double> mapToAppend, int appendStartIndex) {
        Iterator<Entry<Integer, Double>> mapIt = mapToAppend.entrySet().iterator();
        Entry<Integer, Double> currentEntry;
        while (mapIt.hasNext()) {
            currentEntry = mapIt.next();
            original.put(appendStartIndex + currentEntry.getKey(), currentEntry.getValue());
        }
    }

    private int getNumTotalFeats() {
        int numFeats = 0;
        for (ArrayList<Feature> currentList : allFeatures) {
            numFeats += currentList.size();
        }
        return numFeats;
    }

    private int compareLabels(ArrayList<Integer> testLabels, ArrayList<Integer> actualLabels) {
        int numErrors = 0;
        for (int i = 0; i < testLabels.size(); i++) {
            if (testLabels.get(i) != actualLabels.get(i)) {
                numErrors++;
            }
        }
        return numErrors;
    }
}
TOP

Related Classes of seekfeel.miners.supervised.SupervisedPanel

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.