package seekfeel.miners.supervised;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map.Entry;
import seekfeel.dataholders.DataUnit;
import seekfeel.dataholders.Review;
import seekfeel.miners.SentiMiner;
import seekfeel.miners.Sentiment;
import seekfeel.miners.features.Feature;
import seekfeel.miners.features.computers.BinaryComputerTaggedStem;
import seekfeel.miners.features.computers.DeltaTFIDFComputerBi;
import seekfeel.miners.features.computers.DeltaTFIDFComputerUni;
import seekfeel.miners.features.computers.FeatureValueComputer;
import seekfeel.miners.features.computers.SentimentValueComputer;
import seekfeel.miners.features.extractors.BiGramsExtractor;
import seekfeel.miners.features.extractors.FeaturesExtractor;
import seekfeel.miners.features.extractors.SentimentFeaturesExtractor;
import seekfeel.miners.features.extractors.TaggedStemsExtractor;
import seekfeel.miners.features.extractors.UniGramsExtractor;
import seekfeel.supervised.essentials.CorpusHolder;
import seekfeel.supervised.essentials.CorpusLoader;
import weka.classifiers.Evaluation;
/**
* Supervised sentiment classifier. Loads a labeled corpus, extracts the
* requested feature types, turns each document into a sparse feature vector,
* and delegates training, classification, and cross-validation to a
* Weka-backed wrapper.
*
* @author Ahmed
*/
public class SupervisedPanel implements SentiMiner {
private CorpusHolder theCorpus; // labeled training corpus
private WekaWrapper weka; // wrapper around the underlying Weka classifier
private ArrayList<ArrayList<Feature>> allFeatures; // extracted features, one list per requested feature type
private ArrayList<SupportedFeature> neededFeats; // feature types requested by the caller
private HashMap<SupportedFeature, FeatureValueComputer> featuresComputers; // maps each feature type to its value computer
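/**
* Builds a classifier from a corpus file: the loader reads the corpus,
* features are extracted immediately, and the Weka wrapper is prepared.
*
* A minimal usage sketch; {@code MyCorpusLoader} stands in for a
* hypothetical CorpusLoader implementation that is not part of this class:
* <pre>{@code
* ArrayList<SupportedFeature> feats = new ArrayList<SupportedFeature>();
* feats.add(SupportedFeature.UniGrams);
* SupervisedPanel panel = new SupervisedPanel("corpus.txt", feats, new MyCorpusLoader());
* panel.train();
* Sentiment s = panel.classifyText("The battery life is excellent.");
* }</pre>
*/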
public SupervisedPanel(String corpusFile, ArrayList<SupportedFeature> features, CorpusLoader theLoader) {
neededFeats = features;
theCorpus = theLoader.loadCorpus(corpusFile);
prepareFeatures(null);
weka = new WekaWrapper();
initializeFeatsComputers();
}
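/**
* Builds a classifier without a corpus; call {@link #setCorpus} before
* training or classifying.
*/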
public SupervisedPanel(ArrayList<SupportedFeature> features) {
neededFeats = features;
weka = new WekaWrapper();
initializeFeatsComputers();
}
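/**
* Registers the value computer used for each supported feature type:
* Delta TF-IDF for unigrams and bigrams, sentiment scores for sentiment
* features, and binary presence for tagged stems.
*/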
private void initializeFeatsComputers() {
featuresComputers = new HashMap<SupportedFeature, FeatureValueComputer>();
featuresComputers.put(SupportedFeature.UniGrams, new DeltaTFIDFComputerUni());
featuresComputers.put(SupportedFeature.BiGrams, new DeltaTFIDFComputerBi());
featuresComputers.put(SupportedFeature.SentimentFeats, new SentimentValueComputer());
featuresComputers.put(SupportedFeature.TaggedStems, new BinaryComputerTaggedStem());
}
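/**
* Replaces the corpus and re-extracts all requested feature types from it.
*/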
public void setCorpus(CorpusHolder corpus) {
theCorpus = corpus;
prepareFeatures(null);
}
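/**
* Replaces the corpus and re-extracts features using only the examples at
* the given indices (e.g., the training fold of a split).
*/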
public void setCorpus(CorpusHolder corpus, ArrayList<Integer> indices) {
theCorpus = corpus;
prepareFeatures(indices);
}
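/**
* Trains the underlying Weka classifier on the entire corpus.
*/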
public void train() {
ArrayList<LinkedHashMap<Integer, Double>> trainingSamples = new ArrayList<LinkedHashMap<Integer, Double>>();
ArrayList<Integer> labels = new ArrayList<Integer>();
getSamples(trainingSamples, labels, null);
weka.train(trainingSamples, labels, getNumTotalFeats());
}
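/**
* Trains on the examples at the given indices only. Because getSamples
* appends all positive examples (label 0) before the negative ones, the
* sample list can be split at the last index of label 0. A feature-selection
* pass keyed by the constant 300 (presumably the number of features to keep)
* is applied before training.
*/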
public void train(ArrayList<Integer> indices) {
ArrayList<LinkedHashMap<Integer, Double>> trainingSamples = new ArrayList<LinkedHashMap<Integer, Double>>();
ArrayList<Integer> labels = new ArrayList<Integer>();
getSamples(trainingSamples, labels, indices);
// Positives occupy the prefix of the sample list, negatives the suffix.
int firstNegative = labels.lastIndexOf(0) + 1;
ArrayList<LinkedHashMap<Integer, Double>> posExs = new ArrayList<LinkedHashMap<Integer, Double>>(trainingSamples.subList(0, firstNegative));
ArrayList<LinkedHashMap<Integer, Double>> negExs = new ArrayList<LinkedHashMap<Integer, Double>>(trainingSamples.subList(firstNegative, trainingSamples.size()));
FeaturesSelector.filterFeatures(posExs, negExs, allFeatures, 300);
trainingSamples.clear();
trainingSamples.addAll(posExs);
trainingSamples.addAll(negExs);
weka.train(trainingSamples, labels, getNumTotalFeats());
}
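/**
* Classifies the examples at the given indices and returns the number of
* misclassifications. A sketch of one possible train/test split (the index
* lists here are assumptions, not produced by this class):
* <pre>{@code
* panel.setCorpus(corpus, trainIndices);
* panel.train(trainIndices);
* int errors = panel.testClassifier(testIndices);
* }</pre>
*/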
public int testClassifier(ArrayList<Integer> testIndices) {
ArrayList<Integer> testLabels = new ArrayList<Integer>();
ArrayList<LinkedHashMap<Integer, Double>> testSamples = new ArrayList<LinkedHashMap<Integer, Double>>();
ArrayList<Integer> actualLabels = new ArrayList<Integer>();
getSamples(testSamples, actualLabels, testIndices);
for (int i = 0; i < testSamples.size(); i++) {
testLabels.add((int) weka.classify(testSamples.get(i)));
}
return compareLabels(testLabels, actualLabels);
}
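/**
* Fills trainingSamples and labels with feature vectors for the corpus
* examples (all of them when theIndices is null, otherwise only those at
* the given indices). Positive examples are labeled 0 and appended first;
* negative examples are labeled 1 and appended after them.
*/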
private void getSamples(ArrayList<LinkedHashMap<Integer, Double>> trainingSamples, ArrayList<Integer> labels, ArrayList<Integer> theIndices) {
ArrayList<DataUnit> positiveExamples;
ArrayList<DataUnit> negativeExamples;
if (theIndices == null) {
positiveExamples = theCorpus.getPositiveExamples();
negativeExamples = theCorpus.getNegativeExamples();
} else {
positiveExamples = theCorpus.getSpecificPositiveExamples(theIndices);
negativeExamples = theCorpus.getSpecificNegativeExamples(theIndices);
}
for (DataUnit positiveExample : positiveExamples) {
trainingSamples.add(computeSampleFeatures(positiveExample, theIndices));
labels.add(0); // 0 = positive
}
for (DataUnit negativeExample : negativeExamples) {
trainingSamples.add(computeSampleFeatures(negativeExample, theIndices));
labels.add(1); // 1 = negative
}
}
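/**
* Classifies a single piece of raw text: it is wrapped in a Review,
* converted to a feature vector, and passed to the trained classifier.
* Class 0 maps to Positive; anything else maps to Negative.
*/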
@Override
public Sentiment classifyText(String text) {
Review theText = new Review();
theText.setDataBody(text);
LinkedHashMap<Integer, Double> textFeats = computeSampleFeatures(theText, null);
double result = weka.classify(textFeats);
if (result == 0.0) {
return Sentiment.Positive;
} else {
return Sentiment.Negative;
}
}
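/**
* Extracts every requested feature type from the corpus (or from the
* sub-corpus at the given indices) and applies each extractor's filter
* with a threshold of 2, presumably a minimum-occurrence cutoff.
*/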
private void prepareFeatures(ArrayList<Integer> indices) {
allFeatures = new ArrayList<ArrayList<Feature>>();
FeaturesExtractor currentFeatExtractor;
CorpusHolder tempCorpus = indices == null ? theCorpus : theCorpus.getNeededCorpus(indices);
for (SupportedFeature currentFeat : neededFeats) {
currentFeatExtractor = getExtractor(currentFeat);
allFeatures.add(currentFeatExtractor.filter(2, currentFeatExtractor.extractFeatures(tempCorpus)));
}
}
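/**
* Runs 10-fold cross-validation over the whole corpus and returns the
* Weka Evaluation holding the aggregated results.
*/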
public Evaluation kFoldValidate() {
ArrayList<LinkedHashMap<Integer, Double>> trainingSamples = new ArrayList<LinkedHashMap<Integer, Double>>();
ArrayList<Integer> labels = new ArrayList<Integer>();
getSamples(trainingSamples, labels, null);
return weka.crossValidate(trainingSamples, labels, 10, getNumTotalFeats());
}
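/**
* Maps a feature type to a fresh extractor instance; returns null for
* unsupported types.
*/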
private FeaturesExtractor getExtractor(SupportedFeature currentFeature) {
switch (currentFeature) {
case BiGrams:
return new BiGramsExtractor();
case UniGrams:
return new UniGramsExtractor();
case SentimentFeats:
return new SentimentFeaturesExtractor();
case TaggedStems:
return new TaggedStemsExtractor();
default:
return null;
}
}
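/**
* Looks up the value computer registered for the given feature type.
*/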
private FeatureValueComputer getFeatComputer(SupportedFeature currentFeature) {
return featuresComputers.get(currentFeature);
}
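/**
* Builds the sparse feature vector for one sample by running every
* requested feature type's computer and appending its output at an offset,
* so the index ranges of different feature types do not collide.
*/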
private LinkedHashMap<Integer, Double> computeSampleFeatures(DataUnit sample, ArrayList<Integer> theIndices) {
FeatureValueComputer currentFeatComputer;
ArrayList<Feature> currentFeats;
LinkedHashMap<Integer, Double> sampleFeats = new LinkedHashMap<Integer, Double>();
int appendStartIndex = 0;
CorpusHolder tempCorpus = theIndices == null ? theCorpus : theCorpus.getNeededCorpus(theIndices);
for (int i = 0; i < neededFeats.size(); i++) {
currentFeatComputer = getFeatComputer(neededFeats.get(i));
currentFeats = allFeatures.get(i);
appendMap(sampleFeats, currentFeatComputer.computeFeatures(sample, currentFeats, tempCorpus), appendStartIndex);
appendStartIndex += currentFeats.size(); // shift the next feature type past this one's index range
}
return sampleFeats;
}
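/**
* Copies every entry of mapToAppend into original, shifting its keys by
* appendStartIndex.
*/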
private void appendMap(LinkedHashMap<Integer, Double> original, LinkedHashMap<Integer, Double> mapToAppend, int appendStartIndex) {
Iterator<Entry<Integer, Double>> mapIt = mapToAppend.entrySet().iterator();
Entry<Integer, Double> currentEntry;
while (mapIt.hasNext()) {
currentEntry = mapIt.next();
original.put(appendStartIndex + currentEntry.getKey(), currentEntry.getValue());
}
}
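/**
* Returns the total number of extracted features across all feature types.
*/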
private int getNumTotalFeats() {
int numFeats = 0;
for (ArrayList<Feature> currentList : allFeatures) {
numFeats += currentList.size();
}
return numFeats;
}
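/**
* Counts how many predicted labels differ from the actual ones.
*/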
private int compareLabels(ArrayList<Integer> testLabels, ArrayList<Integer> actualLabels) {
int numErrors = 0;
for (int i = 0; i < testLabels.size(); i++) {
// Compare by value: != on Integer objects compares references, not contents.
if (!testLabels.get(i).equals(actualLabels.get(i))) {
numErrors++;
}
}
return numErrors;
}
}