Package seekfeel.miners.features

Examples of seekfeel.miners.features.BiGram


public class DeltaTFIDFComputerBi extends FeatureValueComputer {

    @Override
    public LinkedHashMap<Integer, Double> computeFeatures(DataUnit example, ArrayList<Feature> features, CorpusHolder corpus) {
        LinkedHashMap<Integer, Double> featuresValues = new LinkedHashMap<Integer, Double>();
        BiGram tempBigram;
        // The Equation used in calculating feature value :
        // V = Count * log ( NumPositiveExamples * Count of term in Negative corpus / NumNegativeExamples * Count of term in positive corpus)
        double featureValue;
        int countInDoc = 0;
        double countInPos;
        double countInNeg;
        double posToNegRatio = (double) corpus.getNumPositives() / (double) corpus.getNumNegatives();
        HashMap<String, Integer> wordsWithCount = TextSplitter.getBigramsCount((example).getDataBody());

        for (int i = 0; i < features.size(); i++) {
            tempBigram = (BiGram) features.get(i);
            if (wordsWithCount.containsKey(((BiGram)features.get(i)).getBiGram())) {
                countInDoc = wordsWithCount.get(((BiGram)features.get(i)).getBiGram());
                countInPos = tempBigram.getPositiveCount() + 0.0001;
                countInNeg = tempBigram.getNegativeCount() + 0.0001;
                featureValue = countInDoc * (Math.log((posToNegRatio * (countInNeg / countInPos))));
                featuresValues.put(i, 1.0);
            }
        }
        return featuresValues;
View Full Code Here


    /**
     * Collects the bigrams found in the positive and negative examples of the corpus and
     * counts, per bigram, how often it was seen on each side.
     *
     * <p>NOTE(review): the end of this method (including its return statement) is not visible
     * in this excerpt; presumably the accumulated {@code bigrams} list is returned - confirm.
     *
     * @param corpus source of the positive and negative example lists
     */
    public ArrayList<Feature> extractFeatures(CorpusHolder corpus) {

        // NOTE(review): declared as a raw ArrayList, which is why the casts to BiGram below are needed.
        ArrayList bigrams = new ArrayList<Feature>();
        ArrayList<DataUnit> positiveExamples = corpus.getPositiveExamples();
        ArrayList<DataUnit> negativeExamples = corpus.getNegativeExamples();
        BiGram tempBiGram;

        ArrayList<String> wordPairs;
        // Pass 1: bigrams of the positive examples.
        for (DataUnit posReview : positiveExamples) {
            wordPairs = TextSplitter.splitTextToBigrams(posReview.getDataBody());
            for (String pair : wordPairs) {
                // Skip empty strings produced by the splitter.
                if (pair.length() == 0) {
                    continue;
                }
                // Build a probe object so the list can be searched with contains/indexOf.
                // NOTE(review): this relies on BiGram.equals comparing the bigram text - verify in BiGram.
                tempBiGram = new BiGram();
                tempBiGram.setBiGram(pair);
                if (bigrams.contains(tempBiGram)) {
                    // Already known: bump the positive count on the stored instance.
                    ((BiGram) bigrams.get(bigrams.indexOf(tempBiGram))).inCrementPositive();
                } else {
                    // First sighting: register it with a positive count of 1.
                    tempBiGram.setPositiveCount(1);
                    bigrams.add(tempBiGram);
                }
            }

        }
        // Pass 2: bigrams of the negative examples, same procedure on the negative counter.
        for (DataUnit negReview : negativeExamples) {
            wordPairs = TextSplitter.splitTextToBigrams(negReview.getDataBody());
            for (String pair : wordPairs) {
                // Skip empty strings produced by the splitter.
                if (pair.length() == 0) {
                    continue;
                }
                tempBiGram = new BiGram();
                tempBiGram.setBiGram(pair);
                if (bigrams.contains(tempBiGram)) {
                    // Known bigram (from either pass): bump the negative count on the stored instance.
                    ((BiGram) bigrams.get(bigrams.indexOf(tempBiGram))).inCrementNegative();
                } else {
                    // Seen only in negative examples so far: register it with a negative count of 1.
                    tempBiGram.setNegativeCount(1);
                    bigrams.add(tempBiGram);
                }
            }

        }
View Full Code Here

    }

    @Override
    public ArrayList<Feature> filter(int countThreshold, ArrayList<Feature> allFeatures) {
        ArrayList<Feature> filteredFeatures = new ArrayList<Feature>();
        BiGram tempFeat;
        for (Feature feat : allFeatures) {
            tempFeat = (BiGram) feat;
            if ((tempFeat.getPositiveCount() + tempFeat.getNegativeCount()) >= countThreshold) {
                filteredFeatures.add(tempFeat);
            }
        }
        return filteredFeatures;
    }
View Full Code Here

TOP

Related Classes of seekfeel.miners.features.BiGram

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.