public ArrayList<Feature> extractFeatures(CorpusHolder corpus) {
ArrayList bigrams = new ArrayList<Feature>();
ArrayList<DataUnit> positiveExamples = corpus.getPositiveExamples();
ArrayList<DataUnit> negativeExamples = corpus.getNegativeExamples();
BiGram tempBiGram;
ArrayList<String> wordPairs;
for (DataUnit posReview : positiveExamples) {
wordPairs = TextSplitter.splitTextToBigrams(posReview.getDataBody());
for (String pair : wordPairs) {
if (pair.length() == 0) {
continue;
}
tempBiGram = new BiGram();
tempBiGram.setBiGram(pair);
if (bigrams.contains(tempBiGram)) {
((BiGram) bigrams.get(bigrams.indexOf(tempBiGram))).inCrementPositive();
} else {
tempBiGram.setPositiveCount(1);
bigrams.add(tempBiGram);
}
}
}
for (DataUnit negReview : negativeExamples) {
wordPairs = TextSplitter.splitTextToBigrams(negReview.getDataBody());
for (String pair : wordPairs) {
if (pair.length() == 0) {
continue;
}
tempBiGram = new BiGram();
tempBiGram.setBiGram(pair);
if (bigrams.contains(tempBiGram)) {
((BiGram) bigrams.get(bigrams.indexOf(tempBiGram))).inCrementNegative();
} else {
tempBiGram.setNegativeCount(1);
bigrams.add(tempBiGram);
}
}
}