@Override
public ArrayList<Feature> extractFeatures(CorpusHolder corpus) {
// Step 1: tokenize the body of each review into an ArrayList of Word objects.
// Step 2: run the parser with POS tagging enabled on those words; the result is a ParsedData object.
ArrayList<Feature> allFeats = new ArrayList<Feature>();
StanfordParser theParser = new StanfordParser(Language.English);
ArrayList<DataUnit> posExamples = corpus.getPositiveExamples();
ArrayList<DataUnit> negExamples = corpus.getNegativeExamples();
ArrayList<PosTag> neededTags = new ArrayList<PosTag>();
neededTags.add(PosTag.RB);
neededTags.add(PosTag.RBR);
neededTags.add(PosTag.RBS);
neededTags.add(PosTag.RP);
neededTags.add(PosTag.JJ);
neededTags.add(PosTag.JJR);
neededTags.add(PosTag.JJS);
neededTags.add(PosTag.VB);
neededTags.add(PosTag.VBD);
neededTags.add(PosTag.VBG);
neededTags.add(PosTag.VBN);
neededTags.add(PosTag.VBP);
neededTags.add(PosTag.VBZ);
ParsedData parsingResult = null;
ParsingOptions opts = new ParsingOptions();
opts.setNeededTags(neededTags);
opts.setPosTag(true);
ArrayList<Word> allWords;
for (DataUnit posReview : posExamples) {
allWords = StringToWordsTokenizer.tokenize(posReview.getDataBody());
parsingResult = theParser.parse(allWords, opts);
updateAllFeatures(allFeats, parsingResult, allWords);
}
for (DataUnit negReview : negExamples) {
allWords = StringToWordsTokenizer.tokenize(negReview.getDataBody());
parsingResult = theParser.parse(allWords, opts);
try {
parsingResult = theParser.parse(allWords, opts);
} catch (Exception e) {
System.out.println(e.toString());
}
updateAllFeatures(allFeats, parsingResult, allWords);
}