/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package preprocessing;
import mira.*;
import java.util.ArrayList;
import edu.stanford.nlp.ling.*;
import seekfeel.utilities.Language;
import seekfeel.utilities.stanfordwrapper.DependencyRelation;
import seekfeel.utilities.stanfordwrapper.ParsedData;
import seekfeel.utilities.stanfordwrapper.ParsingOptions;
import seekfeel.utilities.stanfordwrapper.StanfordParser;
import seekfeel.utilities.stanfordwrapper.StringToWordsTokenizer;
import seekfeel.utilities.stanfordwrapper.WordPos;
/**
 * Holds one piece of text together with the results of running it through
 * the Stanford parser wrapper: a list of POS-tagged tokens and the list of
 * dependency relations reported by the parser.
 *
 * <p>Call {@link #Format_Text(String)} to process a text, then read the
 * results through {@link #getTokenizedSentence()} and
 * {@link #getTextDependencyRelations()}.
 *
 * @author acer
 */
public class Formatted_Text {

    /** Tokens built by the most recent {@link #Format_Text(String)} call, or {@code null} if none were produced. */
    ArrayList<token> tokenizedSentence = null;

    /** Dependency relations from the most recent parse, or {@code null} if none were produced. */
    private ArrayList<DependencyRelation> textDependencyRelations = null;

    /** The raw text most recently passed to {@link #Format_Text(String)}. */
    String Sentences = null;

    /** Parser instance shared by every {@code Formatted_Text}; constructed for English. */
    static StanfordParser sParser = new StanfordParser(Language.English);

    /**
     * Tokenizes and parses {@code rev}, storing the resulting tokens and
     * dependency relations on this object.
     *
     * <p>Results of any earlier call are discarded first, so after this method
     * returns (or throws) the getters never expose tokens or relations that
     * belong to a different text than {@link #Sentences}. If the tokenizer
     * yields no words, both results are left as {@code null}.
     *
     * @param rev the text to process
     */
    public void Format_Text(String rev) {
        Sentences = rev;

        // Drop stale results up front: the early return below (and any failure
        // in the parser) must not leave the previous text's tokens in place.
        tokenizedSentence = null;
        textDependencyRelations = null;

        ArrayList<Word> allWords = StringToWordsTokenizer.tokenize(rev);
        if (allWords.isEmpty()) {
            return;
        }

        // Ask the parser for both POS tags and dependency relations.
        ParsingOptions popts = new ParsingOptions();
        popts.setParseRelations(true);
        popts.setPosTag(true);

        ParsedData result = sParser.parse(allWords, popts);
        ArrayList<WordPos> wordsPos = result.getTaggedWords();
        setTextDependencyRelations(result.getDependencyRelations());

        int size = allWords.size();
        ArrayList<token> tokens = new ArrayList<token>(size);
        // NOTE(review): assumes the parser returns exactly one tagged word per
        // input word, in the same order - confirm against StanfordParser.
        for (int i = 0; i < size; i++) {
            token t = new token();
            t.setWord(allWords.get(i).word());
            t.setPos(wordsPos.get(i).getWordTag());
            t.setRelevance(true);
            tokens.add(t);
        }
        // Publish only a fully built list.
        tokenizedSentence = tokens;
    }

    /**
     * @return the tokens produced by the last {@link #Format_Text(String)}
     *         call, or {@code null} if no tokens were produced
     */
    public ArrayList<token> getTokenizedSentence() {
        return tokenizedSentence;
    }

    /**
     * @return the dependency relations stored by the last
     *         {@link #Format_Text(String)} call or by
     *         {@link #setTextDependencyRelations(ArrayList)}; may be {@code null}
     */
    public ArrayList<DependencyRelation> getTextDependencyRelations() {
        return textDependencyRelations;
    }

    /**
     * @param textDependencyRelations the textDependencyRelations to set
     */
    public void setTextDependencyRelations(ArrayList<DependencyRelation> textDependencyRelations) {
        this.textDependencyRelations = textDependencyRelations;
    }
}