*/
public Sentence parse(String sentenceString, int para, boolean isTitle, boolean isBox) {
BufferedReader reader = new BufferedReader(new StringReader(
sentenceString));
Sentence sentence = new Sentence();
Node root = new Node();
try {
// first line is <s ...>
String line = reader.readLine();
boolean useSameTextAndMeta = false; // to handle cases where there are diff sug of parse (&&)
// scan forward until we find the line starting with SOURCE
while (!line.startsWith("SOURCE")) {
if(line.equals("&&")) {
// same sentence again!
useSameTextAndMeta = true;
break;
}
line = reader.readLine();
if (line == null) {
return null;
}
}
if(!useSameTextAndMeta) {
// got source, get the metadata
String metaFromSource = line.substring(7);
line = reader.readLine();
// we should have the plain sentence
// we remove the first token
int start = line.indexOf(" ");
text = line.substring(start + 1).trim();
text = fixPunctuation(text);
String titleTag = "";
if(isTitle) titleTag = " title";
String boxTag = "";
if(isBox) boxTag = " box";
if(start > 0) {
meta = line.substring(0, start) + " p=" + para + titleTag + boxTag + metaFromSource;
} else {
// rare case where there is no space between the id and the sentence.
// will use the previous meta for now
}
}
sentence.setText(text);
sentence.setMetadata(meta);
// now we look for the root node
// skip lines starting with ###
line = reader.readLine();
while(line != null && line.startsWith("###")) {
line = reader.readLine();
}
// got the root. Add it to the stack
Stack<Node> nodeStack = new Stack<Node>();
root.setSyntacticTag("ROOT");
root.setLevel(0);
nodeStack.add(root);
/* now we have to take care of the lastLevel. Every time it rises, we will add the
leaf to the node at the top. If it decreases, we remove the top. */
while (line != null && line.length() != 0 && line.startsWith("</s>") == false && !line.equals("&&")) {
TreeElement element = this.getElement(line);
if(element != null) {
// The idea here is to keep a stack of nodes that are candidates for
// parenting the following elements (nodes and leafs).
// 1) When we get a new element, we check its level and remove from
// the top of the stack nodes that are brothers or nephews.
while (!nodeStack.isEmpty() && element.getLevel() > 0
&& element.getLevel() <= nodeStack.peek().getLevel()) {
Node nephew = nodeStack.pop();
}
if( element.isLeaf() ) {
// 2a) If the element is a leaf and there is no parent candidate,
// add it as a daughter of the root.
if (nodeStack.isEmpty()) {
root.addElement(element);
} else {
// 2b) There are parent candidates.
// look for the node with the correct level
Node peek = nodeStack.peek();
if (element.level == 0) { // add to the root
nodeStack.firstElement().addElement(element);
} else {
Node parent = null;
int index = nodeStack.size() - 1;
while (parent == null) {
if (peek.getLevel() < element.getLevel()) {
parent = peek;
} else {
index--;
if (index > -1) {
peek = nodeStack.get(index);
} else {
parent = nodeStack.firstElement();
}
}
}
parent.addElement(element);
}
}
} else {
// 3) Check if the element that is at the top of the stack is this
// node parent, if yes add it as a son