*/
public Paragraph parse(String paragraphString) {
BufferedReader reader = new BufferedReader(new StringReader(
paragraphString));
Paragraph sentence = new Paragraph();
Node root = new Node();
try {
// first line is <s ...>
String line = reader.readLine();
if (line.startsWith("<s")) {
// should find the line that starts with SOURCE
while (!line.startsWith("SOURCE")) {
line = reader.readLine();
if (line == null) {
return new Paragraph();
}
}
}
line = reader.readLine();
// we should have the plain sentence
// we remove the first token
int start = line.indexOf(" ");
sentence.setText(line.substring(start + 1));
// now we look for the root node
line = reader.readLine();
while (!rootPattern.matcher(line).matches()) {
line = reader.readLine();
if (line == null) {
return sentence;
}
}
// got the root. Add it to the stack
Stack<Node> nodeStack = new Stack<Node>();
// we get the complete line
root.setSyntacticTag("ROOT");
root.setLevel(0);
nodeStack.add(root);
// Now we have to take care of the lastLevel. Every time it rises, we
// will add the leaf to the node at the top. If it decreases, we remove
// the top.
//line = reader.readLine();
while (line.length() != 0 && line.startsWith("</s>") == false) {
TreeElement element = this.getElement(line);
if(element != null) {
// remove elements at same level or higher
while (!nodeStack.isEmpty()
&& element.getLevel() > 0 && element.getLevel() <= nodeStack.peek().getLevel()) {
nodeStack.pop();
}
if( element.isLeaf() ) {
if (nodeStack.isEmpty()) {
root.addElement(element);
} else {
// look for the node with the correct level
Node peek = nodeStack.peek();
if (element.level == 0) { // add to the root
nodeStack.firstElement().addElement(element);
} else {
Node parent = null;
int index = nodeStack.size() - 1;
while(parent == null) {
if(peek.getLevel() < element.getLevel()) {
parent = peek;
} else {
index--;
if(index > -1) {
peek = nodeStack.get(index);
} else {
parent = nodeStack.firstElement();
}
}
}
parent.addElement(element);
}
}
} else {
if (!nodeStack.isEmpty()) {
nodeStack.peek().addElement(element);