public boolean readSentence(TokenStructure syntaxGraph) throws MaltChainedException {
if (syntaxGraph == null || !(syntaxGraph instanceof PhraseStructure)) {
return false;
}
syntaxGraph.clear();
final PhraseStructure phraseStructure = (PhraseStructure)syntaxGraph;
PhraseStructureNode parent = null;
PhraseStructureNode child = null;
currentHeaderTable = NegraTables.UNDEF;
String line = null;
syntaxGraph.clear();
nonterminals.clear();
try {
while (true) {
line = reader.readLine();
if (line == null) {
if (syntaxGraph.hasTokens()) {
sentenceCount++;
if (syntaxGraph instanceof MappablePhraseStructureGraph) {
((MappablePhraseStructureGraph)syntaxGraph).getMapping().updateDependenyGraph(((MappablePhraseStructureGraph)syntaxGraph), ((PhraseStructure)syntaxGraph).getPhraseStructureRoot());
}
}
if (cIterations < nIterations) {
cIterations++;
reopen();
return true;
}
return false;
} else if (line.startsWith("#EOS")) {
currentTerminalSize = 0;
currentNonTerminalSize = 0;
currentHeaderTable = NegraTables.UNDEF;
if (syntaxGraph instanceof MappablePhraseStructureGraph) {
((MappablePhraseStructureGraph)syntaxGraph).getMapping().updateDependenyGraph(((MappablePhraseStructureGraph)syntaxGraph), ((PhraseStructure)syntaxGraph).getPhraseStructureRoot());
}
return true;
} else if (line.startsWith("#BOS")) {
currentHeaderTable = NegraTables.SENTENCE;
int s = -1, e = -1;
for (int i = 5, n = line.length(); i < n; i++) {
if (Character.isDigit(line.charAt(i)) && s == -1) {
s = i;
}
if (line.charAt(i) == ' ') {
e = i;
break;
}
}
if (s != e && s != -1 && e != -1) {
phraseStructure.setSentenceID(Integer.parseInt(line.substring(s,e)));
}
sentenceCount++;
} else if (currentHeaderTable == NegraTables.SENTENCE) {
if (line.length() >= 2 && line.charAt(0) == '#' && Character.isDigit(line.charAt(1))) { // Non-terminal
Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
ColumnDescription column = null;
currentNonTerminalSize++;
char[] lineChars = line.toCharArray();
int start = 0;
int secedgecounter = 0;
for (int i = 0, n = lineChars.length; i < n; i++) {
if (lineChars[i] == '\t' && start == i) {
start++;
} else if (lineChars[i] == '\t' || i == n - 1) {
if (columns.hasNext()) {
column = columns.next();
}
if (column.getPosition() == 0) {
int index = Integer.parseInt((i == n - 1)?line.substring(start+1):line.substring(start+1, i));
child = nonterminals.get(index);
if (child == null) {
if (index != 0) {
child = ((PhraseStructure)syntaxGraph).addNonTerminalNode(index-START_ID_OF_NONTERMINALS+1);
}
nonterminals.put(index,child);
}
} else if (column.getPosition() == 2 && child != null) {
syntaxGraph.addLabel(child, "CAT", (i == n - 1)?line.substring(start):line.substring(start, i));
} else if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) {
edgelabelSymbol.setLength(0);
edgelabelSymbol.append((i == n - 1)?line.substring(start):line.substring(start, i));
edgelabelTableName.setLength(0);
edgelabelTableName.append(column.getName());
} else if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL && child != null) {
int index = Integer.parseInt((i == n - 1)?line.substring(start):line.substring(start, i));
parent = nonterminals.get(index);
if (parent == null) {
if (index == 0) {
parent = phraseStructure.getPhraseStructureRoot();
} else {
parent = phraseStructure.addNonTerminalNode(index-START_ID_OF_NONTERMINALS+1);
}
nonterminals.put(index,parent);
}
Edge e = phraseStructure.addPhraseStructureEdge(parent, child);
syntaxGraph.addLabel(e, edgelabelTableName.toString(), edgelabelSymbol.toString());
} else if (column.getCategory() == ColumnDescription.SECONDARY_EDGE_LABEL && child != null) {
if (secedgecounter % 2 == 0) {
edgelabelSymbol.setLength(0);
edgelabelSymbol.append((i == n - 1)?line.substring(start):line.substring(start, i));
secedgecounter++;
} else {
int index = Integer.parseInt((i == n - 1)?line.substring(start):line.substring(start, i));
if (index == 0) {
parent = phraseStructure.getPhraseStructureRoot();
} else if (index < START_ID_OF_NONTERMINALS) {
parent = phraseStructure.getTokenNode(index);
} else {
parent = nonterminals.get(index);
if (parent == null) {
parent = phraseStructure.addNonTerminalNode(index-START_ID_OF_NONTERMINALS+1);
nonterminals.put(index,parent);
}
}
Edge e = phraseStructure.addSecondaryEdge(parent, child);
e.addLabel(column.getSymbolTable(), edgelabelSymbol.toString());
secedgecounter++;
}
}
start = i + 1;
}
}
} else { // Terminal
Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
ColumnDescription column = null;
currentTerminalSize++;
child = syntaxGraph.addTokenNode(currentTerminalSize);
char[] lineChars = line.toCharArray();
int start = 0;
int secedgecounter = 0;
for (int i = 0, n = lineChars.length; i < n; i++) {
if (lineChars[i] == '\t' && start == i) {
start++;
} else if (lineChars[i] == '\t' || i == n - 1) {
if (columns.hasNext()) {
column = columns.next();
}
if (column.getCategory() == ColumnDescription.INPUT && child != null) {
syntaxGraph.addLabel(child, column.getName(), (i == n - 1)?line.substring(start):line.substring(start, i));
} else if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL && child != null) { // && column.getName().equals("EDGELABEL")) {
edgelabelSymbol.setLength(0);
edgelabelSymbol.append((i == n - 1)?line.substring(start):line.substring(start, i));
edgelabelTableName.setLength(0);
edgelabelTableName.append(column.getName());
} else if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL && child != null) {
int index = Integer.parseInt((i == n - 1)?line.substring(start):line.substring(start, i));
parent = nonterminals.get(index);
if (parent == null) {
if (index == 0) {
parent = phraseStructure.getPhraseStructureRoot();
} else {
parent = phraseStructure.addNonTerminalNode(index-START_ID_OF_NONTERMINALS+1);
}
nonterminals.put(index,parent);
}
Edge e = phraseStructure.addPhraseStructureEdge(parent, child);
syntaxGraph.addLabel(e, edgelabelTableName.toString(), edgelabelSymbol.toString());
} else if (column.getCategory() == ColumnDescription.SECONDARY_EDGE_LABEL && child != null) {
if (secedgecounter % 2 == 0) {
edgelabelSymbol.setLength(0);
edgelabelSymbol.append((i == n - 1)?line.substring(start):line.substring(start, i));
secedgecounter++;
} else {
int index = Integer.parseInt((i == n - 1)?line.substring(start):line.substring(start, i));
if (index == 0) {
parent = phraseStructure.getPhraseStructureRoot();
} else if (index < START_ID_OF_NONTERMINALS) {
parent = phraseStructure.getTokenNode(index);
} else {
parent = nonterminals.get(index);
if (parent == null) {
parent = phraseStructure.addNonTerminalNode(index-START_ID_OF_NONTERMINALS+1);
nonterminals.put(index,parent);
}
}
Edge e = phraseStructure.addSecondaryEdge(parent, child);
e.addLabel(column.getSymbolTable(), edgelabelSymbol.toString());
secedgecounter++;
}
}
start = i + 1;