* @param sourceDoc The source SciXML document.
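* @return A SAF XML document (root element "saf") of annotations over the source, including "sentence" annotations for non-empty sentences and per-token "genia" annotations with slots such as surface, stem and tag.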
* @throws Exception
*/
public Document runGenia(Document sourceDoc) throws Exception {
ProcessingDocument procDoc = ProcessingDocumentFactory.getInstance().makeTokenisedDocument(sourceDoc, false, false, true);
Document safDoc = new Document(new Element("saf"));
for(List<Token> sentence : procDoc.getSentences()) {
if(sentence.size() > 0) {
Token first = sentence.get(0);
Token last = sentence.get(sentence.size()-1);
Element sentenceAnnot = SafTools.makeAnnot(first.getStartXPoint(), last.getEndXPoint(), "sentence");
safDoc.getRootElement().appendChild(sentenceAnnot);
}
}
for(TokenSequence ts : procDoc.getTokenSequences()) {
for(Token t : ts.getTokens()) {
Element safElem = SafTools.makeAnnot(t.getStartXPoint(), t.getEndXPoint(), "genia");
SafTools.setSlot(safElem, "surface", t.getValue());
SafTools.setSlot(safElem, "stem", t.getGeniaData()[1]);
SafTools.setSlot(safElem, "tag", t.getGeniaData()[2]);