// Build the output path: the input file's name with its last three characters
// replaced by "xmi", placed under odir.
// NOTE(review): the cut point uses fname.length() while the string being cut is
// file.getName() -- presumably fname == file.getName(); confirm.
String outFilePath = odir + "/" + file.getName().substring(0, fname.length() - 3) + "xmi";
File outFile = new java.io.File(outFilePath);
// Same name derivation for the matching assertion file, under assertionDir with an "ast" suffix.
String assertionFilePath = assertionDir + "/" + file.getName().substring(0, fname.length() - 3) + "ast";
// Written directly to stdout (not through the logger used further down).
System.out.println("Assertion file path: " + assertionFilePath);
File assertionFile = new java.io.File(assertionFilePath);
// Converter constructed from the document text; the code below passes it
// (line, token offset) positions and reads begin/end character offsets back.
LineTokenToCharacterOffsetConverter converter = new LineTokenToCharacterOffsetConverter(contents);
// Load the annotations from the assertion file; they are iterated after the
// sentence/token loop.
AssertionFileProcessor assertionProcessor = new AssertionFileProcessor();
List<Annotation> assertions = assertionProcessor.processAnnotationFile(assertionFile);
JCas jcas = cas.getJCas();
// Run the document text through SingleDocumentProcessor to obtain its tokens.
SingleDocumentProcessor p = new SingleDocumentProcessor();
p.setContents(contents);
p.preprocess();
// Indexed below as tokenArrays[line][token]: the outer index (+1) is used as the
// line number and the inner index as the token offset within that line.
String [][] tokenArrays = p.getTokenArrays();
// Set the document text on the CAS; the second argument is an empty string
// (presumably the MIME type -- confirm against the JCas API).
jcas.setSofaDataString(contents, "");
// Running counters assigned to each Sentence / WordToken created below.
int sentNum = 0;
int tokNum = 0;
// For every row of tokenArrays, add one Sentence annotation spanning the row's
// first through last token, followed by one WordToken annotation per token.
for (int lineIdx = 0; lineIdx < tokenArrays.length; lineIdx++) {
    // Positions handed to the converter use the array index + 1 as the line number.
    final int lineNo = lineIdx + 1;
    Sentence sentenceAnn = new Sentence(jcas);

    // Position of the first token on this line.
    LineAndTokenPosition firstTokPos = new LineAndTokenPosition();
    firstTokPos.setLine(lineNo);
    firstTokPos.setTokenOffset(0);

    // Position of the last token on this line.
    LineAndTokenPosition lastTokPos = new LineAndTokenPosition();
    lastTokPos.setLine(lineNo);
    final int tokensOnLine = tokenArrays[lineIdx].length;
    lastTokPos.setTokenOffset(tokensOnLine - 1);

    LineTokenToCharacterOffsetConverter.BeginAndEndCharacterOffsetPair firstSpan = converter.convert(firstTokPos);
    LineTokenToCharacterOffsetConverter.BeginAndEndCharacterOffsetPair lastSpan = converter.convert(lastTokPos);

    if (firstSpan != null && lastSpan != null) {
        // Sentence runs from the start of the first token to one past the
        // converter's end offset for the last token.
        sentenceAnn.setBegin(firstSpan.getBegin());
        sentenceAnn.setEnd(lastSpan.getEnd() + 1);
    } else {
        // Either conversion failed: fall back to an empty span at offset 0.
        sentenceAnn.setBegin(0);
        sentenceAnn.setEnd(0);
    }
    sentenceAnn.setSentenceNumber(sentNum++);
    sentenceAnn.addToIndexes();

    for (int tokIdx = 0; tokIdx < tokensOnLine; tokIdx++) {
        WordToken tokenAnn = new WordToken(jcas);

        LineAndTokenPosition tokPos = new LineAndTokenPosition();
        tokPos.setLine(lineNo);
        tokPos.setTokenOffset(tokIdx);

        LineTokenToCharacterOffsetConverter.BeginAndEndCharacterOffsetPair tokSpan = converter.convert(tokPos);
        if (tokSpan != null) {
            tokenAnn.setBegin(tokSpan.getBegin());
            tokenAnn.setEnd(tokSpan.getEnd() + 1);
        } else {
            // No offsets available for this token: use the placeholder span [0, 1).
            tokenAnn.setBegin(0);
            tokenAnn.setEnd(1);
        }
        // Token numbers keep counting across lines.
        tokenAnn.setTokenNumber(tokNum++);
        tokenAnn.addToIndexes();
    }
}
logger.info("before assertions");
for (Annotation a : assertions) {
logger.info(" begin assertion");
logger.info(" assertion: " + a.toString());
//Concept assertion = new Concept(jcas);
org.mitre.medfacts.i2b2.annotation.AssertionAnnotation i2b2Assertion = (org.mitre.medfacts.i2b2.annotation.AssertionAnnotation)a;
ConceptType conceptType = i2b2Assertion.getConceptType();
IdentifiedAnnotation entityOrEventMention = null;
if (conceptType.equals(ConceptType.TREATMENT))
{
entityOrEventMention = new EventMention(jcas);
} else
{
entityOrEventMention = new EntityMention(jcas);
}
LineAndTokenPosition assertionStart = new LineAndTokenPosition();
LineAndTokenPosition assertionEnd = new LineAndTokenPosition();
assertionStart.setLine(a.getBegin().getLine());
assertionStart.setTokenOffset(a.getBegin().getTokenOffset());
assertionEnd.setLine(a.getEnd().getLine());
assertionEnd.setTokenOffset(a.getEnd().getTokenOffset());
// assertion.setBegin(converter.convert(assertionStart).getBegin());
// assertion.setEnd(converter.convert(assertionEnd).getEnd() + 1);
// assertion.setConceptType("PROBLEM");
// assertion.addToIndexes();
entityOrEventMention.setBegin(converter.convert(assertionStart).getBegin());
entityOrEventMention.setEnd(converter.convert(assertionEnd).getEnd());
entityOrEventMention.setConfidence(1.0f);
FSArray ontologyConceptArray = ConceptLookup.reverseLookup(conceptType, jcas);
entityOrEventMention.setOntologyConceptArr(ontologyConceptArray);