// Nothing to read: xmlFile was never set, so fail with the list of candidates
// that were considered (possibleXMLFiles) in the message.
if (xmlFile == null) {
throw new IllegalArgumentException("no Anafora XML file found from " + possibleXMLFiles);
}
// load the XML: parse the file through its URL with a JDOM SAXBuilder and keep
// only the document's root element
Element dataElem;
try {
dataElem = new SAXBuilder().build(xmlFile.toURI().toURL()).getRootElement();
} catch (MalformedURLException e) {
// each failure mode is rethrown as an AnalysisEngineProcessException that
// wraps the original exception as its cause
throw new AnalysisEngineProcessException(e);
} catch (JDOMException e) {
throw new AnalysisEngineProcessException(e);
} catch (IOException e) {
throw new AnalysisEngineProcessException(e);
}
for (Element annotationsElem : dataElem.getChildren("annotations")) {
Map<String, Annotation> idToAnnotation = Maps.newHashMap();
for (Element entityElem : annotationsElem.getChildren("entity")) {
String id = removeSingleChildText(entityElem, "id", null);
Element spanElem = removeSingleChild(entityElem, "span", id);
String type = removeSingleChildText(entityElem, "type", id);
Element propertiesElem = removeSingleChild(entityElem, "properties", id);
// UIMA doesn't support disjoint spans, so collapse the ';'-separated
// 'begin,end' pieces into the single span that encloses all of them
int begin = Integer.MAX_VALUE;
int end = Integer.MIN_VALUE;
boolean sawUsableSpan = false;
for (String spanString : spanElem.getText().split(";")) {
  String[] beginEndStrings = spanString.split(",");
  if (beginEndStrings.length != 2) {
    error("span not of the format 'number,number'", id);
    if (beginEndStrings.length < 2) {
      // error(...) appears to only report the problem (other call sites carry
      // on after calling it), so without this guard the parse below would
      // index past the end of the array or choke on an empty string
      continue;
    }
    // with more than two parts the first two are still used, as before
  }
  // trim so that pieces written as "12, 15" are accepted as well
  int spanBegin = Integer.parseInt(beginEndStrings[0].trim());
  int spanEnd = Integer.parseInt(beginEndStrings[1].trim());
  begin = Math.min(begin, spanBegin);
  end = Math.max(end, spanEnd);
  sawUsableSpan = true;
}
if (!sawUsableSpan) {
  // never hand the MAX_VALUE/MIN_VALUE sentinels to an annotation constructor
  throw new IllegalArgumentException("no usable 'number,number' span for annotation " + id);
}
// Create and index the UIMA annotation that corresponds to this entity's type.
// String comparisons are written constant-first so that a null type or
// polarity (should removeSingleChildText yield null for a missing child, as the
// DocTimeRel check below allows for) reaches the existing error paths instead
// of failing with a NullPointerException.
Annotation annotation;
if ("EVENT".equals(type)) {
  String docTimeRel = removeSingleChildText(propertiesElem, "DocTimeRel", id);
  if (docTimeRel == null) {
    error("no docTimeRel, assuming OVERLAP", id);
    docTimeRel = "OVERLAP";
  }
  String eventType = removeSingleChildText(propertiesElem, "Type", id);
  String degree = removeSingleChildText(propertiesElem, "Degree", id);
  String polarity = removeSingleChildText(propertiesElem, "Polarity", id);
  String contextualModality = removeSingleChildText(propertiesElem, "ContextualModality", id);
  String contextualAspect = removeSingleChildText(propertiesElem, "ContextualAspect", id);
  String permanence = removeSingleChildText(propertiesElem, "Permanence", id);
  EventMention eventMention = new EventMention(jCas, begin, end);
  Event event = new Event(jCas);
  EventProperties eventProperties = new EventProperties(jCas);
  eventProperties.setDocTimeRel(docTimeRel);
  eventProperties.setCategory(eventType);
  eventProperties.setDegree(degree);
  if ("POS".equals(polarity)) {
    eventProperties.setPolarity(CONST.NE_POLARITY_NEGATION_ABSENT);
  } else if ("NEG".equals(polarity)) {
    eventProperties.setPolarity(CONST.NE_POLARITY_NEGATION_PRESENT);
  } else {
    // covers a missing polarity too; the polarity feature is simply left unset
    error("polarity that was not POS or NEG", id);
  }
  eventProperties.setContextualModality(contextualModality);
  eventProperties.setContextualAspect(contextualAspect);
  eventProperties.setPermanence(permanence);
  eventProperties.addToIndexes();
  // the Event and its single EventMention point at each other
  event.setConfidence(1.0f);
  event.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
  event.setProperties(eventProperties);
  event.setMentions(new FSArray(jCas, 1));
  event.setMentions(0, eventMention);
  event.addToIndexes();
  eventMention.setConfidence(1.0f);
  eventMention.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
  eventMention.setEvent(event);
  eventMention.addToIndexes();
  annotation = eventMention;
} else if ("TIMEX3".equals(type)) {
  String timeClass = removeSingleChildText(propertiesElem, "Class", id);
  TimeMention timeMention = new TimeMention(jCas, begin, end);
  timeMention.setTimeClass(timeClass);
  timeMention.addToIndexes();
  annotation = timeMention;
} else if ("DOCTIME".equals(type) || "SECTIONTIME".equals(type)) {
  // no Class property is read for these; the entity type itself is stored as
  // the time class
  TimeMention timeMention = new TimeMention(jCas, begin, end);
  timeMention.setTimeClass(type);
  timeMention.addToIndexes();
  annotation = timeMention;
} else {
  throw new UnsupportedOperationException("unsupported entity type: " + type);
}
// match the annotation to its ID for later use
idToAnnotation.put(id, annotation);
// make sure all XML has been consumed
removeSingleChild(entityElem, "parentsType", id);
if (!propertiesElem.getChildren().isEmpty() || !entityElem.getChildren().isEmpty()) {
List<String> children = Lists.newArrayList();
for (Element child : propertiesElem.getChildren()) {
children.add(child.getName());
}
for (Element child : entityElem.getChildren()) {
children.add(child.getName());
}
error("unprocessed children " + children, id);
}
}
for (Element relationElem : annotationsElem.getChildren("relation")) {
String id = removeSingleChildText(relationElem, "id", null);
String type = removeSingleChildText(relationElem, "type", id);
Element propertiesElem = removeSingleChild(relationElem, "properties", id);
// Create the relation object matching the relation type and hand it, with its
// Source/Target annotation IDs and its Type value, to addRelation.
// Comparisons are constant-first so that a null type ends in the
// "unsupported relation type" failure below rather than a NullPointerException.
if ("TLINK".equals(type)) {
  String sourceID = removeSingleChildText(propertiesElem, "Source", id);
  String targetID = removeSingleChildText(propertiesElem, "Target", id);
  String tlinkType = removeSingleChildText(propertiesElem, "Type", id);
  TemporalTextRelation relation = new TemporalTextRelation(jCas);
  addRelation(jCas, relation, sourceID, targetID, tlinkType, idToAnnotation, id);
} else if ("ALINK".equals(type)) {
  String sourceID = removeSingleChildText(propertiesElem, "Source", id);
  String targetID = removeSingleChildText(propertiesElem, "Target", id);
  String alinkType = removeSingleChildText(propertiesElem, "Type", id);
  AspectualTextRelation relation = new AspectualTextRelation(jCas);
  addRelation(jCas, relation, sourceID, targetID, alinkType, idToAnnotation, id);
} else {
  throw new UnsupportedOperationException("unsupported relation type: " + type);
}
// make sure all XML has been consumed
removeSingleChild(relationElem, "parentsType", id);
if (!propertiesElem.getChildren().isEmpty() || !relationElem.getChildren().isEmpty()) {
List<String> children = Lists.newArrayList();
for (Element child : propertiesElem.getChildren()) {
children.add(child.getName());
}
for (Element child : relationElem.getChildren()) {
children.add(child.getName());
}