private void processReport(List<Template> templates, File reportFile,File outputDir) throws Exception {
progress("processing report ("+(processCount+1)+") "+reportFile.getName()+" ... ");
// read in the report
String text = TextTools.getText(new FileInputStream(reportFile));
AnnotatedDocument doc = new AnnotatedDocument();
doc.setName(reportFile.getName());
doc.setText(text);
doc.getFilters().addAll(DocumentFilter.getDeIDFilters());
// combine terminologies into a single instance and add filters
CompositTerminology terminology = new CompositTerminology();
for(Template t: templates){
doc.getFilters().addAll(t.getFilters());
terminology.addTerminology(t.getTerminology());
}
// do a simple parsing of this document
long time = System.currentTimeMillis();
int offset = 0;
for(String line: getLines(doc.getFilteredDocument())){
// skip synoptic sections
if(doc.isSynopticSection(offset))
continue;
for(String sentence: getSentences(line)){
for(String phrase: getPhrases(sentence) ){
int offs = line.indexOf(phrase);
for(Concept c: terminology.search(phrase,IndexFinderTerminology.BEST_MATCH)){
for(Annotation a: c.getAnnotations()){
if(a.getOffset() < (offset+offs))
a.updateOffset(offset+offs);
doc.addAnnotation(a);
}
doc.addConcept(c);
}
}
}
offset += (line.length()+1);
}
doc.sort();
/* System.out.println("--------------");
for(Annotation a: doc.getAnnotations()){
System.out.println(a.getText()+" ... ("+a.getStartPosition()+","+a.getEndPosition()+ ") \t\t"+a.getConcept().getName()+" ... "+a.getConcept().getCode());
}
System.out.println("\n----------------( "+time+" )-------------------\n");*/
// get a list of processed concepts
Map<Template,List<ItemInstance>> resultMap = new LinkedHashMap<Template, List<ItemInstance>>();
// now let's do information extraction
for(Template template: templates){
if(template.isAppropriate(doc)){
List<ItemInstance> items = template.process(doc);
resultMap.put(template,items);
// re-add annotation
for(ItemInstance i: items){
for(Annotation a: i.getAnnotations()){
if(!doc.getAnnotations().contains(a))
doc.getAnnotations().add(a);
}
}
}
}
doc.sort();
long total = System.currentTimeMillis()-time;
processCount ++;
processTime += total;