// Build the configuration from the development properties file.
SpotlightConfiguration configuration = new SpotlightConfiguration("conf/dev.properties");

// The LingPipe factory takes the tagger file named by the configuration
// together with an Indo-European sentence model.
File taggerFile = new File(configuration.getTaggerFile());
LingPipeFactory lingPipeFactory = new LingPipeFactory(taggerFile, new IndoEuropeanSentenceModel());

// Load the gold standard: a CSAW-format dataset at a hard-coded local path.
LOG.info("Reading gold standard.");
File goldStandardLocation = new File("/home/pablo/eval/csaw/original");
AnnotatedDataset evaluationCorpus =
    new AnnotatedDataset(goldStandardLocation, AnnotatedDataset.Format.CSAW, lingPipeFactory);
String readMessage = String.format("Read %s annotations.", evaluationCorpus.getInstances().size());
LOG.info(readMessage);

// Base: compute and log the base result for the dataset; it is also handed
// to evaluate(...) below.
SelectorResult baseResult = getDatasetBaseResult(evaluationCorpus);
LOG.info(baseResult);

// Re-key the gold annotations by their plain surface form occurrence so an
// annotated occurrence can be looked up from its un-annotated counterpart.
LOG.info("Reformatting.");
Map<SurfaceFormOccurrence, AnnotatedSurfaceFormOccurrence> goldSurfaceFormOccurrences =
    new HashMap<SurfaceFormOccurrence, AnnotatedSurfaceFormOccurrence>();
for (AnnotatedSurfaceFormOccurrence goldAnnotation : evaluationCorpus.getInstances()) {
    SurfaceFormOccurrence occurrenceKey = goldAnnotation.toSurfaceFormOccurrence();
    goldSurfaceFormOccurrences.put(occurrenceKey, goldAnnotation);
    // Disabled: additionally keying by a name variation of the occurrence.
    //goldSurfaceFormOccurrences.put(getNameVariation(occurrenceKey), goldAnnotation);
}

// Run the evaluation over the corpus texts against the gold map.
List<Text> documents = evaluationCorpus.getTexts();
evaluate(documents, goldSurfaceFormOccurrences, baseResult, lingPipeFactory, configuration);
LOG.info("Done.");