ReutersCorpusDocument doc;
try {
doc = ReutersCorpusDocument.fromFile(aFile.toString(),substitutions);
if(doc.hasCodedCountries()){
ExtractedEntities entities = ParseManager.extractAndResolve(doc.getCompiledText());
logger.info("Checking file "+aFile);
articlesWithLocations++;
// Look up a GeoName for every country code attached to the document.
List<GeoName> countriesTheyCoded = new ArrayList<GeoName>();
for (CountryCode code : doc.getCountryCodeObjects()) {
    GeoName codedCountry = CountryGeoNameLookup.lookup(code.name());
    countriesTheyCoded.add(codedCountry);
}
logger.info(doc.getId() + ": " + countriesTheyCoded);
// The unique country GeoNames found by our own extraction of the document text.
List<GeoName> ourMentionedCountries = entities.getUniqueCountryGeoNames();
// Verify that every country they coded is among the countries we mentioned.
// Articles where we found no countries at all are neither counted nor warned about.
if (!ourMentionedCountries.isEmpty()) {
    if (ourMentionedCountries.containsAll(countriesTheyCoded)) {
        mentionsArticlesWeGotRight++;
    } else {
        // At least one coded country is missing from our mentions.
        logger.warn(doc.getId() + ": mentions " + ourMentionedCountries + " they coded " + countriesTheyCoded);
    }
}
// Second measure: check that the main "about" country is included in their list of countries.
// Start by collecting the GeoName of each country the focus strategy selects
// from our resolved locations.
List<GeoName> ourAboutnessGeoNames = new ArrayList<GeoName>();
FocusStrategy focus = ParseManager.getFocusStrategy();
List<FocusLocation> ourAboutnessCountries = focus.selectCountries(entities.getResolvedLocations());
for (FocusLocation focusLocation : ourAboutnessCountries) {
    GeoName aboutGeoName = focusLocation.getGeoName();
    ourAboutnessGeoNames.add(aboutGeoName);
}
if(ourAboutnessGeoNames.size()>0){