// Tokenize the line. The columns are consumed strictly in order via t.next(),
// so the sequence of the calls below must not be changed.
LineTokenizer t = new LineTokenizer(line);
//[0] geonames id
String idToken = t.next();
Integer numericId = Integer.valueOf(idToken);
//the URI of the new Representation is the resource namespace followed by the id
Representation doc = valueFactory.createRepresentation(GEONAMES_RESOURCE_NS + idToken);
//also store the numeric id, so that it need not be parsed from the subject URI
doc.add(GeonamesPropertyEnum.idx_id.toString(), numericId);
//mark the Representation as geonames:Feature
doc.add(GeonamesPropertyEnum.rdf_type.toString(),
    getReference(GeonamesPropertyEnum.gn_Feature.toString()));
//[1] UTF-8 name
String utf8Name = t.next();
//[2] ASCII name
String asciiName = t.next();
//the ASCII name is only used as fallback if there is no UTF-8 name
String label = utf8Name != null ? utf8Name : asciiName;
doc.addNaturalText(GeonamesPropertyEnum.gn_name.toString(), label);
//[3] alternate names: the token is only consumed here, because alternate
//    names are added later during processing
t.next();
//[4] lat
BigDecimal latitude = new BigDecimal(t.next());
doc.add(GeonamesPropertyEnum.geo_lat.toString(), latitude);
//[5] lon
BigDecimal longitude = new BigDecimal(t.next());
doc.add(GeonamesPropertyEnum.geo_long.toString(), longitude);
//[6] featureClass and [7] featureCode
// Both tokens are consumed unconditionally, so that the tokenizer stays
// aligned with the columns even if one of the values is missing.
// NOTE: a missing column is assumed to be reported as null by t.next(), as
//       the null checks on other columns of this line suggest.
String featureClassToken = t.next();
String featureCodeToken = t.next();
if(featureClassToken != null){
    String featureClass = GEONAMES_ONTOLOGY_NS + featureClassToken;
    doc.add(GeonamesPropertyEnum.gn_featureClass.toString(), getReference(featureClass));
    if(featureCodeToken != null){
        // the URI of a feature code is <featureClass>.<featureCode>, so the
        // '.' separator is required between the class and the code
        doc.add(GeonamesPropertyEnum.gn_featureCode.toString(),
            getReference(featureClass + '.' + featureCodeToken));
    }
} // else: no feature class -> do not create references ending with "null"
//countryCode
// -> geonames uses here the link to an HTML page showing the country.
//    We would like to use a link to a SKOS:Concept representing the country
//    ... but luckily we only need to add the URI here!
Set<String> ccs = new HashSet<String>();
//[8] countryCode (the primary country of the feature)
String countryCode = t.next();
if(countryCode != null){
    //need to trim because some country codes use ' ' to indicate null!
    countryCode = countryCode.trim();
    if(countryCode.length() == 2){
        ccs.add(countryCode);
    } else {
        //Yes there are some features that are in no country! A blank or
        //malformed value must not be used as country of the admin codes.
        countryCode = null;
    }
}
//[9] alternate countryCodes
String altCc = t.next();
if(altCc != null){
    StringTokenizer altCcT = new StringTokenizer(altCc,",");
    while(altCcT.hasMoreTokens()){
        //NOTE: a separate variable is used on purpose. Reusing countryCode
        //      would replace the primary country by the last alternate one
        //      and corrupt the admin code hierarchy built below.
        String alternateCode = altCcT.nextToken().trim();
        if(alternateCode.length() == 2){
            ccs.add(alternateCode);
        }
    }
}
if(!ccs.isEmpty()){
    doc.add(GeonamesPropertyEnum.gn_countryCode.toString(),ccs);
}
//[10 TO 13] Admin codes
//first read them -> we need to consume the tokens anyway
String[] adminCodes = new String[] {
    countryCode, //primary country (null if the feature is in no country)
    t.next(), //ADM1
    t.next(), //ADM2
    t.next(), //ADM3
    t.next()};//ADM4
//Workaround for Admin1 -> add leading '0' for single character values
if(adminCodes[1] != null && adminCodes[1].length() < 2){
    adminCodes[1] = '0'+adminCodes[1];
}
//now process the admin codes (including the country at index 0). The value
//of each level is the '.' separated path of all codes down to this level.
StringBuilder hierarchy = new StringBuilder();
for(int i=0;i<adminCodes.length;i++){
    String code = adminCodes[i];
    if(code == null){
        //a missing level can not be part of a path, so this level and all
        //deeper ones are skipped instead of writing "null" into the code
        break;
    }
    if(i > 0){
        hierarchy.append('.'); //add the separator char
    }
    hierarchy.append(code);
    if(!code.equals("00")){ //00 is used to indicate not known
        String property = i==0 ? GeonamesPropertyEnum.idx_CC.toString() :
            GeonamesPropertyEnum.idx_ADM.toString() + i;
        doc.add(property, hierarchy.toString());
    }
}
//[14] population
String populationString = t.next();
if(populationString != null){
    //NOTE: we need to use a long, because some values (e.g. the entry for
    //      Asia) do not fit into an int
    long population = Long.parseLong(populationString);
    //compare the long value itself: narrowing it to an int would turn values
    //above Integer.MAX_VALUE into negative numbers and silently drop them
    if(population > 0){
        doc.add(GeonamesPropertyEnum.gn_population.toString(), Long.valueOf(population));
    }
}
//[15 TO 16] elevation and gtopo30
//both tokens are always consumed to keep the tokenizer aligned with the columns
String elevationString = t.next();
String gtopo30String = t.next();
//if there is no elevation, then use the gtopo30 value instead
String altString = elevationString != null ? elevationString : gtopo30String;
if(altString != null){ //both values may be missing -> add no altitude at all
    Integer alt = Integer.valueOf(altString);
    if(alt.intValue() > -9999){ //it looks like -9999 is sometimes used as not known!
        doc.add(GeonamesPropertyEnum.geo_alt.toString(),alt);
    }
}
//[17] time zone: not used, but the token needs to be consumed
t.next();
//[18] modification date
String modified = t.next();
if(modified != null){
    String dateProperty = GeonamesPropertyEnum.dc_date.toString();
    try {
        doc.add(dateProperty, TimeUtils.toDate(DataTypeEnum.DateTime, modified));
    } catch (IllegalArgumentException e) {
        //a value that can not be parsed is only logged; the Representation
        //is returned without a modification date
        String message = String.format("Unable to parse modificationDate for geonamesID %s from value %s",doc.getId(),modified);
        log.warn(message);
    }
}
//no creator is added, as this is provided by the attribution anyway
return doc;