record.addLabelValue("format","WARC File Format 1.0");
record.addLabelValue("conformsTo","http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf");
// Get other values from metadata provider
CrawlMetadata provider = getMetadataProvider();
addIfNotBlank(record,"operator", provider.getOperator());
addIfNotBlank(record,"publisher", provider.getOrganization());
addIfNotBlank(record,"audience", provider.getAudience());
addIfNotBlank(record,"isPartOf", provider.getJobName());
// TODO: make date match 'job creation date' as in Heritrix 1.x;
// until then, leave out (plenty of dates already in WARC
// records)
// String rawDate = provider.getBeginDate();
// if(StringUtils.isNotBlank(rawDate)) {
// Date date;
// try {
// date = ArchiveUtils.parse14DigitDate(rawDate);
// addIfNotBlank(record,"created",ArchiveUtils.getLog14Date(date));
// } catch (ParseException e) {
// logger.log(Level.WARNING,"obtaining warc created date",e);
// }
// }
addIfNotBlank(record,"description", provider.getDescription());
addIfNotBlank(record,"robots", provider.getRobotsPolicyName().toLowerCase());
addIfNotBlank(record,"http-header-user-agent",
provider.getUserAgent());
addIfNotBlank(record,"http-header-from",
provider.getOperatorFrom());
// really ugly to return as List<String>, but changing would require
// larger refactoring
return Collections.singletonList(record.toString());
}