*
* file, offset, timestamp, digest, urlKey, originalUrl
*/
private CaptureSearchResult genericResult(WARCRecord rec) {
    // Builds the part of a CaptureSearchResult that is derived from the
    // record header alone: file, offset, timestamp, digest, urlKey and
    // originalUrl. Mime type, HTTP code and redirect URL are left at
    // DEFAULT_VALUE here.
    CaptureSearchResult result = new CaptureSearchResult();
    result.setMimeType(DEFAULT_VALUE);
    result.setHttpCode(DEFAULT_VALUE);
    result.setRedirectUrl(DEFAULT_VALUE);

    ArchiveRecordHeader header = rec.getHeader();
    String file = transformWARCFilename(header.getReaderIdentifier());
    long offset = header.getOffset();
    result.setCaptureTimestamp(transformWARCDate(header.getDate()));
    result.setFile(file);
    result.setOffset(offset);
    result.setDigest(transformWARCDigest(header.getHeaderValue(
            WARCRecord.HEADER_KEY_PAYLOAD_DIGEST)));

    String origUrl = header.getUrl();
    if (origUrl == null) {
        // No target URL on the record. A warcinfo record is keyed by a
        // synthetic "filedesc:<filename>" URL; anything else gets
        // DEFAULT_VALUE.
        String filedescUrl = null;
        Object type = header.getHeaderValue(WARCConstants.HEADER_KEY_TYPE);
        // Compare the string forms of both sides: String.equals() is
        // always false for a non-String argument, so comparing the header
        // text directly against the WARCRecordType constant never matched.
        // Using toString() on both sides works whether the constant is an
        // enum or a String. A missing type header is treated as "not
        // warcinfo" instead of throwing NullPointerException.
        if (type != null
                && WARCConstants.WARCRecordType.warcinfo.toString()
                        .equals(type.toString())) {
            Object filename = header.getHeaderValue(
                    WARCConstants.HEADER_KEY_FILENAME);
            // A warcinfo record without a filename header cannot form a
            // filedesc URL; fall through to DEFAULT_VALUE below.
            if (filename != null) {
                filedescUrl = "filedesc:" + filename.toString();
            }
        }
        if (filedescUrl != null) {
            result.setOriginalUrl(filedescUrl);
            result.setUrlKey(filedescUrl);
        } else {
            result.setOriginalUrl(DEFAULT_VALUE);
            result.setUrlKey(DEFAULT_VALUE);
        }
    } else {
        result.setOriginalUrl(origUrl);
        try {
            String urlKey = canonicalizer.urlStringToKey(origUrl);
            result.setUrlKey(urlKey);
        } catch (URIException e) {
            // Canonicalization failed: log the URL (truncated to 100
            // characters) with its file and offset, and use the raw URL
            // as the key so the record still gets a result.
            String shortUrl = (origUrl.length() < 100)
                    ? origUrl
                    : origUrl.substring(0, 100);
            LOGGER.warning("FAILED canonicalize(" + shortUrl + "):" +
                    file + " " + offset);
            result.setUrlKey(origUrl);
        }
    }
    return result;
}