}
}
@Override
public int writeLine(CDXLine line) {
FastCaptureSearchResult result = new FastCaptureSearchResult();
String timestamp = line.getTimestamp();
String originalUrl = line.getOriginalUrl();
if ((prevResult != null) && (preferContains != null) &&
prevResult.getCaptureTimestamp().equals(timestamp) &&
prevResult.getOriginalUrl().equals(originalUrl) &&
prevLine.getLength().equals(line.getLength()) &&
prevLine.getOffset().equals(line.getOffset())) {
String currFile = line.getFilename();
String prevFile = prevLine.getFilename();
if (currFile.contains(preferContains) && !prevFile.contains(preferContains)) {
prevResult.setFile(currFile);
}
return 0;
}
result.setUrlKey(line.getUrlKey());
result.setCaptureTimestamp(timestamp);
result.setOriginalUrl(originalUrl);
// Special case: filter out captures that have userinfo
boolean hasUserInfo = (UrlOperations.urlToUserInfo(result.getOriginalUrl()) != null);
if (hasUserInfo) {
return 0;
}
result.setRedirectUrl(line.getRedirect());
result.setHttpCode(line.getStatusCode());
if (selfRedirFilter != null && !result.getRedirectUrl().equals(CDXLine.EMPTY_VALUE)) {
if (selfRedirFilter.filterObject(result) != ObjectFilter.FILTER_INCLUDE) {
return 0;
}
}
if (exclusionFilter != null) {
if (exclusionFilter.filterObject(result) != ObjectFilter.FILTER_INCLUDE) {
return 0;
}
}
result.setMimeType(line.getMimeType());
result.setDigest(line.getDigest());
result.setOffset(NumberUtils.toLong(line.getOffset(), -1));
result.setCompressedLength(NumberUtils.toLong(line.getLength(), -1));
result.setFile(line.getFilename());
result.setRobotFlags(line.getRobotFlags());
boolean isRevisit = false;
if (resolveRevisits) {
isRevisit = result.getFile().equals(CDXLine.EMPTY_VALUE) ||
result.getMimeType().equals(REVISIT_VALUE);
String digest = result.getDigest();
if (isRevisit) {
if (!isReverse) {
CaptureSearchResult payload = digestToOriginal.get(digest);
if (payload != null) {
result.flagDuplicateDigest(payload);
} else {
result.flagDuplicateDigest();
}
} else {
LinkedList<CaptureSearchResult> revisits = digestToRevisits.get(digest);
if (revisits == null) {
revisits = new LinkedList<CaptureSearchResult>();
digestToRevisits.put(digest, revisits);
}
revisits.add(result);
}
} else {
if (!isReverse) {
digestToOriginal.put(digest, result);
} else {
LinkedList<CaptureSearchResult> revisits = digestToRevisits.remove(digest);
if (revisits != null) {
for (CaptureSearchResult revisit : revisits) {
revisit.flagDuplicateDigest(result);
}
}
}
}
}
// String payloadFile = line.getField(RevisitResolver.origfilename);
//
// if (!payloadFile.equals(CDXLine.EMPTY_VALUE)) {
// FastCaptureSearchResult payload = new FastCaptureSearchResult();
// payload.setFile(payloadFile);
// payload.setOffset(NumberUtils.toLong(line.getField(RevisitResolver.origoffset), -1));
// payload.setCompressedLength(NumberUtils.toLong(line.getField(RevisitResolver.origlength), -1));
// result.flagDuplicateDigest(payload);
// }
if ((targetTimestamp != null) && (closest == null)) {
closest = determineClosest(result);
}
results.addSearchResult(result, !isReverse);
prevResult = result;
prevLine = line;
// Short circuit the load if seeking single capture
if (seekSingleCapture && resolveRevisits) {
if (closest != null) {
// If not a revisit, we're done
if (!isRevisit) {
done = true;
// Else make sure the revisit is resolved
} else if (result.getDuplicatePayload() != null) {
done = true;
}
}
}