}
CaptureSearchResults captureResults =
(CaptureSearchResults) results;
CaptureSearchResult closest = null;
closest =
getReplay().getClosest(wbRequest, captureResults);
//CaptureSearchResult originalClosest = closest;
int counter = 0;
//TODO: parameterize
//int maxTimeouts = 2;
//int maxMissingRevisits = 2;
Set<String> skipFiles = null;
//boolean isRevisit = false;
while (true) {
// Support for redirect from the CDX redirectUrl field
// This was the intended use of the redirect field, but it has not actually been tested
// To enable this functionality, uncomment the lines below
// This is an optimization that allows for redirects to be handled without loading the original content
//
//String redir = closest.getRedirectUrl();
//if ((redir != null) && !redir.equals("-")) {
// String fullRedirect = getUriConverter().makeReplayURI(closest.getCaptureTimestamp(), redir);
// throw new BetterRequestException(fullRedirect, Integer.valueOf(closest.getHttpCode()));
//}
Resource httpHeadersResource = null;
Resource payloadResource = null;
boolean isRevisit = false;
try {
counter++;
if (closest == null) {
throw new ResourceNotAvailableException("Self-Redirect: No Closest Match Found", 404);
}
closest.setClosest(true);
checkAnchorWindow(wbRequest,closest);
// Attempt to resolve any not-found embedded content with next-best
// For "best last" capture, skip not-founds and redirects, hoping to find the best 200 response.
if ((wbRequest.isAnyEmbeddedContext() && closest.isHttpError()) ||
(wbRequest.isBestLatestReplayRequest() && !closest.isHttpSuccess())) {
CaptureSearchResult nextClosest = closest;
while ((nextClosest = findNextClosest(nextClosest, captureResults, requestMS)) != null) {
// If redirect, save but keep looking -- if no better match, will use the redirect
if (nextClosest.isHttpRedirect()) {
closest = nextClosest;
// If success, pick that one!
} else if (nextClosest.isHttpSuccess()) {
closest = nextClosest;
break;
}
}
}
// Redirect to url for the actual closest capture, if not a retry
if (counter == 1) {
handleReplayRedirect(wbRequest, httpResponse, captureResults, closest);
}
// If revisit, may load two resources separately
if (closest.isDuplicateDigest()) {
isRevisit = true;
// If the payload record is known and it failed before with this payload, don't even try
// loading the header resource -- the outcome will likely be the same
if ((closest.getDuplicatePayloadFile() != null) &&
(skipFiles != null) && skipFiles.contains(closest.getDuplicatePayloadFile())) {
counter--; //don't really count this as we're not even checking the file anymore
throw new ResourceNotAvailableException("Revisit: Skipping already failed " + closest.getDuplicatePayloadFile());
} else if ((closest.getDuplicatePayloadFile() == null) && wbRequest.isTimestampSearchKey()) {
// If a missing revisit and loaded optimized, try loading the entire timeline again
wbRequest.setTimestampSearchKey(false);
results = queryIndex(wbRequest);
captureResults = (CaptureSearchResults)results;
closest = getReplay().getClosest(wbRequest, captureResults);
//originalClosest = closest;
//maxTimeouts *= 2;
//maxMissingRevisits *= 2;
continue;
}
// If old-style arc revisit (no mimetype, filename is '-'), then don't load
// headersResource = payloadResource
if (EMPTY_VALUE.equals(closest.getFile())) {
closest.setFile(closest.getDuplicatePayloadFile());
closest.setOffset(closest.getDuplicatePayloadOffset());
// See that this is successful
httpHeadersResource = getResource(closest, skipFiles);
// Hmm, since this is a revisit it should not redirect -- was: if both headers and payload are from a different timestamp, redirect to that timestamp
// if (!closest.getCaptureTimestamp().equals(closest.getDuplicateDigestStoredTimestamp())) {
// throwRedirect(wbRequest, httpResponse, captureResults, closest.getDuplicateDigestStoredTimestamp(), closest.getOriginalUrl(), closest.getHttpCode());
// }
payloadResource = httpHeadersResource;
} else {
httpHeadersResource = getResource(closest, skipFiles);
CaptureSearchResult payloadLocation = retrievePayloadForIdenticalContentRevisit(wbRequest, httpHeadersResource, closest);
if (payloadLocation == null) {
throw new ResourceNotAvailableException("Revisit: Missing original for revisit record " + closest.toString(), 404);
}
payloadResource = getResource(payloadLocation, skipFiles);
// If zero length old-style revisit with no headers, then must use payloadResource as headersResource
if (httpHeadersResource.getRecordLength() <= 0) {
httpHeadersResource.close();
httpHeadersResource = payloadResource;
}
}
} else {
httpHeadersResource = getResource(closest, skipFiles);
payloadResource = httpHeadersResource;
}
// Ensure that we are not self-redirecting!
// If the status is a redirect, check that the location or URL dates are different from the current request
// Otherwise, replay the previous matched capture.
// This chain is unlikely to go past one previous capture, but is possible
if (isSelfRedirect(httpHeadersResource, closest, wbRequest, requestURL)) {
LOGGER.info("Self-Redirect: Skipping " + closest.getCaptureTimestamp() + "/" + closest.getOriginalUrl());
closest = findNextClosest(closest, captureResults, requestMS);
continue;
}
if (counter > 1) {
handleReplayRedirect(wbRequest, httpResponse, captureResults, closest);
}
p.retrieved();
ReplayRenderer renderer =
getReplay().getRenderer(wbRequest, closest, httpHeadersResource, payloadResource);
if (this.isEnableWarcFileHeader() && (warcFileHeader != null)) {
if (isRevisit && (closest.getDuplicatePayloadFile() != null)) {
httpResponse.addHeader(warcFileHeader, closest.getDuplicatePayloadFile());
} else {
httpResponse.addHeader(warcFileHeader, closest.getFile());
}
}
// Memento URL-M response
if (this.isEnableMemento()) {
MementoUtils.addMementoHeaders(httpResponse, captureResults, closest, wbRequest);
}
renderer.renderResource(httpRequest, httpResponse, wbRequest,
closest, httpHeadersResource, payloadResource, getUriConverter(), captureResults);
p.rendered();
p.write(wbRequest.getReplayTimestamp() + " " +
wbRequest.getRequestUrl());
break;
} catch (SpecificCaptureReplayException scre) {
//final String SOCKET_TIMEOUT_MSG = "java.net.SocketTimeoutException: Read timed out";
CaptureSearchResult nextClosest = null;
// if exceed maxRedirectAttempts, stop
if ((counter > maxRedirectAttempts) && ((this.getLiveWebPrefix() == null) || !isWaybackReferer(wbRequest, this.getLiveWebPrefix()))) {
LOGGER.info("LOADFAIL: Timeout: Too many retries, limited to " + maxRedirectAttempts);
} else if ((closest != null) && !wbRequest.isIdentityContext()) {
nextClosest = findNextClosest(closest, captureResults, requestMS);
}
// Skip any nextClosest that has the same exact filename?
// Removing in case skip something that works..
// while ((nextClosest != null) && closest.getFile().equals(nextClosest.getFile())) {
// nextClosest = findNextClosest(nextClosest, captureResults, requestMS);
//}
String msg = null;
if (closest != null) {
msg = scre.getMessage() + " /" + closest.getCaptureTimestamp() + "/" + closest.getOriginalUrl();
} else {
msg = scre.getMessage() + " /" + wbRequest.getReplayTimestamp() + "/" + wbRequest.getRequestUrl();
}
if (nextClosest != null) {
// Store failed filename for revisits, as they may be repeated
if (isRevisit) {
if (scre.getDetails() != null) {
if (skipFiles == null) {
skipFiles = new HashSet<String>();
}
// Details should contain the failed filename from the ResourceStore
skipFiles.add(scre.getDetails());
}
}
if (msg.startsWith("Self-Redirect")) {
LOGGER.info("(" + counter + ")LOADFAIL-> " + msg + " -> " + nextClosest.getCaptureTimestamp());
} else {
LOGGER.warning("(" + counter + ")LOADFAIL-> " + msg + " -> " + nextClosest.getCaptureTimestamp());
}
closest = nextClosest;
} else if (wbRequest.isTimestampSearchKey()) {
wbRequest.setTimestampSearchKey(false);