numRedirects = redirects.intValue();
}
hostAddress = (String)(localContext.getAttribute(HOST_ADDRESS));
if (hostAddress == null) {
throw new UrlFetchException(url, "Host address not saved in context");
}
Header cth = response.getFirstHeader(HttpHeaderNames.CONTENT_TYPE);
if (cth != null) {
contentType = cth.getValue();
}
// Check if we should abort due to mime-type filtering. Note that this will fail if the server
// doesn't report a mime-type, but that's how we want it as this configuration is typically
// used when only a subset of parsers are installed/enabled, so we don't want the auto-detect
// code in Tika to get triggered & try to process an unsupported type. If you want unknown
// mime-types from the server to be processed, set "" as one of the valid mime-types in FetcherPolicy.
mimeType = HttpUtils.getMimeTypeFromContentType(contentType);
Set<String> mimeTypes = _fetcherPolicy.getValidMimeTypes();
if ((mimeTypes != null) && (mimeTypes.size() > 0)) {
if (!mimeTypes.contains(mimeType)) {
throw new AbortedFetchException(url, "Invalid mime-type: " + mimeType, AbortedFetchReason.INVALID_MIMETYPE);
}
}
needAbort = false;
} catch (ClientProtocolException e) {
// Oleg guarantees that no abort is needed in the case of an IOException (which ClientProtocolException is a subclass of)
needAbort = false;
// If the root cause was a "too many redirects" error, we want to map this to a specific
// exception that contains the final redirect.
if (e.getCause() instanceof MyRedirectException) {
MyRedirectException mre = (MyRedirectException)e.getCause();
String redirectUrl = url;
try {
redirectUrl = mre.getUri().toURL().toExternalForm();
} catch (MalformedURLException e2) {
LOGGER.warn("Invalid URI saved during redirect handling: " + mre.getUri());
}
throw new RedirectFetchException(url, redirectUrl, mre.getReason());
} else if (e.getCause() instanceof RedirectException) {
throw new RedirectFetchException(url, extractRedirectedUrl(url, localContext), RedirectExceptionReason.TOO_MANY_REDIRECTS);
} else {
throw new IOFetchException(url, e);
}
} catch (IOException e) {
// Oleg guarantees that no abort is needed in the case of an IOException
needAbort = false;
if (e instanceof ConnectionPoolTimeoutException) {
// Should never happen, so let's dump some info about the connection pool.
ThreadSafeClientConnManager cm = (ThreadSafeClientConnManager)_httpClient.getConnectionManager();
int numConnections = cm.getConnectionsInPool();
cm.closeIdleConnections(0, TimeUnit.MILLISECONDS);
LOGGER.error(String.format("Got ConnectionPoolTimeoutException: %d connections before, %d after idle close", numConnections, cm.getConnectionsInPool()));
}
throw new IOFetchException(url, e);
} catch (URISyntaxException e) {
throw new UrlFetchException(url, e.getMessage());
} catch (IllegalStateException e) {
throw new UrlFetchException(url, e.getMessage());
} catch (BaseFetchException e) {
throw e;
} catch (Exception e) {
// Map anything else to a generic IOFetchException
// TODO KKr - create generic fetch exception