// mime-types from the server to be processed, set "" as one of the valid mime-types in FetcherPolicy.
mimeType = HttpUtils.getMimeTypeFromContentType(contentType);
Set<String> mimeTypes = _fetcherPolicy.getValidMimeTypes();
if ((mimeTypes != null) && (mimeTypes.size() > 0)) {
if (!mimeTypes.contains(mimeType)) {
throw new AbortedFetchException(url, "Invalid mime-type: " + mimeType, AbortedFetchReason.INVALID_MIMETYPE);
}
}
needAbort = false;
} catch (ClientProtocolException e) {
// Oleg guarantees that no abort is needed in the case of an IOException (which ClientProtocolException is a subclass of)
needAbort = false;
// If the root cause was a "too many redirects" error, we want to map this to a specific
// exception that contains the final redirect.
if (e.getCause() instanceof MyRedirectException) {
MyRedirectException mre = (MyRedirectException)e.getCause();
String redirectUrl = url;
try {
redirectUrl = mre.getUri().toURL().toExternalForm();
} catch (MalformedURLException e2) {
LOGGER.warn("Invalid URI saved during redirect handling: " + mre.getUri());
}
throw new RedirectFetchException(url, redirectUrl, mre.getReason());
} else if (e.getCause() instanceof RedirectException) {
throw new RedirectFetchException(url, extractRedirectedUrl(url, localContext), RedirectExceptionReason.TOO_MANY_REDIRECTS);
} else {
throw new IOFetchException(url, e);
}
} catch (IOException e) {
// Oleg guarantees that no abort is needed in the case of an IOException
needAbort = false;
if (e instanceof ConnectionPoolTimeoutException) {
// Should never happen, so let's dump some info about the connection pool.
ThreadSafeClientConnManager cm = (ThreadSafeClientConnManager)_httpClient.getConnectionManager();
int numConnections = cm.getConnectionsInPool();
cm.closeIdleConnections(0, TimeUnit.MILLISECONDS);
LOGGER.error(String.format("Got ConnectionPoolTimeoutException: %d connections before, %d after idle close", numConnections, cm.getConnectionsInPool()));
}
throw new IOFetchException(url, e);
} catch (URISyntaxException e) {
throw new UrlFetchException(url, e.getMessage());
} catch (IllegalStateException e) {
throw new UrlFetchException(url, e.getMessage());
} catch (BaseFetchException e) {
throw e;
} catch (Exception e) {
// Map anything else to a generic IOFetchException
// TODO KKr - create generic fetch exception
throw new IOFetchException(url, new IOException(e));
} finally {
safeAbort(needAbort, request);
}
// Figure out how much data we want to try to fetch.
int maxContentSize = getMaxContentSize(mimeType);
int targetLength = maxContentSize;
boolean truncated = false;
String contentLengthStr = headerMap.getFirst(HttpHeaderNames.CONTENT_LENGTH);
if (contentLengthStr != null) {
try {
int contentLength = Integer.parseInt(contentLengthStr);
if (contentLength > targetLength) {
truncated = true;
} else {
targetLength = contentLength;
}
} catch (NumberFormatException e) {
// Ignore (and log) invalid content length values.
LOGGER.warn("Invalid content length in header: " + contentLengthStr);
}
}
// Now finally read in response body, up to targetLength bytes.
// Note that entity might be null, for zero length responses.
byte[] content = new byte[0];
long readRate = 0;
HttpEntity entity = response.getEntity();
needAbort = true;
if (entity != null) {
InputStream in = null;
try {
in = entity.getContent();
byte[] buffer = new byte[BUFFER_SIZE];
int bytesRead = 0;
int totalRead = 0;
ByteArrayOutputStream out = new ByteArrayOutputStream(DEFAULT_BYTEARRAY_SIZE);
int readRequests = 0;
int minResponseRate = _fetcherPolicy.getMinResponseRate();
// TODO KKr - we need to monitor the rate while reading a
// single block. Look at HttpClient
// metrics support for how to do this. Once we fix this, fix
// the test to read a smaller (< 20K)
// chunk of data.
while ((totalRead < targetLength)
&& ((bytesRead = in.read(buffer, 0, Math.min(buffer.length, targetLength - totalRead))) != -1)) {
readRequests += 1;
totalRead += bytesRead;
out.write(buffer, 0, bytesRead);
// Assume read time is at least one millisecond, to avoid DBZ exception.
long totalReadTime = Math.max(1, System.currentTimeMillis() - readStartTime);
readRate = (totalRead * 1000L) / totalReadTime;
// Don't bail on the first read cycle, as we can get a hiccup starting out.
// Also don't bail if we've read everything we need.
if ((readRequests > 1) && (totalRead < targetLength) && (readRate < minResponseRate)) {
throw new AbortedFetchException(url, "Slow response rate of " + readRate + " bytes/sec", AbortedFetchReason.SLOW_RESPONSE_RATE);
}
// Check to see if we got interrupted.
if (Thread.interrupted()) {
throw new AbortedFetchException(url, AbortedFetchReason.INTERRUPTED);
}
}
content = out.toByteArray();
needAbort = truncated || (in.available() > 0);
} catch (IOException e) {
// We don't need to abort if there's an IOException
throw new IOFetchException(url, e);
} finally {
safeAbort(needAbort, request);
safeClose(in);
}
}
// Toss truncated content whose mime-type is not text (e.g. images).
if ( (truncated)
&& (!isTextMimeType(mimeType))) {
throw new AbortedFetchException(url, "Truncated image", AbortedFetchReason.CONTENT_SIZE);
}
// Now see if we need to uncompress the content.
String contentEncoding = headerMap.getFirst(HttpHeaderNames.CONTENT_ENCODING);
if (contentEncoding != null) {
if (LOGGER.isTraceEnabled()) {
fetchTrace.append("; Content-Encoding: " + contentEncoding);
}
// TODO KKr We might want to just decompress a truncated gzip
// containing text (since we have a max content size to save us
// from any gzip corruption). We might want to break the following
// out into a separate method, by the way (if not refactor this
// entire monolithic method).
//
try {
if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
if (truncated) {
throw new AbortedFetchException(url, "Truncated compressed data", AbortedFetchReason.CONTENT_SIZE);
} else {
ExpandedResult expandedResult = EncodingUtils.processGzipEncoded(content, maxContentSize);
truncated = expandedResult.isTruncated();
if ( (truncated)
&& (!isTextMimeType(mimeType))) {
throw new AbortedFetchException(url, "Truncated decompressed image", AbortedFetchReason.CONTENT_SIZE);
} else {
content = expandedResult.getExpanded();
if (LOGGER.isTraceEnabled()) {
fetchTrace.append("; unzipped to " + content.length + " bytes");
}