ByteBuffer buff = new ByteBuffer();
// the connection to the HTTP server
// HttpConnection httpConn = null;
InputStream is = null;
BufferedWriter bwrite = null;
// get host
host = u.getHost();
if (host == null) {
throw new HttpException("no host part in URL found");
}
// get address, if not using a proxy
// (a client running behind a proxy may not be able to resolve
// internet host names itself, so resolution is skipped in that case)
if(! useProxy()) {
try {
addr = InetAddress.getByName(host);
} catch (UnknownHostException e) {
addr = null;
}
if (addr == null) {
throw new HttpException("host part (" + host + ") does not resolve");
}
}
// get path
path = u.getFile();
if (path.equals("")) {
path = "/";
}
// replace spaces
path=path.replaceAll(" ","%20");
// get protocol and port
port = u.getPort();
protocol = u.getProtocol().toLowerCase();
if (protocol.equals("http")) {
if (port == -1) {
port = DEFAULT_HTTPPORT;
}
} else if (protocol.equals("https")) {
if (port == -1) {
port = DEFAULT_HTTPSPORT;
}
secureConnection=true;
} else {
throw new HttpException("protocol " + protocol + " not supported");
}
// if using the proxy, request path is the whole URL, otherwise only
// the path part of the URL
if (useProxy() && (! secureConnection)) {
requestPath="http://"+host+path;
} else {
requestPath=path;
}
// get user info
userinfo = u.getUserInfo();
if (userinfo != null) {
if (userinfo.equals("")) {
userinfo=null;
} else {
// Store user info for this host
userInfos.setProperty(host,userinfo);
}
} else {
// do we have stored user info for this host?
userinfo=userInfos.getProperty(host);
}
if (callback != null) {
callback.setHttpToolDocUrl(u.toString());
callback.setHttpToolStatus(STATUS_CONNECTING);
}
// okay, we got all needed information, try to connect to the host
try {
if (httpConn == null) {
// connect and initialize streams
// timeout is stored in seconds in HttpTool, but
// HttpConnection uses milliseconds
if (secureConnection) {
HttpsHelper helper = new HttpsHelper(proxyAddr,proxyPort,useProxy());
httpConn = helper.createHttpsConnection(host,port);
} else {
if (useProxy()) {
httpConn = HttpConnection.createConnection(proxyAddr,
proxyPort,
socketTimeout*1000);
} else {
httpConn = HttpConnection.createConnection(addr,
port,
socketTimeout*1000);
}
}
}
is = new LimitedBandwidthStream(
new BufferedInputStream(httpConn.getInputStream(), 256),
bandwidth);
bwrite = new BufferedWriter(
new OutputStreamWriter(httpConn.getOutputStream()));
if (callback != null) {
callback.setHttpToolStatus(STATUS_CONNECTED);
}
// write HTTP request
// get or post ?
if (method == HttpConstants.GET) {
bwrite.write("GET ");
bwrite.write(requestPath);
if ((parameters != null)
&& (! parameters.equals(""))) {
bwrite.write("?");
bwrite.write(parameters);
}
} else if (method == HttpConstants.POST) {
bwrite.write("POST " + requestPath);
} else {
throw new HttpException("HTTP method " + method + " not supported");
}
// last part of request line
bwrite.write(" ");
bwrite.write(HTTP_VERSION);
bwrite.write("\r\n");
// Referer header only if defined
if (referer != null) {
bwrite.write("Referer: " + referer + "\r\n");
}
// if cookies are enabled, write a Cookie: header
if (cookiesEnabled) {
String cookieString = cookieManager.cookiesForURL(u);
if (cookieString != null) {
bwrite.write("Cookie: ");
bwrite.write(cookieString);
bwrite.write("\r\n");
log.debug("Cookie request header: "+cookieString);
}
}
// Write other headers
bwrite.write("Host: " + host + "\r\n");
bwrite.write("User-Agent: " + agentName + "\r\n");
bwrite.write("Accept: */*\r\n");
if (ntlmAuthorizationInfo == null) {
bwrite.write("Connection: close\r\n");
} else {
bwrite.write("Connection: keep-alive\r\n");
}
// Write "From:" header only if a fromAddress is defined
if (fromAddress != null) {
bwrite.write("From: "+fromAddress+"\r\n");
}
// if we have a username and password, let's write an Authorization
// header
if (userinfo != null) {
// special hack to support usernames with "@"
// TO DO: find a better solution for this problem
userinfo = userinfo.replace('%','@');
bwrite.write("Authorization: Basic ");
bwrite.write(Base64.encode(userinfo));
bwrite.write("\r\n");
log.debug(userinfo);
}
if (ntlmAuthorizationInfo != null) {
bwrite.write("Authorization: NTLM ");
bwrite.write(ntlmAuthorizationInfo);
bwrite.write("\r\n");
}
// if there is an "If-Modified-Since" date, also write this header
if (modifyDate != null) {
String dateStr = df.format(modifyDate);
bwrite.write("If-Modified-Since: ");
bwrite.write(dateStr);
bwrite.write("\r\n");
log.debug("If-Modified-Since header: "+dateStr);
}
// for a POST request we also need a content-length header
if (method == HttpConstants.POST) {
bwrite.write("Content-Type: application/x-www-form-urlencoded\r\n");
bwrite.write("Content-Length: "+parameters.length()+"\r\n");
}
// finished headers
bwrite.write("\r\n");
// if this is a POST request, we have to add the POST parameters
if (method == HttpConstants.POST) {
bwrite.write(parameters);
}
bwrite.flush();
if (callback != null) {
callback.setHttpToolStatus(STATUS_RETRIEVING);
}
// read the first line (HTTP return code)
while ((i = is.read()) != 10) {
if (i == -1) {
throw new HttpException("Could not get HTTP return code "+
"(buffer content is "+buff.toString()+")");
}
buff.append((byte)i);
}
String httpCode = lineString(buff.getContent());
buff.clean();
doc.setHttpCode(httpCode);
// read the HTTP headers
boolean finishedHeaders = false;
while (!finishedHeaders) {
i = is.read();
if (i == -1) {
throw new HttpException("Could not read HTTP headers");
}
if (i >= 32) {
buff.append((byte)i);
}
// HTTP header processing
if (i == LF) {
String line = lineString(buff.getContent());
buff.clean();
// empty line means "end of headers"
if (line.trim().equals("")) {
finishedHeaders = true;
} else {
HttpHeader head = new HttpHeader(line);
doc.addHeader(head);
if (cookiesEnabled
&& head.isSetCookie()) {
try {
Cookie cookie = new Cookie(head.toLine(),u);
cookieManager.add(cookie);
log.debug("Got a cookie "+cookie);
} catch (CookieException e) {
log.info("Could not interpret cookie: "+e.getMessage());
}
}
// Content chunked ?
if (head.getName().equalsIgnoreCase("Transfer-Encoding")
&& head.getValue().equalsIgnoreCase("chunked")) {
chunkedEncoding = true;
}
}
}
}
buff.clean();
// if there is a DownloadRule, ask if we should download
// the data
if (downloadRules != null) {
// if it is not allowed to download this URL, close socket
// and return a null document
boolean isNotModified = false;
if (modifyDate != null) {
HttpHeader lastModifiedHeader = doc.getHttpHeader("Last-Modified");
if (lastModifiedHeader != null) {
try {
Date lastModifiedDate = df.parse(lastModifiedHeader.getValue());
if (lastModifiedDate.compareTo(modifyDate) <= 0) {
isNotModified = true;
}
} catch (ParseException e) {}
}
}
if (! downloadRules.downloadAllowed(doc.getHttpHeader()) || isNotModified) {
if (doc.isNotModified()) {
log.info("If-Not-Modified successfull for: " + u);
} else if (isNotModified) {
log.info("Header indicates not modified for: " + u);
} else {
log.info("Download not allowed by download rule.");
}
// Close connection
httpConn.close(); httpConn = null;
if (callback != null) {
callback.setHttpToolStatus(STATUS_DENIEDBYRULE);
}
return null;
}
}
// if we got encoding "chunked", use the ChunkedInputStream
if (chunkedEncoding) {
chunkStream = new ChunkedInputStream(is);
}
// did we get a Content-Length header?
HttpHeader contentLength = doc.getHeader(HttpHeader.CONTENT_LENGTH);
if (contentLength != null) {
try {
docSize = Integer.parseInt(contentLength.getValue());
} catch (NumberFormatException e) {
log.error("Got a malformed Content-Length header from the server");
docSize = -1;
}
// send information to callback
if (callback != null) {
callback.setHttpToolDocSize(docSize);
}
// initialize the byte buffer with the given document size
// there is no need to increase the buffer size dynamically
if (docSize > 0) {
buff.setSize(docSize);
}
}
// read data
boolean finished = false;
int count=0;
while (! finished) {
if (chunkedEncoding) {
i = chunkStream.read();
} else {
i = is.read();
}
if (i == -1) {
// this should only happen on HTTP/1.0 responses
// without a Content-Length header