/***************************************************************************
* Copyright 2001-2007 The VietSpider All rights reserved. *
**************************************************************************/
package org.vietspider.ui.htmlexplorer;
import java.io.File;
import java.net.URL;
import java.util.Hashtable;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.vietspider.chars.URLEncoder;
import org.vietspider.chars.refs.RefsDecoder;
import org.vietspider.html.HTMLDocument;
import org.vietspider.html.HTMLNode;
import org.vietspider.html.path2.NodePath;
import org.vietspider.html.path2.NodePathParser;
import org.vietspider.html.util.HTMLAnchorUtil;
import org.vietspider.html.util.HTMLNodeUtil;
import org.vietspider.html.util.HTMLParserDetector;
import org.vietspider.html.util.URLCodeGenerator;
import org.vietspider.net.client.HttpHandlers;
import org.vietspider.net.client.HttpResponseReader;
import org.vietspider.net.client.WebClient;
import org.vietspider.ui.text.TextHandler;
/**
 * Web client used by the HTML explorer UI. On top of {@link WebClient} it
 * keeps a registry of the GET requests that are currently being executed so
 * that a caller can abort one by address, and offers helpers to build an
 * {@link HTMLDocument} from an address and to locate a node by its text.
 *
 * Author : Nhu Dinh Thuan
 * nhudinhthuan@yahoo.com
 * Oct 23, 2007
 */
public class FastWebClient extends WebClient {

  /** Requests currently inside {@code execute}, keyed by the encoded address. */
  private final Hashtable<String, HttpGet> currentGets = new Hashtable<String, HttpGet>();

  /**
   * Aborts the in-flight GET request registered under the given address, if any.
   *
   * NOTE(review): {@link #loadContent(String, String)} registers requests under
   * the address returned by {@code URLEncoder.encode}; if encoding changes the
   * string, a caller passing the raw address will not find the request here.
   * Confirm what callers pass.
   *
   * @param address key of the request to abort; {@code null} or blank is ignored
   */
  public void abort(String address) {
    if(address == null || address.trim().length() == 0) return;
    // A single remove is enough: it returns null when nothing is registered.
    HttpGet httpGet = currentGets.remove(address);
    if(httpGet != null) httpGet.abort();
  }

  /**
   * Downloads the body of the resource at {@code address}.
   *
   * The client is (re)bound to the address via {@code setURL} when no host is
   * set yet or when the address points to a different host than the current one.
   *
   * @param referer referer handed to {@code setURL} and {@code createGetMethod}
   * @param address address to load; must be parseable by {@link URL}
   * @return the body as read by the response reader, or {@code null} when no
   *         GET method could be created
   * @throws Exception whatever URL parsing, request execution or body reading throws
   */
  public byte[] loadContent(String referer, String address) throws Exception {
    URL url = new URL(address);
    if(host == null || !new URLCodeGenerator().compareHost(url.getHost(), getHost())) {
      setURL(referer, url);
    }

    String encodedAddress = new URLEncoder().encode(address);
    HttpGet httpGet = createGetMethod(encodedAddress, referer);
    // Check before registering: Hashtable rejects null values with a NullPointerException.
    if(httpGet == null) return null;

    currentGets.put(encodedAddress, httpGet);
    HttpResponse httpResponse;
    try {
      HttpHost httpHost = createHttpHost(encodedAddress);
      httpResponse = execute(httpHost, httpGet);
    } finally {
      // Always unregister, also when execute fails, so the registry does not leak entries.
      currentGets.remove(encodedAddress);
    }

    HttpResponseReader httpResponseReader = HttpHandlers.getInstance().createReader();
    return httpResponseReader.readBody(httpResponse);
  }

  /**
   * Creates a document for the given address. Sources are tried in this order:
   * the cache returned by {@code getCacheData()}, a local file path, a
   * {@code file:} URL, and finally a download through
   * {@link #loadContent(String, String)}.
   *
   * When the address carries a non-empty fragment ({@code #ref}), the fragment
   * is removed before loading and the loaded document is passed through
   * {@code HTMLAnchorUtil.searchDocument} with that fragment.
   *
   * @param refer referer used when the content has to be downloaded
   * @param address cache key, file path or URL of the document
   * @param cache whether downloaded content is stored via {@code cacheResponse}
   * @param detector parser used to build the document
   * @return the document, or {@code null} when the address is blank or the
   *         download produced no data
   * @throws Exception on malformed URLs or any loading/parsing failure
   */
  public HTMLDocument createDocument(String refer,
      String address, boolean cache, HTMLParserDetector detector) throws Exception {
    if(address == null || address.trim().length() < 1) return null;

    char [] chars = getCacheData().get(address);
    if(chars != null) return detector.createDocument(chars);

    File file = new File(address);
    if(file.exists()) return detector.loadDocument(file);

    URL url = new URL(address);
    String ref = url.getRef();
    if(ref != null && (ref = ref.trim()).isEmpty()) ref = null;

    // Drop the fragment (even an empty one) so it is neither requested nor used
    // as a cache key, and so that a file: URL can be converted to a File.
    int fragmentIndex = address.indexOf('#');
    if(fragmentIndex > -1) address = address.substring(0, fragmentIndex);

    HTMLDocument document = null;
    if("file".equalsIgnoreCase(url.getProtocol())) {
      // File(URI) rejects URIs that have a fragment, hence the stripped address.
      file = new File(new URL(address).toURI());
      document = detector.loadDocument(file);
    } else {
      byte[] obj = loadContent(refer, address);
      if(obj == null || obj.length < 1) return null;
      document = detector.createDocument(obj);
      chars = document.getTextValue().toCharArray();
      if(cache) cacheResponse(address, chars);
    }

    if(document == null || ref == null) return document;
    return new HTMLAnchorUtil().searchDocument(document, ref);
  }

  /**
   * Builds the path of the node delimited by a start text and an end text.
   *
   * <ul>
   * <li>blank {@code start}: path of {@code node};</li>
   * <li>only one of the two texts is given or found: path of the node found
   *     for it;</li>
   * <li>neither is found: path of {@code node};</li>
   * <li>both are found: path of the node that {@code HTMLNodeUtil} resolves
   *     from the common index path of the two nodes.</li>
   * </ul>
   *
   * @param node root of the search
   * @param start text marking the beginning, may be {@code null} or blank
   * @param end text marking the end, may be {@code null} or blank
   * @return the resulting node path
   * @throws Exception if searching or path conversion fails
   */
  public NodePath findNodeByText(HTMLNode node, String start, String end) throws Exception {
    RefsDecoder decoder = new RefsDecoder();
    NodePathParser pathParser = new NodePathParser();
    TextHandler textHandler = new TextHandler();

    if(start == null || start.trim().length() == 0) return pathParser.toPath(node);

    start = textHandler.trim(start);
    HTMLNode startNode = textHandler.findByText(node, start, decoder);

    if(end == null || end.trim().length() == 0) {
      return pathParser.toPath(startNode != null ? startNode : node);
    }

    end = textHandler.trim(end);
    HTMLNode endNode = textHandler.findByText(node, end, decoder);

    if(endNode == null) {
      return pathParser.toPath(startNode != null ? startNode : node);
    }
    // endNode is known to be non-null from here on.
    if(startNode == null) return pathParser.toPath(endNode);

    HTMLNodeUtil nodeUtil = new HTMLNodeUtil();
    String indexPath = nodeUtil.getCommonIndexPath(startNode, endNode);
    return pathParser.toPath(nodeUtil.getNodeByIndex(node, indexPath));
  }
}