Package org.vietspider.ui.htmlexplorer

Source Code of org.vietspider.ui.htmlexplorer.FastWebClient

/***************************************************************************
* Copyright 2001-2007 The VietSpider         All rights reserved.       *
**************************************************************************/
package org.vietspider.ui.htmlexplorer;

import java.io.File;
import java.net.URL;
import java.util.Hashtable;

import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.vietspider.chars.URLEncoder;
import org.vietspider.chars.refs.RefsDecoder;
import org.vietspider.html.HTMLDocument;
import org.vietspider.html.HTMLNode;
import org.vietspider.html.path2.NodePath;
import org.vietspider.html.path2.NodePathParser;
import org.vietspider.html.util.HTMLAnchorUtil;
import org.vietspider.html.util.HTMLNodeUtil;
import org.vietspider.html.util.HTMLParserDetector;
import org.vietspider.html.util.URLCodeGenerator;
import org.vietspider.net.client.HttpHandlers;
import org.vietspider.net.client.HttpResponseReader;
import org.vietspider.net.client.WebClient;
import org.vietspider.ui.text.TextHandler;

/**
* Author : Nhu Dinh Thuan
*          nhudinhthuan@yahoo.com
* Oct 23, 2007 
*/
public class FastWebClient extends WebClient {
 
  private Hashtable<String, HttpGet> currentGets = new Hashtable<String, HttpGet>();
 
  public void abort(String address) {
    if(address == null || address.trim().length() == 0) return;
    if(!currentGets.containsKey(address)) return;
    HttpGet httpGet = currentGets.remove(address);
    if(httpGet != null) httpGet.abort();
  }

  public byte[] loadContent(String referer, String address) throws Exception {
    URLEncoder urlEncoder = new URLEncoder();
   
    if(host == null) {
      setURL(referer, new URL(address));
    } else {
      URL url = new URL(address)
      URLCodeGenerator urlCodeUtil = new URLCodeGenerator();
      if(!urlCodeUtil.compareHost(url.getHost(), getHost())) {
        setURL(referer, new URL(address));
      }
    }

    HttpGet httpGet = null;
    try {
      address = urlEncoder.encode(address);
      httpGet = createGetMethod(address, referer);     
      currentGets.put(address, httpGet);

      if(httpGet == null) return null;
      HttpHost httpHost = createHttpHost(address);
      HttpResponse httpResponse = execute(httpHost, httpGet);
      currentGets.remove(address);
     
//      StatusLine statusLine = httpResponse.getStatusLine();
//      int statusCode = statusLine.getStatusCode();
//      System.out.println(" status code la "+ statusCode);

      HttpResponseReader httpResponseReader = HttpHandlers.getInstance().createReader();
      return httpResponseReader.readBody(httpResponse);
    } catch(Exception exp) {
      throw exp;
    }
  }

  public HTMLDocument createDocument(String refer,
      String address, boolean cache, HTMLParserDetector detector) throws Exception
    if( address == null || address.trim().length() < 1) return null;
    char [] chars = getCacheData().get(address);
    if(chars != null) return detector.createDocument(chars);

    File file = new File(address);
    if(file.exists()) return detector.loadDocument(file);
   
    URL url = new URL(address);
    String ref = url.getRef();
    if(ref != null && (ref = ref.trim()).isEmpty())  ref = null;
    if(ref != null) address = address.substring(0, address.indexOf('#'));
   
    HTMLDocument document = null;
    if(address.startsWith("file")){
      file = new File(url.toURI());     
      document = detector.loadDocument(file);
    } else {
      byte[] obj = loadContent(refer, address);      
      if( obj == null || obj.length < 1) return null;
      document = detector.createDocument(obj);
      chars = document.getTextValue().toCharArray();
      if(cache) cacheResponse(address, chars);
    }
   
    return document == null || ref == null ?
        document : new HTMLAnchorUtil().searchDocument(document, ref);
  }
 
  public NodePath findNodeByText(HTMLNode node, String start, String end) throws Exception {
    RefsDecoder decoder = new RefsDecoder();
    NodePathParser pathParser = new NodePathParser();
    TextHandler textHandler = new TextHandler();
    if(start == null || start.trim().length() == 0) return pathParser.toPath(node);
    start = textHandler.trim(start);
    HTMLNode startNode = textHandler.findByText(node, start, decoder);
    if(end == null || end.trim().length() == 0) {
      return startNode != null ?  pathParser.toPath(startNode) : pathParser.toPath(node);
    }
    end = textHandler.trim(end);
   
    HTMLNode endNode = textHandler.findByText(node, end, decoder);   
    if(endNode == null)
      return startNode != null ? pathParser.toPath(startNode) : pathParser.toPath(node);
    if(startNode  == null
      return endNode != null  ?  pathParser.toPath(endNode) : pathParser.toPath(node);
    HTMLNodeUtil nodeUtil = new HTMLNodeUtil()
    String indexPath = nodeUtil.getCommonIndexPath(startNode, endNode);
    return pathParser.toPath(nodeUtil.getNodeByIndex(node, indexPath));
  }

}
TOP

Related Classes of org.vietspider.ui.htmlexplorer.FastWebClient

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.