Package

Source Code of ProxiesChecker

import java.io.File;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpResponse;
import org.vietspider.chars.URLEncoder;
import org.vietspider.common.io.DataReader;
import org.vietspider.common.io.DataWriter;
import org.vietspider.net.client.HttpHandlers;
import org.vietspider.net.client.HttpMethodHandler;
import org.vietspider.net.client.HttpResponseReader;
import org.vietspider.net.client.WebClient;

/**
* Author : Nhu Dinh Thuan
*          nhudinhthuan@yahoo.com
* Jul 30, 2008 
*/
public class ProxiesChecker {
 
  private static WebClient webClient = new WebClient();
 
  private static HttpMethodHandler methodHandler = HttpHandlers.getInstance().createMethod(webClient);
 
//  static {
//    methodHandler.setTimeout(30);
//  }
 
  public static byte[] loadContent(String address) throws Exception {
    try{
      URLEncoder urlEncoder = new URLEncoder();
      address = urlEncoder.encode(address);
     
      HttpResponse httpResponse = methodHandler.execute(address, "");
     
      HttpResponseReader httpResponseReader = HttpHandlers.getInstance().createReader();
      return httpResponseReader.readBody(httpResponse);
    } catch(Exception exp){
      return null;
    }
  }
 
  public static void main(String[] args) throws Exception {
    final String home = "http://www.talawas.org/";
    final String address = "http://www.talawas.org/talaDB/showFile.php?res=9681&rb=0401";
   
    final long WAIT = 30*1000;
//    String home  = "http://vietnamnet.vn/";
//    String address = "http://vietnamnet.vn/thegioi/2008/08/799026/";
   
    System.out.println(home);
   
    webClient.setUserAgent("Mozilla/5.0 (compatible; Yahoo! VN Slurp; http://help.yahoo.com/help/us/ysearch/slurp)");
//    webClient.registryProxy("216.194.70.3", 8118, null, null);
   
    final DataWriter writer = new DataWriter();
    final DataReader reader = new DataReader();
    File file  = new File("F:\\Temp2\\webclient\\proxies.txt") ;
    String textValue  = new String(reader.load(file), "utf-8");
    String [] proxies = textValue.split("\n");
    File newFile  = new File("F:\\Temp2\\webclient\\good.proxies.txt") ;
  
    final File folder = new File("F:\\Temp2\\webclient\\proxy\\");
    deleteFolder(folder);

    final List<String> aliveList = new ArrayList<String>();

    for(int i = 0; i < proxies.length; i++) {
      try {
        proxies[i] = proxies[i].trim();
        webClient.setURL(null, new URL(home));
        System.out.println("=== >"+ proxies[i]+" ==> "+i+"/"+proxies.length);
//      System.out.println("=== >"+Thread.currentThread().getId()+ " : "+ array[i]);
        String [] elements = proxies[i].split(":");
        String proxyHost = elements[0].trim();
        int proxyPort = Integer.parseInt(elements[1].trim());

        long start = System.currentTimeMillis();
        webClient.registryProxy(proxyHost, proxyPort, null, null);
        byte[] bytes = loadContent(address);
        long end = System.currentTimeMillis();
        if((start - end) > WAIT) {
          System.out.println(" timeout ");
          continue;
        }
       
        if(bytes == null) continue;
        String fileName = proxies[i].replace('.', '_');
        fileName = fileName.replace(':', '_');
        writer.save(new File(folder, fileName+".html"), bytes);
       
        System.out.println(bytes.length+ " : "+ (bytes.length > 15000));
        if(bytes.length > 30000) {
          String value = proxyHost + ":" + proxyPort;
          if(aliveList.contains(value)) continue;
          aliveList.add(value);
          writer.append(newFile, (value+"\n").getBytes());
        }
      } catch (Exception e) {
        e.printStackTrace();
      }
    }//het for
   
    System.out.println(" da kiem tra xong toan bo\n\n");
    System.exit(0);
  }
 
  synchronized public static void deleteFolder(File file){
    File[] list = file.listFiles();
    for(File ele : list){
      ele.delete();
    }
  }
}
TOP

Related Classes of ProxiesChecker

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.