import java.io.File;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.HttpResponse;
import org.vietspider.chars.URLEncoder;
import org.vietspider.common.io.DataReader;
import org.vietspider.common.io.DataWriter;
import org.vietspider.net.client.HttpHandlers;
import org.vietspider.net.client.HttpMethodHandler;
import org.vietspider.net.client.HttpResponseReader;
import org.vietspider.net.client.WebClient;
/**
* Author : Nhu Dinh Thuan
* nhudinhthuan@yahoo.com
* Jul 30, 2008
*/
/**
 * Command-line utility that tries every {@code host:port} entry of a proxy list file
 * by downloading a fixed test page through it. Each downloaded body is saved to a
 * per-proxy file, and proxies whose response is large enough are appended to a
 * "good proxies" file.
 */
public class ProxiesChecker {

  /** Longest time, in milliseconds, a download may take before the proxy is rejected. */
  private static final long WAIT_MILLIS = 30 * 1000;

  /** A response must be larger than this many bytes for the proxy to be recorded as alive. */
  private static final int MIN_ALIVE_BYTES = 30000;

  private static WebClient webClient = new WebClient();
  private static HttpMethodHandler methodHandler = HttpHandlers.getInstance().createMethod(webClient);

  // A handler-level timeout was previously configured here but is disabled:
  // static {
  //   methodHandler.setTimeout(30);
  // }

  /**
   * Downloads the body of the given address through the shared web client.
   *
   * @param address the URL to fetch; it is passed through {@link URLEncoder} before the request
   * @return the response body, or {@code null} if any step of the request failed
   * @throws Exception declared for compatibility with existing callers; failures are
   *         currently reported by returning {@code null} instead
   */
  public static byte[] loadContent(String address) throws Exception {
    try {
      URLEncoder urlEncoder = new URLEncoder();
      address = urlEncoder.encode(address);
      // NOTE(review): the second argument is presumably the referer - confirm against HttpMethodHandler.
      HttpResponse httpResponse = methodHandler.execute(address, "");
      HttpResponseReader httpResponseReader = HttpHandlers.getInstance().createReader();
      return httpResponseReader.readBody(httpResponse);
    } catch (Exception exp) {
      // Deliberate best-effort: a dead or misbehaving proxy is the expected failure here,
      // so the caller only needs to know that no content was obtained.
      return null;
    }
  }

  /**
   * Reads the proxy list, checks every entry and records the working ones.
   *
   * @param args ignored
   * @throws Exception if the proxy list file cannot be read
   */
  public static void main(String[] args) throws Exception {
    final String home = "http://www.talawas.org/";
    final String address = "http://www.talawas.org/talaDB/showFile.php?res=9681&rb=0401";

    System.out.println(home);
    webClient.setUserAgent("Mozilla/5.0 (compatible; Yahoo! VN Slurp; http://help.yahoo.com/help/us/ysearch/slurp)");

    final DataWriter writer = new DataWriter();
    final DataReader reader = new DataReader();

    File file = new File("F:\\Temp2\\webclient\\proxies.txt");
    String textValue = new String(reader.load(file), "utf-8");
    String[] proxies = textValue.split("\n");

    File newFile = new File("F:\\Temp2\\webclient\\good.proxies.txt");
    final File folder = new File("F:\\Temp2\\webclient\\proxy\\");
    deleteFolder(folder);

    final List<String> aliveList = new ArrayList<String>();
    for (int i = 0; i < proxies.length; i++) {
      try {
        proxies[i] = proxies[i].trim();
        // Blank lines (for example a trailing newline in the list) are not proxies.
        if (proxies[i].isEmpty()) continue;

        webClient.setURL(null, new URL(home));
        System.out.println("=== >" + proxies[i] + " ==> " + i + "/" + proxies.length);

        String[] elements = proxies[i].split(":");
        String proxyHost = elements[0].trim();
        int proxyPort = Integer.parseInt(elements[1].trim());

        long start = System.currentTimeMillis();
        webClient.registryProxy(proxyHost, proxyPort, null, null);
        byte[] bytes = loadContent(address);
        long end = System.currentTimeMillis();

        // Elapsed time is end - start; the reversed subtraction could never exceed the limit.
        if ((end - start) > WAIT_MILLIS) {
          System.out.println(" timeout ");
          continue;
        }
        if (bytes == null) continue;

        // Build a file-system friendly name from "host:port".
        String fileName = proxies[i].replace('.', '_');
        fileName = fileName.replace(':', '_');
        writer.save(new File(folder, fileName + ".html"), bytes);

        // NOTE(review): this diagnostic compares against 15000 while the acceptance
        // threshold below is MIN_ALIVE_BYTES (30000) - confirm which value is intended.
        System.out.println(bytes.length + " : " + (bytes.length > 15000));

        if (bytes.length > MIN_ALIVE_BYTES) {
          String value = proxyHost + ":" + proxyPort;
          if (aliveList.contains(value)) continue;
          aliveList.add(value);
          // Write with the same explicit charset that is used to read the proxy list.
          writer.append(newFile, (value + "\n").getBytes("utf-8"));
        }
      } catch (Exception e) {
        // One malformed or failing entry must not stop the remaining checks.
        e.printStackTrace();
      }
    } // end of proxy loop

    System.out.println(" da kiem tra xong toan bo\n\n");
    System.exit(0);
  }

  /**
   * Deletes the direct children of the given folder. The folder itself is kept and
   * non-empty sub-directories are left in place because {@link File#delete()} refuses them.
   *
   * @param file the folder to empty; nothing happens if it does not exist or is not a directory
   */
  synchronized public static void deleteFolder(File file) {
    File[] list = file.listFiles();
    // listFiles() yields null for a missing path, a regular file or an I/O error.
    if (list == null) return;
    for (File ele : list) {
      ele.delete();
    }
  }
}