package com.jeck.microblogging.utils;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.XPatherException;
import weibo4j.http.HttpClient;
import weibo4j.http.Response;
import weibo4j.model.WeiboException;
/**
 * Thin convenience wrapper around HtmlCleaner: parses an HTML document
 * (given as a string or fetched from a URL) into a {@link TagNode} tree and
 * extracts the text of the nodes selected by an XPath expression or by an
 * attribute name/value pair.
 */
public class HtmlUtils {

    /** Page scraped by {@link #main(String[])} when no URL is given on the command line. */
    private static final String DEFAULT_DEMO_URL = "http://www.haha.mx/new";

    /** Raw HTML this instance was built from, or {@code null} when built from a URL. */
    private final String html;

    /** Source URL this instance was built from, or {@code null} when built from a string. */
    private final URL url;

    /** Root of the cleaned document tree; every query starts here. */
    private final TagNode root;

    /** Cleaner used to build {@link #root}. */
    private final HtmlCleaner cleaner;

    /**
     * Parses the given HTML text.
     *
     * @param html the HTML markup to clean and parse; must not be {@code null}
     * @throws NullPointerException if {@code html} is {@code null}
     */
    public HtmlUtils(String html) {
        if (html == null) {
            throw new NullPointerException("html must not be null");
        }
        this.html = html;
        this.url = null;
        this.cleaner = newCleaner();
        this.root = cleaner.clean(html);
    }

    /**
     * Fetches and parses the document located at the given URL.
     *
     * @param url location of the HTML document; must not be {@code null}
     * @throws IOException if the cleaner fails to read the document
     * @throws NullPointerException if {@code url} is {@code null}
     */
    public HtmlUtils(URL url) throws IOException {
        if (url == null) {
            throw new NullPointerException("url must not be null");
        }
        this.html = null;
        this.url = url;
        this.cleaner = newCleaner();
        this.root = cleaner.clean(url);
    }

    /**
     * Creates the cleaner shared by both constructors, starting from the
     * default properties and enabling the "omit XML declaration" and
     * "omit HTML envelope" options.
     */
    private static HtmlCleaner newCleaner() {
        HtmlCleaner htmlCleaner = new HtmlCleaner();
        CleanerProperties props = htmlCleaner.getProperties();
        props.setOmitXmlDeclaration(true);
        props.setOmitHtmlEnvelope(true);
        return htmlCleaner;
    }

    /**
     * Evaluates an XPath expression against the document and returns the
     * text of every match.
     *
     * <p>Matches that are element nodes contribute their text content; any
     * other kind of match (for example the value selected by an attribute or
     * {@code text()} step) contributes its string form instead of failing
     * with a {@link ClassCastException}.
     *
     * @param xPath the XPath expression to evaluate
     * @return the matched texts in result order; empty, never {@code null},
     *         when nothing matches
     * @throws XPatherException if the expression cannot be evaluated
     */
    public List<String> readByXPath(String xPath) throws XPatherException {
        return toTextList(root.evaluateXPath(xPath));
    }

    /**
     * Returns the text of every element whose attribute {@code attrName}
     * has the value {@code value}. The lookup is requested as recursive and
     * case-sensitive (the two {@code true} flags).
     *
     * @param attrName name of the attribute to test
     * @param value    attribute value to look for
     * @return the matched texts; empty, never {@code null}, when nothing matches
     * @throws XPatherException not thrown by the current implementation; the
     *         clause is kept so existing callers that catch it still compile
     */
    public List<String> readByAttrValue(String attrName, String value) throws XPatherException {
        return toTextList(root.getElementsByAttValue(attrName, value, true, true));
    }

    /**
     * Converts query results into their textual form, skipping {@code null}
     * entries and tolerating a {@code null} array.
     */
    private static List<String> toTextList(Object[] nodes) {
        List<String> texts = new ArrayList<String>();
        if (nodes == null) {
            return texts;
        }
        for (Object node : nodes) {
            if (node instanceof TagNode) {
                texts.add(((TagNode) node).getText().toString());
            } else if (node != null) {
                // XPath may yield non-element results (e.g. attribute values).
                texts.add(node.toString());
            }
        }
        return texts;
    }

    /**
     * Demo entry point: prints the text of every element with
     * {@code class="item"} on a page.
     *
     * @param arg optional; {@code arg[0]} overrides the default page URL
     * @throws Exception if the page cannot be fetched or parsed
     */
    public static void main(String[] arg) throws Exception {
        String target = (arg != null && arg.length > 0) ? arg[0] : DEFAULT_DEMO_URL;
        List<String> rs = new HtmlUtils(new URL(target)).readByAttrValue("class", "item");
        for (String string : rs) {
            System.out.println("====" + string);
        }
    }
}