package com.digital;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.tags.Div;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.NodeVisitor;
import com.commons.IUrlGetter;
import com.commons.UrlData;
import com.utils.LoggerUtils;
/**
 * Collects category links from the digital channel page.
 *
 * <p>The page at {@code channelUrl} is parsed with HTML Parser and walked in
 * three stages: locate the {@code <div class="m" id="sortlist">} container,
 * then every {@code <div class="con">} beneath it, then every link tag
 * beneath those. Each link becomes one {@link UrlData} whose name is the
 * link's {@code title} attribute and whose URL is its {@code href} attribute.
 */
public class DigitalUrlGetter implements IUrlGetter {

    /** Address of the page to scan; replaceable through {@link #setChannelUrl(String)}. */
    private String channelUrl = "http://channel.jd.com/digital.html";

    /**
     * Parses the channel page and returns one entry per matching link.
     *
     * <p>A {@link ParserException} is not propagated: its stack trace is
     * printed and whatever was collected up to that point (possibly nothing)
     * is returned.
     *
     * @return the collected entries in visiting order; never {@code null}
     */
    @Override
    public List<UrlData> getUrlData() {
        final List<UrlData> dataList = new ArrayList<UrlData>();
        try {
            Parser parser = new Parser();
            parser.setURL(channelUrl);
            parser.setEncoding("Gb2312");
            parser.visitAllNodesWith(new SortListVisitor(dataList));
        } catch (ParserException e) {
            e.printStackTrace();
        }
        return dataList;
    }

    /**
     * Replaces the address of the page scanned by {@link #getUrlData()}.
     *
     * @param channelUrl the new page address
     */
    public void setChannelUrl(String channelUrl) {
        this.channelUrl = channelUrl;
    }

    /**
     * Runs one scan with the default address. The returned list is discarded;
     * the visible output is what the scan itself prints for each link.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        new DigitalUrlGetter().getUrlData();
    }

    /** Tells whether the tag is exactly a {@link Div} carrying the given class attribute. */
    private static boolean isDivWithClass(Tag tag, String cssClass) {
        return Div.class.equals(tag.getClass())
                && cssClass.equals(tag.getAttribute("class"));
    }

    /** Finds the {@code <div class="m" id="sortlist">} container and scans its subtree. */
    private static final class SortListVisitor extends NodeVisitor {
        private final NodeVisitor contentVisitor;

        SortListVisitor(List<UrlData> sink) {
            this.contentVisitor = new ContentVisitor(sink);
        }

        @Override
        public void visitTag(Tag tag) {
            if (isDivWithClass(tag, "m")
                    && "sortlist".equals(tag.getAttribute("id"))) {
                tag.accept(contentVisitor);
            }
        }
    }

    /**
     * Finds every {@code <div class="con">} and scans its subtree for links.
     *
     * <p>NOTE(review): if "con" divs can be nested inside one another, links of
     * the inner div would presumably be collected more than once - confirm
     * against the actual page structure.
     */
    private static final class ContentVisitor extends NodeVisitor {
        private final NodeVisitor linkCollector;

        ContentVisitor(List<UrlData> sink) {
            this.linkCollector = new LinkCollector(sink);
        }

        @Override
        public void visitTag(Tag tag) {
            if (isDivWithClass(tag, "con")) {
                tag.accept(linkCollector);
            }
        }
    }

    /** Turns every visited link tag into a {@link UrlData} and appends it to the sink. */
    private static final class LinkCollector extends NodeVisitor {
        private final List<UrlData> sink;

        LinkCollector(List<UrlData> sink) {
            this.sink = sink;
        }

        @Override
        public void visitTag(Tag tag) {
            // Only link tags produce an entry; bail out before allocating anything.
            if (!LinkTag.class.equals(tag.getClass())) {
                return;
            }
            UrlData data = new UrlData();
            data.setUrlName(tag.getAttribute("title"));
            data.setUrl(tag.getAttribute("href"));
            try {
                LoggerUtils.log(DigitalUrlGetter.class.getName(), data.toString());
            } catch (IOException e) {
                // Logging is best-effort: report the failure and still keep the entry.
                e.printStackTrace();
            }
            System.out.println(data);
            sink.add(data);
        }
    }
}