package com.digital;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.tags.Div;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.NodeVisitor;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import com.commons.Data;
import com.commons.IDataGetter;
import com.utils.LoggerUtils;
public class CameraGetter implements IDataGetter {
private String url = "http://list.jd.com/652-654-832.html";
public void setUrl(String url) {
this.url = url;
}
public List<Data> getData() {
return htmlParser();
}
private List<Data> htmlParser() {
final List<Data> dataList = new ArrayList<Data>();
try {
Parser parser = new Parser();
parser.setURL(url);
parser.setEncoding("Gb2312");
NodeVisitor visitor = new NodeVisitor() {
public void visitTag(Tag tag) {
if (Div.class.equals(tag.getClass())
&& "p-img".equals(tag.getAttribute("class"))) {
final Data data = new Data();
tag.accept(new NodeVisitor() {
public void visitTag(Tag tag) {
if (ImageTag.class.equals(tag.getClass())) {
data.setName(tag.getAttribute("alt"));
}
if (LinkTag.class.equals(tag.getClass())) {
data.setUri(tag.getAttribute("href"));
}
};
});
getDataId(data);
getPrice(data);
try {
LoggerUtils.log(CameraGetter.class.getName(),
data.toString());
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
dataList.add(data);
}
}
};
parser.visitAllNodesWith(visitor);
} catch (ParserException e) {
e.printStackTrace();
}
return dataList;
}
private void getDataId(final Data data) {
String[] split = data.getUri().split("/");
String[] idSplit = split[split.length - 1].split("\\.");
data.setId(idSplit[0]);
}
private void getPrice(Data data) {
try {
String url = "http://p.3.cn/prices/get?skuid=J_" + data.getId();
URL u = new URL(url);
InputStream in = new BufferedInputStream(u.openStream());
Reader rd = new InputStreamReader(in, "Gb2312");
StringBuffer temp = new StringBuffer();
int c = 0;
while ((c = rd.read()) != -1) {
temp.append((char) c);
}
String priceStrint = temp.toString();
String p = priceStrint.split(",")[0].split(":")[1].substring(1);
p = p.substring(0, p.length() - 1);
data.setPrice(Float.parseFloat(p));
} catch (MalformedURLException e) {
getPrice(data);
} catch (IOException e) {
getPrice(data);
}
}
private List<Data> saxParser() {
final List<Data> dataList = new ArrayList<Data>();
try {
SAXParserFactory.newInstance().newSAXParser()
.parse(url, new DefaultHandler() {
private Data data;
public void startElement(String uri, String localName,
String qName, Attributes attributes)
throws SAXException {
System.out.println("===");
if ("div".equals(qName)
&& "p-img".equals(attributes
.getValue("class"))) {
System.out.println("found one!");
data = new Data();
}
if ("a".equals(qName)) {
if (data != null)
data.setUri(attributes.getValue("href"));
}
if ("img".equals(qName)) {
if (data != null)
data.setName(attributes.getValue("alt"));
}
}
public void endElement(String uri, String localName,
String qName) throws SAXException {
if (data != null) {
dataList.add(data);
data = null;
}
}
});
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (ParserConfigurationException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return dataList;
}
private void domParser() {
try {
URL u = new URL(url);
InputStream input = u.openStream();
Document doc = DocumentBuilderFactory.newInstance()
.newDocumentBuilder().parse(input);
NodeList nodeList = doc.getElementsByTagName("div");
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
Node attr = node.getAttributes().getNamedItem("class");
if (null != attr && attr.getNodeValue().equals("p-img")) {
System.out.println("found one!");
NodeList children = node.getChildNodes();
for (int j = 0; j < children.getLength(); i++) {
Node link = children.item(j);
Node href = link.getAttributes().getNamedItem("href");
System.out.println("href:" + href.getNodeValue());
}
}
}
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (DOMException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (ParserConfigurationException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public static void main(String[] args) {
CameraGetter getter = new CameraGetter();
getter.setUrl("http://list.jd.com/652-654-832.html");
getter.getData();
}
}