// Fragment of a method body (the method header is outside this view).
// Flow: download a page, parse it, extract the nodes selected by one path,
// print the result, remove the nodes selected by a second path, print again,
// then save the text to "a.html".
NodePathParser pathParser = new NodePathParser();
HTMLExtractor htmlExtractor = new HTMLExtractor();
// webClient and download(...) are defined outside this fragment.
byte [] bytes = download(webClient, "http://news.google.com.vn/");
// NOTE(review): the second argument is null; presumably a charset/encoding hint
// that is left for the parser to decide -- confirm against HTMLParser2.
HTMLDocument document = new HTMLParser2().createDocument(bytes, null);
// Path expression(s) handed to the extractor. The path is tied to the exact
// table layout of the downloaded page.
// NOTE(review): "[*]" presumably selects every index at that level -- confirm
// against NodePathParser.
String [] paths = {
"BODY[0].TABLE[2].TBODY[0].TR[0].TD[3].TABLE[1].TBODY[0].TR[0].TD[0].TABLE[0].TBODY[0].TR[0].TD[0].DIV[0].TABLE[0].TBODY[0].TR[0].TD[0].DIV[*]"
};
// Convert each textual path into a NodePath, index for index.
NodePath [] nodePaths = new NodePath[paths.length];
for(int i=0; i<paths.length; i++){
nodePaths[i] = pathParser.toPath(paths[i]);
}
// extract(...) returns an HTMLDocument built from the given document and paths.
HTMLDocument doc = htmlExtractor.extract(document, nodePaths);
System.out.println(doc.getTextValue());
// Reuse the same variables for the paths passed to remove(...) below.
// The trailing comma in the initializer is legal Java.
paths = new String[]{
"DIV[*].BR[*]",
};
nodePaths = new NodePath[paths.length];
for(int i=0; i<paths.length; i++){
nodePaths[i] = pathParser.toPath(paths[i]);
}
// Any return value of remove(...) is ignored; the root is re-read and printed
// right after. NOTE(review): presumably remove(...) mutates the tree under
// doc.getRoot() in place -- confirm.
htmlExtractor.remove(doc.getRoot(), nodePaths);
System.out.println(doc.getRoot().getTextValue());
// Relative path: resolved against the JVM's current working directory.
File file = new File("a.html");
// NOTE(review): getBytes() without an argument encodes with the platform default
// charset, so the saved bytes can differ between machines; consider passing an
// explicit charset once the document's encoding is known.
byte[] data = doc.getTextValue().getBytes();
new DataWriter().save(file, data);
}