// Row filter for one search result: a TableRow that has a child which in turn
// has a child accepted by CssClassFilter("searchDisplay").
NodeFilter discFilter = new AndFilter(
new NodeClassFilter(TableRow.class),
new HasChildFilter(new HasChildFilter(new CssClassFilter("searchDisplay")))
);
// Page-level collector. Filter order matters: the results are read back below
// by index (0 -> the "activeTab" node, 1 -> the rows accepted by discFilter).
NodeCollector searchCollector = new NodeCollector(new NodeFilter[] {
new CssClassFilter("activeTab"),
discFilter,
});
// Per-row collector, applied to each result row in the loop further down.
// Filter order matters here too, because the loop reads the slots by index:
//   0 -> LinkTags whose parent is accepted by CssClassFilter("searchDisplay")
//        (the loop uses element 0 as the title link and element 1 for the MPAA text)
//   1 -> Spans with the same kind of parent (the loop reads the year from it)
//   2 -> ImageTags accepted by CssClassFilter("star") (the loop reads the rating from its URL)
NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
new AndFilter(new NodeClassFilter(LinkTag.class),
new HasParentFilter(new CssClassFilter("searchDisplay"))),
new AndFilter(new NodeClassFilter(Span.class),
new HasParentFilter(new CssClassFilter("searchDisplay"))),
new AndFilter(new NodeClassFilter(ImageTag.class),
new CssClassFilter("star"))
});
// The collector is handed to the parser as the filter; the returned list is
// discarded and the matches are read back from the collector itself.
parser.extractAllNodesThatMatch(searchCollector);
// NOTE(review): whether getNode(0) can return null when nothing matched is not
// visible here - confirm against NodeCollector before relying on activeTab.
Node activeTab = searchCollector.getNode(0);
NodeList discs = searchCollector.getNodeList(1);
// Optional out-parameter: when the caller supplied an array, store the total
// result count (parsed from the active tab's text) in its first slot.
if (totalResultsOut != null) {
    final String activeTabText = NodeUtils.getTextData(activeTab);
    final Matcher countMatcher =
            Pattern.compile(SEARCH_RESULT_COUNT_EXPR).matcher(activeTabText);
    // The slot is left untouched unless the entire text matches the expression.
    if (countMatcher.matches()) {
        final int parsedTotal = Integer.parseInt(countMatcher.group(1));
        totalResultsOut[0] = parsedTotal;
    }
}
// Compiled once here, before the loop, and reused for every result row.
Pattern ratingPattern = Pattern.compile(RATING_EXPR);
Pattern movieIdPattern = Pattern.compile(MOVIE_ID_EXPR);
// Current result row; assigned at the start of each loop iteration.
Node disc;
// Scratch list passed to collectInto() for each row; the loop empties it
// (removeAll) at the start of every iteration.
NodeList info = new NodeList();
// Iterates over the result rows gathered by searchCollector (slot 1).
SimpleNodeIterator iter = discs.elements();
while (iter.hasMoreNodes()) {
info.removeAll();
infoCollector.clear();
disc = iter.nextNode();
//collect all the nodes for the movie items
disc.collectInto(info, infoCollector);
int i=0;
//parse out all the information
NodeList linkNodes = infoCollector.getNodeList(i++);
LinkTag titleLink = (LinkTag)linkNodes.elementAt(0);
String movieId = null;
Matcher matcher = movieIdPattern.matcher(titleLink.getLink());
if (matcher.matches()) {
movieId = matcher.group(1);
} else {
//not a valid movie
continue;
}
String title = titleLink.getLinkText();
String mpaa = NodeUtils.getTextData(linkNodes.elementAt(1));
String year = NodeUtils.getTextData(infoCollector.getNode(i++));
year = year.replaceAll("\\((\\d*)\\)", "$1");
int rating = -1;
ImageTag ratingNode = (ImageTag)infoCollector.getNode(i++);
if (ratingNode != null) {
String ratingStr = ratingNode.getImageURL();
matcher = ratingPattern.matcher(ratingStr);
if (matcher.matches()) {
rating = Integer.parseInt(matcher.group(1));