Package com.almilli.htmlparser

Examples of com.almilli.htmlparser.NodeCollector


      NodeFilter discFilter = new AndFilter(
          new NodeClassFilter(TableRow.class),
          new HasChildFilter(new HasChildFilter(new CssClassFilter("searchDisplay")))
      );
     
      NodeCollector searchCollector = new NodeCollector(new NodeFilter[] {
          new CssClassFilter("activeTab"),
          discFilter,
      });
     
 
      NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
          new AndFilter(new NodeClassFilter(LinkTag.class),
              new HasParentFilter(new CssClassFilter("searchDisplay"))),
          new AndFilter(new NodeClassFilter(Span.class),
              new HasParentFilter(new CssClassFilter("searchDisplay"))),
          new AndFilter(new NodeClassFilter(ImageTag.class),
              new CssClassFilter("star"))
          });
 
      parser.extractAllNodesThatMatch(searchCollector);
      Node activeTab = searchCollector.getNode(0);
      NodeList discs = searchCollector.getNodeList(1);
     
      if (totalResultsOut != null) {
        String totalStr = NodeUtils.getTextData(activeTab);
        Pattern totalResultsPattern = Pattern.compile(SEARCH_RESULT_COUNT_EXPR);
        Matcher matcher = totalResultsPattern.matcher(totalStr);
        if (matcher.matches()) {
          totalResultsOut[0] = Integer.parseInt(matcher.group(1));
        }
      }
 
      Pattern ratingPattern = Pattern.compile(RATING_EXPR);
      Pattern movieIdPattern = Pattern.compile(MOVIE_ID_EXPR);
     
      Node disc;
      NodeList info = new NodeList();
      SimpleNodeIterator iter = discs.elements();
      while (iter.hasMoreNodes()) {
        info.removeAll();
        infoCollector.clear();
       
        disc = iter.nextNode();
       
        //collect all the nodes for the movie items
        disc.collectInto(info, infoCollector);
 
        int i=0;
 
        //parse out all the information
        NodeList linkNodes = infoCollector.getNodeList(i++);
        LinkTag titleLink = (LinkTag)linkNodes.elementAt(0);
        String movieId = null;
        Matcher matcher = movieIdPattern.matcher(titleLink.getLink());
        if (matcher.matches()) {
          movieId = matcher.group(1);
        } else {
          //not a valid movie
          continue;
        }
       
        String title = titleLink.getLinkText();
        String mpaa = NodeUtils.getTextData(linkNodes.elementAt(1));
 
        String year = NodeUtils.getTextData(infoCollector.getNode(i++));
        year = year.replaceAll("\\((\\d*)\\)", "$1");
 
        int rating = -1;
        ImageTag ratingNode = (ImageTag)infoCollector.getNode(i++);
        if (ratingNode != null) {
          String ratingStr = ratingNode.getImageURL();
          matcher = ratingPattern.matcher(ratingStr);
          if (matcher.matches()) {
            rating = Integer.parseInt(matcher.group(1));
View Full Code Here


      NodeFilter discFilter = new AndFilter(
          new NodeClassFilter(TableRow.class),
          new HasChildFilter(new HasChildFilter(new CssClassFilter("searchDisplay")))
      );

      NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
          new AndFilter(new NodeClassFilter(LinkTag.class),
              new HasParentFilter(new HasParentFilter(new CssClassFilter("searchDisplay")))),
          new AndFilter(new NodeClassFilter(TextNode.class),
              new HasParentFilter(new CssClassFilter("searchSmallText"))),
          new AndFilter(new NodeClassFilter(ImageTag.class),
              new CssClassFilter("star"))
          });

      NodeList discs = parser.extractAllNodesThatMatch(discFilter);

      Pattern ratingPattern = Pattern.compile(RATING_EXPR);
      Pattern movieIdPattern = Pattern.compile(MOVIE_ID_EXPR);
     
      Node disc;
      NodeList info = new NodeList();
      SimpleNodeIterator iter = discs.elements();
      while (iter.hasMoreNodes()) {
        info.removeAll();
        infoCollector.clear();
       
        disc = iter.nextNode();
       
        //collect all the nodes for the movie items
        disc.collectInto(info, infoCollector);

        int i=0;

        //parse out all the information
        NodeList links = infoCollector.getNodeList(i++);
        LinkTag titleLink = (LinkTag)links.elementAt(0);
        LinkTag mpaaLink = (LinkTag)links.elementAt(1);
        String movieId = null;
        Matcher matcher = movieIdPattern.matcher(titleLink.getLink());
        if (matcher.matches()) {
          movieId = matcher.group(1);
        } else {
          //not a valid movie
          continue;
        }
       
        String title = titleLink.getLinkText();
        String mpaa = mpaaLink.getLinkText();

        String year = NodeUtils.getTextData(infoCollector.getNode(i++));
        year = year.replaceAll("\\((\\d*)\\)", "$1");

        int rating = -1;
        ImageTag ratingNode = (ImageTag)infoCollector.getNode(i++);
        if (ratingNode != null) {
          String ratingStr = ratingNode.getImageURL();
          matcher = ratingPattern.matcher(ratingStr);
          if (matcher.matches()) {
            rating = Integer.parseInt(matcher.group(1));
View Full Code Here

TOP

Related Classes of com.almilli.htmlparser.NodeCollector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.