Package com.almilli.htmlparser

Examples of com.almilli.htmlparser.NodeCollector


    log.info("syncing movie queue...");
        checkLogin();
    try {
      parser.setURL(QUEUE_URL);

            NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
                    new HasAttributeFilter("id", "queueShippedList"),
                    new HasAttributeFilter("id", "queueCurrentList")
            });

      parser.extractAllNodesThatMatch(infoCollector);

            int i=0;
            Node shippedNode = infoCollector.getNode(i++);
      if (shippedNode != null) {
                //there is something in the shipped list
        List<MovieItem> shippedList = new ArrayList<MovieItem>();
                parseMovieQueue(shippedNode, shippedList);
                this.shippedList = shippedList;
            }

            Node queueNode = infoCollector.getNode(i++);
            if (queueNode != null) {
        List<MovieItem> movieQueue = new ArrayList<MovieItem>();
        parseMovieQueue(queueNode, movieQueue);
        this.movieQueue = movieQueue;
      }
View Full Code Here


            String response = rolloverMethod.getResponseBodyAsString();
            Map<String, Object> props = new JSONParser().parse(response);
           
            parser.setInputHTML((String)props.get("responseBody"));
           
            NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
                    new CssClassFilter("boxart"),
                    new AndFilter(
                            new HasAncestorFilter(new CssClassFilter("detailDescription")),
                            new NodeClassFilter(TextNode.class)),
                    new AndFilter(
                            new HasAncestorFilter(new CssClassFilter("detailSpecs")),
                            new CssClassFilter("movieInfo"))
            });
            NodeList all = parser.extractAllNodesThatMatch(infoCollector);

            if (all.size() >= 3) {
                int i=0;
                //get the icon
                ImageTag icon = (ImageTag)infoCollector.getNode(i++);
                item.setIconUrl(icon.getImageURL().replace("&amp;&amp;", "&"));

                //get the description
                NodeList list = infoCollector.getNodeList(i++);
                item.setSummary(NodeUtils.getTextData(list.elementAt(list.size()-1)));
               
                //strip off the query params
                int index = item.getIconUrl().indexOf('?');
                if (index != -1) {
                    item.setIconUrl(item.getIconUrl().substring(0, index)+ "?wid=130&hei=182&cvt=jpeg");
                    item.setIconWidth(130);
                    item.setIconHeight(182);
                }
                if (item.getIconUrl().startsWith("//")) {
                    item.setIconUrl("http:" + item.getIconUrl());
                }
               
                list = infoCollector.getNodeList(i++);
                setListValues(item, list);
                item.addDetailFlag(MovieItem.DETAIL_MEDIUM);
            } else {
                if (log.isEnabledFor(Level.ERROR)) {
                    log.error("Unparseable data: " + url);
View Full Code Here

            if (log.isDebugEnabled()) {
                log.debug("Loading movie details at " + url);
            }
            parser.setURL(url);

            NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
                    new AndFilter(
                            new HasParentFilter(new HasAttributeFilter("href",
                                    "/catalog/movieDetails/" + item.getMovieId())),
                            new NodeClassFilter(ImageTag.class)
                    ),

                    new CssClassFilter("bvr-rating"),
                    new CssClassFilter("description")
                    });
            parser.extractAllNodesThatMatch(infoCollector);
           
          int i=0;
            //get the icon
            ImageTag icon = (ImageTag)infoCollector.getNode(i++);
            item.setIconUrl(icon.getImageURL());
           
            //strip off the query params
            int index = item.getIconUrl().indexOf('?');
            if (index != -1) {
                item.setIconUrl(item.getIconUrl().substring(0, index)+ "?wid=130&hei=182&cvt=jpeg");
                item.setIconWidth(130);
                item.setIconHeight(182);
            }
            if (item.getIconUrl().startsWith("//")) {
                item.setIconUrl("http:" + item.getIconUrl());
            }

      Node ratingNode = infoCollector.getNode(i++);
      String ratingStr = ((Tag)ratingNode).getAttribute("src");
      int rating = -1;
            boolean myRating = false;
      Pattern ratingPattern = Pattern.compile(RATING_IMG_EXPR);
      Matcher matcher = ratingPattern.matcher(ratingStr);
      if (matcher.matches()) {
                myRating = "rt".equals(matcher.group(1));
        rating = Integer.parseInt(matcher.group(2));
      }
      item.setRating(rating);
            if (myRating) {
                item.setMyRating(rating);
            }

            Node infoNode = infoCollector.getNode(i++);
           
            //info
            NodeList list = new NodeList();
            NodeFilter groupsFilter = new CssClassFilter("movieInfo");
            infoNode.collectInto(list, groupsFilter);
View Full Code Here

    }
 
  private void parseMovieQueue(Node queue, List<MovieItem> items) {
    NodeFilter discFilter = new CssClassFilter("disc");

        NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
                new CssClassFilter("bvr-qremove"),
                new AndFilter(new NodeClassFilter(LinkTag.class),
                        new HasParentFilter(new CssClassFilter("title"))),
                new CssClassFilter("mpaa"),
                new CssClassFilter("bvr-rating"),
                new AndFilter(new NodeClassFilter(LinkTag.class),
                        new CssClassFilter("availability")),
                new CssClassFilter("shipped"),
                new CssClassFilter("arrival")
        });
   
    Pattern ratingPattern = Pattern.compile(RATING_IMG_EXPR);
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
   
    NodeList discs = new NodeList();
    queue.collectInto(discs, discFilter);

        int index;
    NodeList all = new NodeList();
    SimpleNodeIterator iter = discs.elements();
    while (iter.hasMoreNodes()) {
            all.removeAll();
            infoCollector.clear();
      Node disc = iter.nextNode();
     
      //collect all the nodes for the movie items
      disc.collectInto(all, infoCollector);
     
      int i=0;
      //parse out all the information
            String queueId = null;
            boolean discSet = false;
            LinkTag removeNode = (LinkTag)infoCollector.getNode(i++);
            if (removeNode != null) {
                queueId = removeNode.getLink();
                discSet = queueId.indexOf("setId") != -1;
                index = queueId.lastIndexOf('=');
                if (index != -1) {
                    queueId = queueId.substring(index+1);
                }
            }
            LinkTag titleNode = (LinkTag)infoCollector.getNode(i++);
            String movieId = titleNode.getLink();
            index = movieId.lastIndexOf('/');
            if (index != -1) {
                movieId = movieId.substring(index+1);
            }
            String title = NodeUtils.getTextData(titleNode);
      String mpaa = NodeUtils.getTextData(infoCollector.getNode(i++));

            int rating = -1;
            boolean myRating = false;
      Node ratingNode = infoCollector.getNode(i++);
            if (ratingNode != null) {
          String ratingStr = ((Tag)ratingNode).getAttribute("src");
          Matcher matcher = ratingPattern.matcher(ratingStr);
          if (matcher.matches()) {
                    myRating = "rt".equals(matcher.group(1));
            rating = Integer.parseInt(matcher.group(2));
          }
            }
      String availability = NodeUtils.getTextData(infoCollector.getNode(i++));

            Node node;
            Date shippedDate = null;
            Date arrivalDate = null;
            try {
                node = infoCollector.getNode(i++);
                if (node != null) {
                    shippedDate = dateFormat.parse(NodeUtils.getTextData(node));
                }
            } catch (ParseException e) {}
            try {
                node = infoCollector.getNode(i++);
                if (node != null) {
                    arrivalDate = dateFormat.parse(NodeUtils.getTextData(node));
                }
            } catch (ParseException e) {}
     
View Full Code Here

        }
        return userRecommendations;
    }

    private MovieItem parseMovieSearch(Node movie) {
        NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
                new CssClassFilter("title"),
                new AndFilter(new HasAncestorFilter(new CssClassFilter("titleInfo")),
                        new NodeClassFilter(ImageTag.class)),
                new CssClassFilter("mpaa"),
                new CssClassFilter("year"),
                new CssClassFilter("bvr-rating"),
                new CssClassFilter("summary")
        });

    Pattern ratingPattern = Pattern.compile(RATING_IMG_EXPR);

    NodeList all = new NodeList();
    movie.collectInto(all, infoCollector);

        String movieId = ((Tag)movie).getAttribute("id");
    MovieItem item = null;
    if (all.size() > 0) {
            item = createMovieItem(movieId);
     
      int i=0;
           
            //save off the title and image
            item.setTitle(NodeUtils.getTextData(infoCollector.getNode(i++)));

            //get the icon
            ImageTag icon = (ImageTag)infoCollector.getNode(i++);
            item.setIconUrl(icon.getImageURL().replace("&amp;&amp;", "&"));
           
            //strip off the query params
            int index = item.getIconUrl().indexOf('?');
            if (index != -1) {
                item.setIconUrl(item.getIconUrl().substring(0, index)+ "?wid=130&hei=182&cvt=jpeg");
                item.setIconWidth(130);
                item.setIconHeight(182);
            }
            if (item.getIconUrl().startsWith("//")) {
                item.setIconUrl("http:" + item.getIconUrl());
            }
           
            //the mpaa rating
      item.setMpaa(NodeUtils.getTextData(infoCollector.getNode(i++)));
           
            //the release year
      item.setYear(NodeUtils.getTextData(infoCollector.getNode(i++)));

      //rating
            int rating = -1;
            boolean myRating = false;
      Node ratingNode = infoCollector.getNode(i++);
            if (ratingNode != null) {
          String ratingStr = ((Tag)ratingNode).getAttribute("src");
          Matcher matcher = ratingPattern.matcher(ratingStr);
          if (matcher.matches()) {
                    myRating = "rt".equals(matcher.group(1));
            rating = Integer.parseInt(matcher.group(2));
          }
          item.setRating(rating);
            }
            if (myRating) {
                item.setMyRating(rating);
            }
     
      //summary
            item.setSummary(NodeUtils.getTextData(infoCollector.getNode(i++)));
           
            item.addDetailFlag(MovieItem.DETAIL_SEARCH);
    }
    return item;
  }
View Full Code Here

        if (log.isDebugEnabled()) {
          log.debug("Searching url " + searchUrl);
        }
        parser.setURL(searchUrl);

      NodeCollector searchCollector = new NodeCollector(new NodeFilter[] {
          new HasParentFilter(new HasAttributeFilter("id", "tabHeader2")),
          new CssClassFilter("movie"),
      });
       
        parser.extractAllNodesThatMatch(searchCollector);
        Node moviesTab = searchCollector.getNode(0);
        NodeList searchNodes = searchCollector.getNodeList(1);
       
            if (searchNodes != null) {
            if (totalResultsOut != null) {
              //parse out the total results
            String totalStr = NodeUtils.getTextData(moviesTab);
View Full Code Here

            if (log.isDebugEnabled()) {
                log.debug("getListingDetails url=" + url);
            }
            parser.setURL(url);
           
            NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
                    new BetweenFilter(new TagNameFilter("strong"), new TagNameFilter("/strong")),
                    new CssClassFilter("movie")
            });
           
            parser.extractAllNodesThatMatch(infoCollector);
           
            String selectedPage = NodeUtils.getTextData(infoCollector.getNode(0));
            if (selectedPage != null && selectedPage.equals(Integer.toString(page))) {
           
                NodeList browseNodes = infoCollector.getNodeList(1);
               
                if (browseNodes.size() > 1) {
                    NodeIterator iter = browseNodes.elements();
                   
                    while (iter.hasMoreNodes()) {
View Full Code Here

    }
  }
 
  private void parseShippedList(Node queue, List<MovieItem> items) {
    NodeFilter discFilter = new CssClassFilter("or");
    NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
        new AndFilter(new NodeClassFilter(LinkTag.class),
            new HasParentFilter(new CssClassFilter("qt"))),
        new AndFilter(new NodeClassFilter(ImageTag.class),
            new CssClassFilter("star")),
        new CssClassFilter("ec"),
        new CssClassFilter("e"),
        new CssClassFilter("ee")
        });
   
    Pattern ratingPattern = Pattern.compile(RATING_EXPR);
    Pattern movieIdPattern = Pattern.compile(MOVIE_ID_EXPR);
    SimpleDateFormat dateFormat = new SimpleDateFormat("MM/dd/yy");
   
    NodeList discs = new NodeList();
    queue.collectInto(discs, discFilter);

    NodeList info = new NodeList();
    SimpleNodeIterator iter = discs.elements();
    while (iter.hasMoreNodes()) {
      info.removeAll();
      infoCollector.clear();
      Node disc = iter.nextNode();
     
      //collect all the nodes for the movie items
      disc.collectInto(info, infoCollector);

      int i=0;

      //parse out all the information
      LinkTag titleLink = (LinkTag)infoCollector.getNode(i++);
      String movieId = null;
      Matcher matcher = movieIdPattern.matcher(titleLink.getLink());
      if (matcher.matches()) {
        movieId = matcher.group(1);
      } else {
        //not a valid movie
        continue;
      }
     
      String title = titleLink.getLinkText();

      int rating = -1;
      ImageTag ratingNode = (ImageTag)infoCollector.getNode(i++);
      if (ratingNode != null) {
        String ratingStr = ratingNode.getImageURL();
        matcher = ratingPattern.matcher(ratingStr);
        if (matcher.matches()) {
          rating = Integer.parseInt(matcher.group(1));
        }
      }
     
      String mpaa = NodeUtils.getTextData(infoCollector.getNode(i++));
      String category = NodeUtils.getTextData(infoCollector.getNode(i++));

      Date shippedDate = null;
      Date arrivalDate = null;
      String dateStr = NodeUtils.getTextData(infoCollector.getNode(i++));
      if (dateStr != null) {
        String[] dates = dateStr.split("\\s+");
        try {
          shippedDate = dateFormat.parse(dates[0]);
        } catch (ParseException e) {}
View Full Code Here

    }
  }
 
  private void parseMovieQueue(Node queue, List<MovieItem> items) {
    NodeFilter discFilter = new CssClassFilter("bd");
    NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
        new AndFilter(new NodeClassFilter(LinkTag.class),
            new HasParentFilter(new CssClassFilter("stc"))),
           
        new AndFilter(new NodeClassFilter(ImageTag.class),
            new CssClassFilter("star")),
           
        new CssClassFilter("ec"),
        new CssClassFilter("e")
        });
   
    Pattern ratingPattern = Pattern.compile(RATING_EXPR);
    Pattern movieIdPattern = Pattern.compile(MOVIE_ID_EXPR);
   
    NodeList discs = new NodeList();
    queue.collectInto(discs, discFilter);

    NodeList info = new NodeList();
    SimpleNodeIterator iter = discs.elements();
    while (iter.hasMoreNodes()) {
      info.removeAll();
      infoCollector.clear();
      Node disc = iter.nextNode();
     
      //collect all the nodes for the movie items
      disc.collectInto(info, infoCollector);

      int i=0;

      //parse out all the information
      LinkTag titleLink = (LinkTag)infoCollector.getNode(i++);
      String movieId = null;
      Matcher matcher = movieIdPattern.matcher(titleLink.getLink());
      if (matcher.matches()) {
        movieId = matcher.group(1);
      } else {
        //not a valid movie
        continue;
      }
     
      String title = titleLink.getLinkText();

      int rating = -1;
      ImageTag ratingNode = (ImageTag)infoCollector.getNode(i++);
      if (ratingNode != null) {
        String ratingStr = ratingNode.getImageURL();
        matcher = ratingPattern.matcher(ratingStr);
        if (matcher.matches()) {
          rating = Integer.parseInt(matcher.group(1));
        }
      }
     
      String mpaa = NodeUtils.getTextData(infoCollector.getNode(i++));
     
      NodeList list = infoCollector.getNodeList(i++);
      String category = NodeUtils.getTextData(list.elementAt(0));
      String availability = NodeUtils.getTextData(list.elementAt(1));
     
      //create a movie item
      MovieItem movie = createMovieItem(movieId);
View Full Code Here

          new NodeClassFilter(TableRow.class),
          new HasChildFilter(new HasChildFilter(new CssClassFilter("list-title")))
      );
     
 
      NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
          new AndFilter(new NodeClassFilter(LinkTag.class),
              new HasParentFilter(new CssClassFilter("list-title"))),
          new CssClassFilter("list-titleyear"),
          new CssClassFilter("list-mpaa"),
          new AndFilter(new NodeClassFilter(ImageTag.class),
              new CssClassFilter("star"))
          });
 
      NodeList discs = parser.extractAllNodesThatMatch(discFilter);
 
      Pattern ratingPattern = Pattern.compile(RATING_EXPR);
      Pattern movieIdPattern = Pattern.compile(MOVIE_ID_EXPR);
     
      Node disc;
      NodeList info = new NodeList();
      SimpleNodeIterator iter = discs.elements();
      while (iter.hasMoreNodes()) {
        info.removeAll();
        infoCollector.clear();
       
        disc = iter.nextNode();
       
        //collect all the nodes for the movie items
        disc.collectInto(info, infoCollector);
 
        int i=0;
 
        //parse out all the information
        LinkTag titleLink = (LinkTag)infoCollector.getNode(i++);
        String movieId = null;
        Matcher matcher = movieIdPattern.matcher(titleLink.getLink());
        if (matcher.matches()) {
          movieId = matcher.group(1);
        } else {
          //not a valid movie
          continue;
        }
       
        String title = titleLink.getLinkText();
 
        String year = NodeUtils.getTextData(infoCollector.getNode(i++));
        year = year.replaceAll("\\((\\d*)\\)", "$1");
 
        String mpaa = NodeUtils.getTextData(infoCollector.getNode(i++));
 
        int rating = -1;
        ImageTag ratingNode = (ImageTag)infoCollector.getNode(i++);
        if (ratingNode != null) {
          String ratingStr = ratingNode.getImageURL();
          matcher = ratingPattern.matcher(ratingStr);
          if (matcher.matches()) {
            rating = Integer.parseInt(matcher.group(1));
View Full Code Here

TOP

Related Classes of com.almilli.htmlparser.NodeCollector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.