Package com.almilli.htmlparser

Examples of com.almilli.htmlparser.CssClassFilter


        log.debug("Adding movie at url " + url);
      }
      parser.setURL(url);

      NodeFilter discFilter = new AndFilter(
        new CssClassFilter("disc"),
        new HasChildFilter(new HasChildFilter(
            new HasAttributeFilter("href", "/catalog/movieDetails/" + item.getMovieId())))
      );
     
      NodeFilter removeFilter = new AndFilter(
          new NodeClassFilter(LinkTag.class),
          new CssClassFilter("bvr-qremove")
      );

      NodeList discNodes = parser.extractAllNodesThatMatch(discFilter);

      if (discNodes.size() == 1) {
View Full Code Here


            Map<String, Object> props = new JSONParser().parse(response);
           
            parser.setInputHTML((String)props.get("responseBody"));
           
            NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
                    new CssClassFilter("boxart"),
                    new AndFilter(
                            new HasAncestorFilter(new CssClassFilter("detailDescription")),
                            new NodeClassFilter(TextNode.class)),
                    new AndFilter(
                            new HasAncestorFilter(new CssClassFilter("detailSpecs")),
                            new CssClassFilter("movieInfo"))
            });
            NodeList all = parser.extractAllNodesThatMatch(infoCollector);

            if (all.size() >= 3) {
                int i=0;
View Full Code Here

                            new HasParentFilter(new HasAttributeFilter("href",
                                    "/catalog/movieDetails/" + item.getMovieId())),
                            new NodeClassFilter(ImageTag.class)
                    ),

                    new CssClassFilter("bvr-rating"),
                    new CssClassFilter("description")
                    });
            parser.extractAllNodesThatMatch(infoCollector);
           
          int i=0;
            //get the icon
            ImageTag icon = (ImageTag)infoCollector.getNode(i++);
            item.setIconUrl(icon.getImageURL());
           
            //strip off the query params
            int index = item.getIconUrl().indexOf('?');
            if (index != -1) {
                item.setIconUrl(item.getIconUrl().substring(0, index)+ "?wid=130&hei=182&cvt=jpeg");
                item.setIconWidth(130);
                item.setIconHeight(182);
            }
            if (item.getIconUrl().startsWith("//")) {
                item.setIconUrl("http:" + item.getIconUrl());
            }

      Node ratingNode = infoCollector.getNode(i++);
      String ratingStr = ((Tag)ratingNode).getAttribute("src");
      int rating = -1;
            boolean myRating = false;
      Pattern ratingPattern = Pattern.compile(RATING_IMG_EXPR);
      Matcher matcher = ratingPattern.matcher(ratingStr);
      if (matcher.matches()) {
                myRating = "rt".equals(matcher.group(1));
        rating = Integer.parseInt(matcher.group(2));
      }
      item.setRating(rating);
            if (myRating) {
                item.setMyRating(rating);
            }

            Node infoNode = infoCollector.getNode(i++);
           
            //info
            NodeList list = new NodeList();
            NodeFilter groupsFilter = new CssClassFilter("movieInfo");
            infoNode.collectInto(list, groupsFilter);
            setListValues(item, list);
           
            //summary
            list.removeAll();
            NodeFilter summaryFilter = new CssClassFilter("summary");
            infoNode.collectInto(list, summaryFilter);
            item.setSummary(NodeUtils.getTextData(list));
           
            item.addDetailFlag(MovieItem.DETAIL_MEDIUM);
           
View Full Code Here

        }
        return item;
    }
 
  private void parseMovieQueue(Node queue, List<MovieItem> items) {
    NodeFilter discFilter = new CssClassFilter("disc");

        NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
                new CssClassFilter("bvr-qremove"),
                new AndFilter(new NodeClassFilter(LinkTag.class),
                        new HasParentFilter(new CssClassFilter("title"))),
                new CssClassFilter("mpaa"),
                new CssClassFilter("bvr-rating"),
                new AndFilter(new NodeClassFilter(LinkTag.class),
                        new CssClassFilter("availability")),
                new CssClassFilter("shipped"),
                new CssClassFilter("arrival")
        });
   
    Pattern ratingPattern = Pattern.compile(RATING_IMG_EXPR);
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
   
View Full Code Here

        return userRecommendations;
    }

    private MovieItem parseMovieSearch(Node movie) {
        NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
                new CssClassFilter("title"),
                new AndFilter(new HasAncestorFilter(new CssClassFilter("titleInfo")),
                        new NodeClassFilter(ImageTag.class)),
                new CssClassFilter("mpaa"),
                new CssClassFilter("year"),
                new CssClassFilter("bvr-rating"),
                new CssClassFilter("summary")
        });

    Pattern ratingPattern = Pattern.compile(RATING_IMG_EXPR);

    NodeList all = new NodeList();
View Full Code Here

        }
        parser.setURL(searchUrl);

      NodeCollector searchCollector = new NodeCollector(new NodeFilter[] {
          new HasParentFilter(new HasAttributeFilter("id", "tabHeader2")),
          new CssClassFilter("movie"),
      });
       
        parser.extractAllNodesThatMatch(searchCollector);
        Node moviesTab = searchCollector.getNode(0);
        NodeList searchNodes = searchCollector.getNodeList(1);
View Full Code Here

      try {
        parser.setURL(CATEGORY_LIST_URL);
 
        NodeFilter categoryFilter = new AndFilter(
            new HasAncestorFilter(
                                new OrFilter(new CssClassFilter("column33"),
                                        new CssClassFilter("column34"))),
            new CssClassFilter("listMain")
        );
 
        NodeList categoryNodes = parser.extractAllNodesThatMatch(categoryFilter);

                List<Category> categories = new ArrayList<Category>();
View Full Code Here

            }
            parser.setURL(url);
           
            NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
                    new BetweenFilter(new TagNameFilter("strong"), new TagNameFilter("/strong")),
                    new CssClassFilter("movie")
            });
           
            parser.extractAllNodesThatMatch(infoCollector);
           
            String selectedPage = NodeUtils.getTextData(infoCollector.getNode(0));
View Full Code Here

      */
     
      parser.setURL(QUEUE_URL);

      NodeFilter queueFilter = new OrFilter(
        new CssClassFilter("dvd-out"),
        new HasAttributeFilter("id", "dvd-queue")
      );

      NodeList queueNodes = parser.extractAllNodesThatMatch(queueFilter);

View Full Code Here

      throw io;
    }
  }
 
  private void parseShippedList(Node queue, List<MovieItem> items) {
    NodeFilter discFilter = new CssClassFilter("or");
    NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
        new AndFilter(new NodeClassFilter(LinkTag.class),
            new HasParentFilter(new CssClassFilter("qt"))),
        new AndFilter(new NodeClassFilter(ImageTag.class),
            new CssClassFilter("star")),
        new CssClassFilter("ec"),
        new CssClassFilter("e"),
        new CssClassFilter("ee")
        });
   
    Pattern ratingPattern = Pattern.compile(RATING_EXPR);
    Pattern movieIdPattern = Pattern.compile(MOVIE_ID_EXPR);
    SimpleDateFormat dateFormat = new SimpleDateFormat("MM/dd/yy");
View Full Code Here

TOP

Related Classes of com.almilli.htmlparser.CssClassFilter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.