Package org.htmlparser

Examples of org.htmlparser.Parser


        ServletOutputStream out = response.getOutputStream();
       
        try{

            htmlBuffer = "<html>" + htmlBuffer + "</html>";
            Parser parser = new Parser();
            parser.setInputHTML(htmlBuffer);
            NodeList nodelist = parser.parse(null);

            NodeList tableList = nodelist.extractAllNodesThatMatch(new TagNameFilter("TABLE"), true);
            NodeList  headList = tableList.extractAllNodesThatMatch(new TagNameFilter("THEAD"), true);
            NodeList  footList = tableList.extractAllNodesThatMatch(new TagNameFilter("TFOOT"), true);
            NodeList  rowList = tableList.extractAllNodesThatMatch(new TagNameFilter("TR"), true);
View Full Code Here


        // Trace which base URL this parse run is for; guarded so the message
        // string is only built when debug logging is enabled.
        if (log.isDebugEnabled()) {
            log.debug("Parsing html of: " + baseUrl);
        }

        // Step 1: decode the raw input with the given encoding and hand the
        // resulting text to a freshly created Parser. Any exception raised
        // while decoding or configuring the parser is wrapped in
        // HTMLParseException (the original exception is passed to its constructor).
        // NOTE(review): `html` is presumably a byte[] and `encoding` a charset
        // name — both are declared outside this snippet; confirm.
        Parser htmlParser = null;
        try {
            String contents = new String(html,encoding);
            htmlParser = new Parser();
            htmlParser.setInputHTML(contents);
        } catch (Exception e) {
            throw new HTMLParseException(e);
        }

        // Now parse the DOM tree
        // Step 2: walk the parser's elements via parseNodes(), passing a
        // URLPointer built from baseUrl and the `urls` object. A ParserException
        // from the walk is likewise wrapped in HTMLParseException.
        try {
            // we start to iterate through the elements
            parseNodes(htmlParser.elements(), new URLPointer(baseUrl), urls);
            log.debug("End   : parseNodes");
        } catch (ParserException e) {
            throw new HTMLParseException(e);
        }
View Full Code Here

   
  }
 
  public String getGraphSourceString(String html) {
    try {
      Parser parser = new Parser(html);
      Node node = getElementById(parser, GRAPH_STRING_ID);
      return node != null ? node.toPlainTextString().replaceAll(LINE_BREAK, "\n") : "";
    } catch (ParserException e) {
      e.printStackTrace();
    }
View Full Code Here

 
  public void testGetElementById() {
    // Should output:
    //      This is a level 2 span
    try {
      Parser parser = new Parser(TEST_STRING);
      Node node = getElementById(parser, "L2");
      if (node != null) {
        System.out.println(node.toPlainTextString());
      } else {
        System.out.println("null");
View Full Code Here

    }

    public static String getGraphSourceString(String html) {
        try {
            Parser parser = new Parser(html);
            Node node = getElementById(parser, GRAPH_STRING_ID);
            return node != null ? node.toPlainTextString().replaceAll(
                    LINE_BREAK, "\n") : "";
        } catch (ParserException e) {
            e.printStackTrace();
View Full Code Here

    public void testGetElementById() {
        // Should output:
        // This is a level 2 span
        try {
            Parser parser = new Parser(TEST_STRING);
            Node node = getElementById(parser, "L2");
            if (node != null) {
                System.out.println(node.toPlainTextString());
            } else {
                System.out.println("null");
View Full Code Here

  @Override
  public List<UrlData> getUrlData() {
    final List<UrlData> dataList = new ArrayList<UrlData>();
    try {
      Parser parser = new Parser();
      parser.setURL(channelUrl);
      parser.setEncoding("Gb2312");
      NodeVisitor visitor = new NodeVisitor() {

        public void visitTag(Tag tag) {
          if (Div.class.equals(tag.getClass())
              && "m".equals(tag.getAttribute("class"))
              && "sortlist".equals(tag.getAttribute("id"))) {
            tag.accept(new NodeVisitor() {
              public void visitTag(Tag tag) {
                if (Div.class.equals(tag.getClass())
                    && "con".equals(tag
                        .getAttribute("class"))) {
                  tag.accept(new NodeVisitor() {
                    public void visitTag(Tag tag) {
                      UrlData data = new UrlData();
                      if (LinkTag.class.equals(tag
                          .getClass())) {
                        data.setUrlName(tag
                            .getAttribute("title"));
                        data.setUrl(tag
                            .getAttribute("href"));
                        try {
                          LoggerUtils
                              .log(DigitalUrlGetter.class
                                  .getName(),
                                  data.toString());
                        } catch (IOException e) {
                          // TODO Auto-generated catch
                          e.printStackTrace();
                        }
                        System.out.println(data);
                        dataList.add(data);
                      }
                    };
                  });
                }
              };
            });
          }
        }
      };
      parser.visitAllNodesWith(visitor);
    } catch (ParserException e) {
      e.printStackTrace();
    }
    return dataList;
  }
View Full Code Here

  }

  private List<Data> htmlParser() {
    final List<Data> dataList = new ArrayList<Data>();
    try {
      Parser parser = new Parser();
      parser.setURL(url);
      parser.setEncoding("Gb2312");
      NodeVisitor visitor = new NodeVisitor() {

        public void visitTag(Tag tag) {
          if (Div.class.equals(tag.getClass())
              && "p-img".equals(tag.getAttribute("class"))) {
            final Data data = new Data();
            tag.accept(new NodeVisitor() {
              public void visitTag(Tag tag) {
                if (ImageTag.class.equals(tag.getClass())) {
                  data.setName(tag.getAttribute("alt"));
                }
                if (LinkTag.class.equals(tag.getClass())) {
                  data.setUri(tag.getAttribute("href"));
                }
              };
            });
            getDataId(data);
            getPrice(data);
            try {
              LoggerUtils.log(CameraGetter.class.getName(),
                  data.toString());
            } catch (IOException e) {
              // TODO Auto-generated catch block
              e.printStackTrace();
            }
            dataList.add(data);
          }
        }
      };
      parser.visitAllNodesWith(visitor);
    } catch (ParserException e) {
      e.printStackTrace();
    }
    return dataList;
  }
View Full Code Here

        if (DEBUG) _log.debug("Parsing HTML data:\n" + htmlData);

        try
        {
            Parser parser = Parser.createParser(htmlData, null);

            NodeList heads = parser.parse(new TagNameFilter("HEAD"));
            if (heads.size() != 1)
                throw new DiscoveryException(
                        "HTML response must have exactly one HEAD element, " +
                                "found " + heads.size() + " : " + heads.toHtml());
            Node head = heads.elementAt(0);
View Full Code Here

   private int http(File rootDir) throws Exception {

      String s = loadFromUrl("http://" + BINGOBANKO_URL + "/print/?boardCount=9");

      Parser parser = new Parser(s);

      OrFilter filter = new OrFilter(new TagNameFilter("IMG"), new TagNameFilter("script"));

      if (bingoIndex == null) {
         bingoIndex = (int) ((System.currentTimeMillis() - 1317495600085l) / 604800000) + 40;
View Full Code Here

TOP

Related Classes of org.htmlparser.Parser

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.