Package org.htmlparser.util

Examples of org.htmlparser.util.DefaultParserFeedback


            out.println ("</html>");
            // fill our 16K buffer on read
            for (int i = 0; i < InputStreamSource.BUFFER_SIZE; i++)
                out.println ();
            out.close ();
            parser = new Parser (file.getAbsolutePath (), new DefaultParserFeedback(DefaultParserFeedback.QUIET));
            parser.setNodeFactory (new PrototypicalNodeFactory (true));
            enumeration = parser.elements ();
            enumeration.nextNode ();
            if (-1 != System.getProperty ("os.name").indexOf("Windows"))
                // linux/unix lets you delete a file even when it's open
View Full Code Here


        }
        return imgTagCount;
    }

    public int countImageTagsWithHTMLParser() throws ParserException {
        Parser parser = new Parser("http://education.yahoo.com/",new DefaultParserFeedback());
        parser.setNodeFactory (new PrototypicalNodeFactory (new ImageTag ()));
        setParser (parser);
        int parserImgTagCount = 0;
        Node node;
        for (NodeIterator e= parser.elements();e.hasMoreNodes();) {
View Full Code Here

    }

    protected void createParser(String inputHTML, String url,int numNodes) {
        Lexer lexer = new Lexer (inputHTML);
        lexer.getPage ().setUrl (url);
        parser = new Parser (lexer, new DefaultParserFeedback(DefaultParserFeedback.QUIET));
        node = new Node[numNodes];
    }
View Full Code Here

        parseNodes();
    }

    protected void createParser(String inputHTML) {
        mLexer =  new Lexer (new Page (inputHTML));
        parser = new Parser(mLexer, new DefaultParserFeedback(DefaultParserFeedback.QUIET));
        node = new Node[40];
    }
View Full Code Here

    }

    protected void createParser(String inputHTML,int numNodes)
    {
        Lexer lexer = new Lexer (inputHTML);
        parser = new Parser (lexer, new DefaultParserFeedback(DefaultParserFeedback.QUIET));
        node = new Node[numNodes];
    }
View Full Code Here

    }

    protected void createParser(String inputHTML, String url) {
        Lexer lexer = new Lexer (inputHTML);
        lexer.getPage ().setUrl (url);
        parser = new Parser (lexer, new DefaultParserFeedback(DefaultParserFeedback.QUIET));
        node = new Node[40];
    }
View Full Code Here

    String test = "  fdfdf dfdf   ";
    assertEquals("Expected Pruned string", "fdfdf dfdf", TagParser.pruneSpaces(test));
  }

  protected void setUp() {
    tagParser = new TagParser(new DefaultParserFeedback());
  }
View Full Code Here

   * @param resourceLocation
   *            url to be ripped
   */
  public MailRipper(String resourceLocation) {
    try {
      parser = new Parser(resourceLocation, new DefaultParserFeedback());
      parser.registerScanners();
    } catch (ParserException e) {
      System.err.println("Could not create parser object");
      e.printStackTrace();
    }
View Full Code Here

  /**
   * Robot crawler - Provide the starting url
   */
  public Robot(String resourceLocation) {
    try {
      parser = new Parser(resourceLocation, new DefaultParserFeedback());
      parser.registerScanners();
    } catch (ParserException e) {
      System.err.println("Error, could not create parser object");
      e.printStackTrace();
    }
View Full Code Here

          if (!linkTag.isMailLink()) {
            if (linkTag.getLink().toUpperCase().indexOf("HTM") != -1
                || linkTag.getLink().toUpperCase().indexOf("COM") != -1
                || linkTag.getLink().toUpperCase().indexOf("ORG") != -1) {
              if (crawlDepth > 0) {
                Parser newParser = new Parser(linkTag.getLink(), new DefaultParserFeedback());
                newParser.registerScanners();
                System.out.print("Crawling to " + linkTag.getLink());
                crawl(newParser, crawlDepth - 1);
              } else
                System.out.println(linkTag.getLink());
View Full Code Here

TOP

Related Classes of org.htmlparser.util.DefaultParserFeedback

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.