Package org.htmlparser.scanners

Examples of org.htmlparser.scanners.LinkScanner


        createParser(
            "view these documents, you must have <A href='http://www.adobe.com'>Adobe \n"
                + "Acrobat Reader</A> installed on your computer.");
        Parser.setLineSeparator("\r\n");
        parser.addScanner(new LinkScanner("-l"));
        parseAndAssertNodeCount(3);
        // The first node should be a HTMLStringNode-  with the text - view these documents, you must have
        assertTrue(
            "First node should be a HTMLStringNode",
            node[0] instanceof StringNode);
View Full Code Here


     * The string node is not correctly identified
     */
    public void testTagCharsInStringNode() throws ParserException
    {
        createParser("<a href=\"http://asgard.ch\">[> ASGARD <]</a>");
        parser.addScanner(new LinkScanner("-l"));
        parseAndAssertNodeCount(1);
        assertTrue(
            "Node identified must be a link tag",
            node[0] instanceof LinkTag);
        LinkTag linkTag = (LinkTag) node[0];
View Full Code Here

                + "&nbsp;&#8226;&nbsp;<a href=/advanced_search?hl=en>Advanced&nbsp;Search</a><br>&nbsp;&#8226;"
                + "&nbsp;<a href=/preferences?hl=en>Preferences</a><br>&nbsp;&#8226;&nbsp;<a href=/"
                + "language_tools?hl=en>Language Tools</a></font></td></tr></table></form>");

        parser.addScanner(new FormScanner("", parser));
        parser.addScanner(new LinkScanner());
        parseAndAssertNodeCount(1);
        assertTrue("Should be a HTMLFormTag", node[0] instanceof FormTag);
        FormTag formTag = (FormTag) node[0];
        LinkTag[] linkTag = new LinkTag[10];
        int i = 0;
View Full Code Here

                + "<INPUT TYPE=\"checkbox\">\n"
                + "<P>\n"
                + "<INPUT TYPE=\"SUBMIT\">\n"
                + "</FORM>");
        parser.addScanner(new FormScanner("", parser));
        parser.addScanner(new LinkScanner());
        parseAndAssertNodeCount(6);
        assertTrue("Fifth Node is a link", node[4] instanceof LinkTag);
        LinkTag linkTag = (LinkTag) node[4];
        assertEquals("Link Text", "Yahoo!\r\n", linkTag.getLinkText());
        assertEquals("Link URL", "http://www.yahoo.com", linkTag.getLink());
View Full Code Here

                "registerScanners() should be called first, when no other scanner has been registered.");
            System.err.println(
                "Other scanners already exist, hence this method call wont have any effect");
            return;
        }
        LinkScanner linkScanner = new LinkScanner(LinkTag.LINK_TAG_FILTER);
        // Note - The BaseHREF and Image scanners share the same
        // link processor - internally linked up with the factory
        // method in the link scanner class
        addScanner(linkScanner);
        addScanner(linkScanner.createImageScanner(ImageTag.IMAGE_TAG_FILTER));
        addScanner(new ScriptScanner("-s"));
        addScanner(new StyleScanner("-t"));
        addScanner(new JspScanner("-j"));
        addScanner(new AppletScanner("-a"));
        addScanner(new MetaTagScanner("-m"));
        addScanner(new TitleScanner("-T"));
        addScanner(new DoctypeScanner("-d"));
        addScanner(new FormScanner("-f", this));
        addScanner(new FrameSetScanner("-r"));
        addScanner(linkScanner.createBaseHREFScanner("-b"));
        addScanner(new BulletListScanner("-bulletList", this));
        //  addScanner(new SpanScanner("-p"));
        addScanner(new DivScanner("-div"));
        addScanner(new TableScanner(this));
    }
View Full Code Here

    }

    public static Parser createLinkRecognizingParser(String inputHTML)
    {
        Parser parser = createParser(inputHTML);
        parser.addScanner(new LinkScanner(LinkTag.LINK_TAG_FILTER));
        return parser;
    }
View Full Code Here

        // add body tag scanner
        parser.addScanner(new BodyScanner());
        // add BaseHRefTag scanner
        parser.addScanner(new BaseHrefScanner());
        // add ImageTag and BaseHrefTag scanners
        LinkScanner linkScanner= new LinkScanner(LinkTag.LINK_TAG_FILTER);
        // parser.addScanner(linkScanner);
        parser.addScanner(
            linkScanner.createImageScanner(ImageTag.IMAGE_TAG_FILTER));
        parser.addScanner(
            linkScanner.createBaseHREFScanner("-b"));
                            // Taken from org.htmlparser.Parser
        // add input tag scanner
        parser.addScanner(new InputTagScanner());
        // add applet tag scanner
        parser.addScanner(new AppletScanner());
View Full Code Here

    super(name);
  }

  public void testAccessKey() throws ParserException {
    createParser("<a href=\"http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph\" accessKey=1>Click Here</A>");
    parser.addScanner(new LinkScanner("-l"));
    parseAndAssertNodeCount(1);
    assertTrue("The node should be a link tag", node[0] instanceof LinkTag);
    LinkTag linkTag = (LinkTag) node[0];
    assertEquals("Link URL of link tag", "http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph", linkTag
        .getLink());
View Full Code Here

        "<A HREF=\"s/8741\"><IMG BORDER=\"0\" WIDTH=\"16\" SRC=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" HEIGHT=\"16\">This is a test\r\n</A>",
        linkTag.toHtml());
  }

  public void testEvaluate() {
    LinkScanner scanner = new LinkScanner("-l");
    boolean retVal = scanner.evaluate("   a href ", null);
    assertEquals("Evaluation of the Link tag", new Boolean(true), new Boolean(retVal));
  }
View Full Code Here

   */
  public void testExtractLinkInvertedCommasBug() throws ParserException {
    String tagContents = "a href=r/anorth/top.html";
    Tag tag = new Tag(new TagData(0, 0, tagContents, ""));
    String url = "c:\\cvs\\html\\binaries\\yahoo.htm";
    LinkScanner scanner = new LinkScanner("-l");
    assertEquals("Extracted Link", "r/anorth/top.html", scanner.extractLink(tag, url));
  }
View Full Code Here

TOP

Related Classes of org.htmlparser.scanners.LinkScanner

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.