Package org.htmlparser.scanners

Examples of org.htmlparser.scanners.LinkScanner


   * tag is not identified. Creation date: (6/17/2001 4:01:06 PM)
   */
  public void testNullTagBug() throws ParserException {
    createParser("<A HREF=>Something</A>", "http://www.google.com/test/index.html");
    // Register the link scanner
    parser.addScanner(new LinkScanner("-l"));

    parseAndAssertNodeCount(1);
    // The node should be an HTMLLinkTag
    assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
    LinkTag linkTag = (LinkTag) node[0];
View Full Code Here


  }

  public void testToPlainTextString() throws ParserException {
    createParser("<A HREF='mailto:somik@yahoo.com'>hello</A>", "http://www.cj.com/");
    // Register the link scanner
    parser.addScanner(new LinkScanner("-l"));

    parseAndAssertNodeCount(1);
    assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
    LinkTag linkTag = (LinkTag) node[0];
    assertEquals("Link Plain Text", "hello", linkTag.toPlainTextString());
View Full Code Here

        + "<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font><a \n"
        + "href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n"
        + "nical.html\"> Journalism 3.0</a> by Rajesh Jain", "http://www.cj.com/");
    Parser.setLineSeparator("\r\n");
    // Register the link scanner
    parser.addScanner(new LinkScanner("-l"));

    parseAndAssertNodeCount(9);
    assertTrue("First Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
    LinkTag linkTag = (LinkTag) node[0];
    assertStringEquals("Link Raw Text", "<A HREF=\"mailto:somik@yahoo.com\">hello</A>", linkTag.toHtml());
View Full Code Here

  public void testMailToIsNotAHTTPLink() throws ParserException {
    LinkTag link;

    createParser("<A HREF='mailto:derrickoswald@users.sourceforge.net'>Derrick</A>", "http://sourceforge.net");
    // Register the link scanner
    parser.addScanner(new LinkScanner("-l"));

    parseAndAssertNodeCount(1);
    assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
    link = (LinkTag) node[0];
    assertTrue("bug #738504 MailLink != HTTPLink", !link.isHTTPLink());
View Full Code Here

  public void testScan() throws ParserException {
    createParser(
        "<html><head><TITLE>test page</TITLE><BASE HREF=\"http://www.abc.com/\"><a href=\"home.cfm\">Home</a>...</html>",
        "http://www.google.com/test/index.html");
    LinkScanner linkScanner = new LinkScanner("-l");
    parser.addScanner(linkScanner);
    parser.addScanner(new TitleScanner("-t"));
    parser.addScanner(linkScanner.createBaseHREFScanner("-b"));
    parseAndAssertNodeCount(7);
    // Base href tag should be the 4th tag
    assertTrue(node[3] instanceof BaseHrefTag);
    BaseHrefTag baseRefTag = (BaseHrefTag) node[3];
    assertEquals("Base HREF Url", "http://www.abc.com", baseRefTag.getBaseUrl());
View Full Code Here

  private Parser parser;

  public UrlModifyingVisitor(Parser parser, String linkPrefix) {
    super(true, false);
    this.parser = parser;
    LinkScanner linkScanner = new LinkScanner();
    parser.addScanner(linkScanner);
    parser.addScanner(linkScanner.createImageScanner(ImageTag.IMAGE_TAG_FILTER));
    this.linkPrefix = linkPrefix;
    modifiedResult = new StringBuffer();
  }
View Full Code Here

    if (scanners.size() > 0) {
      System.err.println("registerScanners() should be called first, when no other scanner has been registered.");
      System.err.println("Other scanners already exist, hence this method call wont have any effect");
      return;
    }
    LinkScanner linkScanner = new LinkScanner(LinkTag.LINK_TAG_FILTER);
    // Note - The BaseHREF and Image scanners share the same
    // link processor - internally linked up with the factory
    // method in the link scanner class
    addScanner(linkScanner);
    addScanner(linkScanner.createImageScanner(ImageTag.IMAGE_TAG_FILTER));
    addScanner(new ScriptScanner("-s"));
    addScanner(new StyleScanner("-t"));
    addScanner(new JspScanner("-j"));
    addScanner(new AppletScanner("-a"));
    addScanner(new MetaTagScanner("-m"));
    addScanner(new TitleScanner("-T"));
    addScanner(new DoctypeScanner("-d"));
    addScanner(new FormScanner("-f", this));
    addScanner(new FrameSetScanner("-r"));
    addScanner(linkScanner.createBaseHREFScanner("-b"));
    addScanner(new BulletListScanner("-bulletList", this));
    // addScanner(new SpanScanner("-p"));
    addScanner(new DivScanner("-div"));
    addScanner(new TableScanner(this));
  }
View Full Code Here

    return new Parser(reader);
  }

  public static Parser createLinkRecognizingParser(String inputHTML) {
    Parser parser = createParser(inputHTML);
    parser.addScanner(new LinkScanner(LinkTag.LINK_TAG_FILTER));
    return parser;
  }
View Full Code Here

        + "&nbsp;&#8226;&nbsp;<a href=/advanced_search?hl=en>Advanced&nbsp;Search</a><br>&nbsp;&#8226;"
        + "&nbsp;<a href=/preferences?hl=en>Preferences</a><br>&nbsp;&#8226;&nbsp;<a href=/"
        + "language_tools?hl=en>Language Tools</a></font></td></tr></table></form>");

    parser.addScanner(new FormScanner("", parser));
    parser.addScanner(new LinkScanner());
    parseAndAssertNodeCount(1);
    assertTrue("Should be a HTMLFormTag", node[0] instanceof FormTag);
    FormTag formTag = (FormTag) node[0];
    LinkTag[] linkTag = new LinkTag[10];
    int i = 0;
View Full Code Here

    createParser("<A HREF=\"http://www.oygevalt.org/\">Home</A>\n" + "<P>\n" + "And now, the good stuff:\n"
        + "<P>\n" + "<A HREF=\"http://www.yahoo.com\">Yahoo!\n" + "<FORM ACTION=\".\" METHOD=\"GET\">\n"
        + "<INPUT TYPE=\"TEXT\">\n" + "<BR>\n" + "<A HREF=\"http://www.helpme.com\">Help</A> "
        + "<INPUT TYPE=\"checkbox\">\n" + "<P>\n" + "<INPUT TYPE=\"SUBMIT\">\n" + "</FORM>");
    parser.addScanner(new FormScanner("", parser));
    parser.addScanner(new LinkScanner());
    parseAndAssertNodeCount(6);
    assertTrue("Fifth Node is a link", node[4] instanceof LinkTag);
    LinkTag linkTag = (LinkTag) node[4];
    assertEquals("Link Text", "Yahoo!\r\n", linkTag.getLinkText());
    assertEquals("Link URL", "http://www.yahoo.com", linkTag.getLink());
View Full Code Here

TOP

Related Classes of org.htmlparser.scanners.LinkScanner

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.