Package org.htmlparser.scanners

Examples of org.htmlparser.scanners.LinkScanner


  /**
   * This is the reproduction of a bug which produces multiple text copies.
   */
  public void testExtractLinkInvertedCommasBug2() throws ParserException {
    createParser("<a href=\"http://cbc.ca/artsCanada/stories/greatnorth271202\" class=\"lgblacku\">Vancouver schools plan 'Great Northern Way'</a>");
    parser.addScanner(new LinkScanner("-l"));
    parseAndAssertNodeCount(1);
    assertTrue("The node should be a link tag", node[0] instanceof LinkTag);
    LinkTag linkTag = (LinkTag) node[0];
    assertStringEquals("Extracted Text", "Vancouver schools plan 'Great Northern Way'", linkTag.getLinkText());
  }
View Full Code Here


  /**
   * Bug pointed out by Sam Joseph (sam@neurogrid.net) Links with spaces in
   * them will get their spaces absorbed
   */
  public void testLinkSpacesBug() throws ParserException {
    createParser("<a href=\"http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph\">Click Here</A>");
    parser.addScanner(new LinkScanner("-l"));
    parseAndAssertNodeCount(1);
    assertTrue("The node should be a link tag", node[0] instanceof LinkTag);
    LinkTag linkTag = (LinkTag) node[0];
    assertEquals("Link URL of link tag", "http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph", linkTag
        .getLink());
View Full Code Here

  public void testMultipleLineBug() throws ParserException {
    createParser("<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font><a \n"
        + "href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n"
        + "nical.html\"> Journalism 3.0</a> by Rajesh Jain");
    Parser.setLineSeparator("\r\n");
    parser.addScanner(new LinkScanner("-l"));
    parseAndAssertNodeCount(8);
    assertTrue("Seventh node should be a link tag", node[6] instanceof LinkTag);
    LinkTag linkTag = (LinkTag) node[6];
    String exp = new String("http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/technical.html");
    // assertEquals("Length of link tag",exp.length(),
View Full Code Here

  }

  public void testRelativeLinkScan() throws ParserException {
    createParser("<A HREF=\"mytest.html\"> Hello World</A>", "http://www.yahoo.com");
    // Register the image scanner
    parser.addScanner(new LinkScanner("-l"));
    parseAndAssertNodeCount(1);
    assertTrue("Node identified should be HTMLLinkTag", node[0] instanceof LinkTag);
    LinkTag linkTag = (LinkTag) node[0];
    assertEquals("Expected Link", "http://www.yahoo.com/mytest.html", linkTag.getLink());
  }
View Full Code Here

  }

  public void testRelativeLinkScan2() throws ParserException {
    createParser("<A HREF=\"abc/def/mytest.html\"> Hello World</A>", "http://www.yahoo.com");
    // Register the image scanner
    parser.addScanner(new LinkScanner("-l"));
    parseAndAssertNodeCount(1);
    assertTrue("Node identified should be HTMLLinkTag", node[0] instanceof LinkTag);
    LinkTag linkTag = (LinkTag) node[0];
    assertStringEquals("Expected Link", "http://www.yahoo.com/abc/def/mytest.html", linkTag.getLink());
  }
View Full Code Here

  }

  public void testRelativeLinkScan3() throws ParserException {
    createParser("<A HREF=\"../abc/def/mytest.html\"> Hello World</A>", "http://www.yahoo.com/ghi");
    // Register the image scanner
    parser.addScanner(new LinkScanner("-l"));
    parseAndAssertNodeCount(1);
    assertTrue("Node identified should be HTMLLinkTag", node[0] instanceof LinkTag);
    LinkTag linkTag = (LinkTag) node[0];
    assertStringEquals("Expected Link", "http://www.yahoo.com/abc/def/mytest.html", linkTag.getLink());
  }
View Full Code Here

  /**
   * Test scan with data that contains different node types
   */
  public void testScan() throws ParserException {
    createParser("<A HREF=\"mytest.html\"><IMG SRC=\"abcd.jpg\">Hello World</A>", "http://www.yahoo.com");
    // Register the image scanner
    LinkScanner linkScanner = new LinkScanner("-l");
    parser.addScanner(linkScanner);
    parser.addScanner(linkScanner.createImageScanner("-i"));

    parseAndAssertNodeCount(1);
    assertTrue("Node should be a link node", node[0] instanceof LinkTag);

    LinkTag linkTag = (LinkTag) node[0];
View Full Code Here

  }

  public void testReplaceFaultyTagWithEndTag() throws ParserException {
    String currentLine = "<p>Site Comments?<br><a href=\"mailto:sam@neurogrid.com?subject=Site Comments\">Mail Us<a></p>";
    Tag tag = new Tag(new TagData(85, 87, "a", currentLine));
    LinkScanner linkScanner = new LinkScanner();
    String newLine = linkScanner.replaceFaultyTagWithEndTag(tag, currentLine);
    assertEquals("Expected replacement",
        "<p>Site Comments?<br><a href=\"mailto:sam@neurogrid.com?subject=Site Comments\">Mail Us</A></p>",
        newLine);
  }
View Full Code Here

  }

  public void testInsertEndTagBeforeTag() throws ParserException {
    String currentLine = "<a href=s/7509><b>Yahoo! Movies</b></a>";
    Tag tag = new Tag(new TagData(0, 14, "a href=s/7509", currentLine));
    LinkScanner linkScanner = new LinkScanner();
    String newLine = linkScanner.insertEndTagBeforeNode(tag, currentLine);
    assertEquals("Expected insertion", "</A><a href=s/7509><b>Yahoo! Movies</b></a>", newLine);
  }
View Full Code Here

  /**
   * &lt;A&gt;Revision&lt;/a&gt; Reported by Mazlan Mat
   */
  public void testFreshMeatBug() throws ParserException {
    createParser("<a>Revision</a>", "http://www.yahoo.com");
    // Register the image scanner
    parser.addScanner(new LinkScanner("-l"));

    parseAndAssertNodeCount(3);
    assertTrue("Node 0 should be a tag", node[0] instanceof Tag);
    Tag tag = (Tag) node[0];
    assertEquals("Tag Contents", "a", tag.getText());
View Full Code Here

TOP

Related Classes of org.htmlparser.scanners.LinkScanner

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.