Package org.htmlparser.scanners

Examples of org.htmlparser.scanners.TitleScanner


    createParser(
        "<html><head><TITLE>test page</TITLE><BASE HREF=\"http://www.abc.com/\"><a href=\"home.cfm\">Home</a>...</html>",
        "http://www.google.com/test/index.html");
    LinkScanner linkScanner = new LinkScanner("-l");
    parser.addScanner(linkScanner);
    parser.addScanner(new TitleScanner("-t"));
    parser.addScanner(linkScanner.createBaseHREFScanner("-b"));
    parseAndAssertNodeCount(7);
    // Base href tag should be the 4th tag
    assertTrue(node[3] instanceof BaseHrefTag);
    BaseHrefTag baseRefTag = (BaseHrefTag) node[3];
View Full Code Here


    addScanner(new ScriptScanner("-s"));
    addScanner(new StyleScanner("-t"));
    addScanner(new JspScanner("-j"));
    addScanner(new AppletScanner("-a"));
    addScanner(new MetaTagScanner("-m"));
    addScanner(new TitleScanner("-T"));
    addScanner(new DoctypeScanner("-d"));
    addScanner(new FormScanner("-f", this));
    addScanner(new FrameSetScanner("-r"));
    addScanner(linkScanner.createBaseHREFScanner("-b"));
    addScanner(new BulletListScanner("-bulletList", this));
View Full Code Here

    super(name);
  }

  public void testScan() throws ParserException {
    createParser("<html><head><title>Yahoo!</title><base href=http://www.yahoo.com/ target=_top><meta http-equiv=\"PICS-Label\" content='(PICS-1.1 \"http://www.icra.org/ratingsv02.html\" l r (cz 1 lz 1 nz 1 oz 1 vz 1) gen true for \"http://www.yahoo.com\" r (cz 1 lz 1 nz 1 oz 1 vz 1) \"http://www.rsac.org/ratingsv01.html\" l r (n 0 s 0 v 0 l 0) gen true for \"http://www.yahoo.com\" r (n 0 s 0 v 0 l 0))'><style>a.h{background-color:#ffee99}</style></head>");
    TitleScanner titleScanner = new TitleScanner("-t");
    parser.addScanner(titleScanner);
    parser.addScanner(new StyleScanner("-s"));
    parser.addScanner(new MetaTagScanner("-m"));
    parseAndAssertNodeCount(7);
    assertTrue(node[2] instanceof TitleTag);
View Full Code Here

   * Testcase to reproduce a bug reported by Cedric Rosa: when the title tag
   * was not ended correctly, we would get null pointer exceptions.
   */
  public void testIncompleteTitle() throws ParserException {
    createParser("<TITLE>SISTEMA TERRA, VOL. VI , No. 1-3, December 1997</TITLE\n" + "</HEAD>");
    TitleScanner titleScanner = new TitleScanner("-t");
    parser.addScanner(titleScanner);
    parseAndAssertNodeCount(2);
    assertTrue("First Node is a title tag", node[0] instanceof TitleTag);
    TitleTag titleTag = (TitleTag) node[0];
    assertEquals("Title", "SISTEMA TERRA, VOL. VI , No. 1-3, December 1997", titleTag.getTitle());
View Full Code Here

   * was reported by Claude Duguay
   */
  public void testDoubleTitleTag() throws ParserException {
    createParser("<html><head><TITLE>\n" + "<html><head><TITLE>\n" + "Double tags can hang the code\n"
        + "</TITLE></head><body>\n" + "<body><html>");
    TitleScanner titleScanner = new TitleScanner("-t");
    parser.addScanner(titleScanner);
    parseAndAssertNodeCount(7);
    assertTrue("Third tag should be a title tag", node[2] instanceof TitleTag);
    TitleTag titleTag = (TitleTag) node[2];
    assertEquals("Title", "Double tags can hang the code\r\n", titleTag.getTitle());
View Full Code Here

   * Testcase based on Claude Duguay's report. This checks that, when faced
   * with malformed html lacking a proper end title tag, the parser still
   * yields a single title tag whose title is the text before the bad tag.
   */
  public void testNoEndTitleTag() throws ParserException {
    createParser("<TITLE>KRP VALIDATION<PROCESS/TITLE>");
    TitleScanner titleScanner = new TitleScanner("-t");
    parser.addScanner(titleScanner);
    parseAndAssertNodeCount(1);
    TitleTag titleTag = (TitleTag) node[0];
    assertEquals("Expected title", "KRP VALIDATION", titleTag.getTitle());
  }
View Full Code Here

TOP

Related Classes of org.htmlparser.scanners.TitleScanner

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.