Package org.htmlparser.scanners

Examples of org.htmlparser.scanners.MetaTagScanner


    {
        super.setUp();
        createParser("<html><head><title>Yahoo!</title><base href=http://www.yahoo.com/ target=_top><meta http-equiv=\"PICS-Label\" content='(PICS-1.1 \"http://www.icra.org/ratingsv02.html\" l r (cz 1 lz 1 nz 1 oz 1 vz 1) gen true for \"http://www.yahoo.com\" r (cz 1 lz 1 nz 1 oz 1 vz 1) \"http://www.rsac.org/ratingsv01.html\" l r (n 0 s 0 v 0 l 0) gen true for \"http://www.yahoo.com\" r (n 0 s 0 v 0 l 0))'><style>a.h{background-color:#ffee99}</style></head>");
        parser.addScanner(new TitleScanner("-t"));
        parser.addScanner(new StyleScanner("-s"));
        parser.addScanner(new MetaTagScanner("-m"));
        parseAndAssertNodeCount(7);
        assertTrue(node[2] instanceof TitleTag);
        titleTag = (TitleTag) node[2];
    }
View Full Code Here


                + "<META name=\"keywords\" content=\"SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns\">\n"
                + "<META name=\"language\" content=\"en\">\n"
                + "<META name=\"owner\" content=\"service@admin.spamcop.net\">\n"
                + "<META HTTP-EQUIV=\"content-type\" CONTENT=\"text/html; charset=ISO-8859-1\">",
            "http://www.google.com/test/index.html");
        MetaTagScanner scanner = new MetaTagScanner("-t");
        parser.addScanner(scanner);

        parseAndAssertNodeCount(11);
        assertTrue("Node 5 should be End Tag", node[5] instanceof EndTag);
        assertTrue("Node 6 should be META Tag", node[6] instanceof MetaTag);
View Full Code Here

    public void testScanTagsInMeta() throws ParserException
    {
        createParser(
            "<META NAME=\"Description\" CONTENT=\"Ethnoburb </I>versus Chinatown: Two Types of Urban Ethnic Communities in Los Angeles\">",
            "http://www.google.com/test/index.html");
        MetaTagScanner scanner = new MetaTagScanner("-t");
        parser.addScanner(scanner);
        parseAndAssertNodeCount(1);
        assertTrue("Node should be meta tag", node[0] instanceof MetaTag);
        MetaTag metaTag = (MetaTag) node[0];
        assertEquals("Meta Tag Name", "Description", metaTag.getMetaTagName());
View Full Code Here

    {
        createParser("<html><head><title>Yahoo!</title><base href=http://www.yahoo.com/ target=_top><meta http-equiv=\"PICS-Label\" content='(PICS-1.1 \"http://www.icra.org/ratingsv02.html\" l r (cz 1 lz 1 nz 1 oz 1 vz 1) gen true for \"http://www.yahoo.com\" r (cz 1 lz 1 nz 1 oz 1 vz 1) \"http://www.rsac.org/ratingsv01.html\" l r (n 0 s 0 v 0 l 0) gen true for \"http://www.yahoo.com\" r (n 0 s 0 v 0 l 0))'><style>a.h{background-color:#ffee99}</style></head>");
        TitleScanner titleScanner = new TitleScanner("-t");
        parser.addScanner(titleScanner);
        parser.addScanner(new StyleScanner("-s"));
        parser.addScanner(new MetaTagScanner("-m"));
        parseAndAssertNodeCount(7);
        assertTrue(node[2] instanceof TitleTag);
        // check the title node
        TitleTag titleTag = (TitleTag) node[2];
        assertEquals("Title", "Yahoo!", titleTag.getTitle());
View Full Code Here

        if (null != url_conn)
            try
            {
                if (null == scanners.get("-m"))
                {
                    addScanner(new MetaTagScanner("-m"));
                    remove_scanner = true;
                }

                /* pre-read up to </HEAD> looking for charset directive */
                while (null != (node = ret.peek()))
View Full Code Here

        addScanner(linkScanner.createImageScanner(ImageTag.IMAGE_TAG_FILTER));
        addScanner(new ScriptScanner("-s"));
        addScanner(new StyleScanner("-t"));
        addScanner(new JspScanner("-j"));
        addScanner(new AppletScanner("-a"));
        addScanner(new MetaTagScanner("-m"));
        addScanner(new TitleScanner("-T"));
        addScanner(new DoctypeScanner("-d"));
        addScanner(new FormScanner("-f", this));
        addScanner(new FrameSetScanner("-r"));
        addScanner(linkScanner.createBaseHREFScanner("-b"));
View Full Code Here

  protected void setUp() throws Exception {
    super.setUp();
    createParser("<html><head><title>Yahoo!</title><base href=http://www.yahoo.com/ target=_top><meta http-equiv=\"PICS-Label\" content='(PICS-1.1 \"http://www.icra.org/ratingsv02.html\" l r (cz 1 lz 1 nz 1 oz 1 vz 1) gen true for \"http://www.yahoo.com\" r (cz 1 lz 1 nz 1 oz 1 vz 1) \"http://www.rsac.org/ratingsv01.html\" l r (n 0 s 0 v 0 l 0) gen true for \"http://www.yahoo.com\" r (n 0 s 0 v 0 l 0))'><style>a.h{background-color:#ffee99}</style></head>");
    parser.addScanner(new TitleScanner("-t"));
    parser.addScanner(new StyleScanner("-s"));
    parser.addScanner(new MetaTagScanner("-m"));
    parseAndAssertNodeCount(7);
    assertTrue(node[2] instanceof TitleTag);
    titleTag = (TitleTag) node[2];
  }
View Full Code Here

            + "<META name=\"keywords\" content=\"SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns\">\n"
            + "<META name=\"language\" content=\"en\">\n"
            + "<META name=\"owner\" content=\"service@admin.spamcop.net\">\n"
            + "<META HTTP-EQUIV=\"content-type\" CONTENT=\"text/html; charset=ISO-8859-1\">",
        "http://www.google.com/test/index.html");
    MetaTagScanner scanner = new MetaTagScanner("-t");
    parser.addScanner(scanner);

    parseAndAssertNodeCount(11);
    assertTrue("Node 5 should be End Tag", node[5] instanceof EndTag);
    assertTrue("Node 6 should be META Tag", node[6] instanceof MetaTag);
View Full Code Here

  public void testScanTagsInMeta() throws ParserException {
    createParser(
        "<META NAME=\"Description\" CONTENT=\"Ethnoburb </I>versus Chinatown: Two Types of Urban Ethnic Communities in Los Angeles\">",
        "http://www.google.com/test/index.html");
    MetaTagScanner scanner = new MetaTagScanner("-t");
    parser.addScanner(scanner);
    parseAndAssertNodeCount(1);
    assertTrue("Node should be meta tag", node[0] instanceof MetaTag);
    MetaTag metaTag = (MetaTag) node[0];
    assertEquals("Meta Tag Name", "Description", metaTag.getMetaTagName());
View Full Code Here

    String charset;
    EndTag end;
    if (null != url_conn)
      try {
        if (null == scanners.get("-m")) {
          addScanner(new MetaTagScanner("-m"));
          remove_scanner = true;
        }

        /* pre-read up to </HEAD> looking for charset directive */
        while (null != (node = ret.peek())) {
View Full Code Here

TOP

Related Classes of org.htmlparser.scanners.MetaTagScanner

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.