Examples of org.htmlparser.tags.LinkTag

org.htmlparser.tags.LinkTag
Identifies a link tag

        return;
      }


      for (int i = 0; i < list.size(); i++)
      {
        LinkTag link = (LinkTag) list.elementAt(i);
        String anchor = link.getLinkText();
        String url = normalizeURL(link.extractLink());


        if (url == null)
        {
          continue;
        }

View Full Code Here

        reporter.incrCounter(LinkCounter.PARSER_FAILED, 1);
        return;
      }


      for(int i = 0; i < list.size(); i++) {
        LinkTag link = (LinkTag) list.elementAt(i);
        String anchor = link.getLinkText();
        String url = link.extractLink();


        if(url == null) {
          continue;
        }

View Full Code Here

                    {
                        // then we need to download the binary
                        binUrlStr= input.getAttribute("src");
                    }
        } else if (node instanceof LinkTag){
          LinkTag link = (LinkTag)node;
          if (link.getChild(0) instanceof ImageTag){
            ImageTag img = (ImageTag)link.getChild(0);
            binUrlStr = img.getImageURL();
          }
        } else if (node instanceof ScriptTag){
          ScriptTag script = (ScriptTag)node;
          binUrlStr = script.getAttribute("src");

View Full Code Here

    protected URL[] extractLinks () throws ParserException
    {
        NodeFilter filter;
        NodeList list;
        Vector vector;
        LinkTag link;
        URL[] ret;


        mParser.reset ();
        filter = new NodeClassFilter (LinkTag.class);
        try
        {
            list = mParser.extractAllNodesThatMatch (filter);
        }
        catch (EncodingChangeException ece)
        {
            mParser.reset ();
            list = mParser.extractAllNodesThatMatch (filter);
        }
        vector = new Vector();
        for (int i = 0; i < list.size (); i++)
            try
            {
                link = (LinkTag)list.elementAt (i);
                vector.add(new URL (link.getLink ()));
            }
            catch (MalformedURLException murle)
            {
                //vector.remove (i);
                //i--;

View Full Code Here

          new TagNameFilter("META")
          }
      );
      NodeList list = par.parse( filter );
      System.out.println("Url: " + _url.toString() + " found " + list.size() + " items.");
      LinkTag lt = null; 
      for (SimpleNodeIterator e = list.elements (); e.hasMoreNodes (); ) {
        Node n = e.nextNode();
        if (n.getClass() == LinkTag.class ) {
          lt = (LinkTag)n;
          if ( FileTypes.isAllowedFileType(lt.getLink())) {
            String url = par.getLexer().getPage().getAbsoluteURL(lt.getLink());
            URL c;
            try {
              c = new URL(url);
              Page p = new Page(c.toString());
              if ( _crawler != null ) {

View Full Code Here

                if (ATT_IS_IMAGE.equalsIgnoreCase(tag.getAttribute(ATT_TYPE))) {
                    // then we need to download the binary
                    binUrlStr = tag.getAttribute(ATT_SRC);
                }
            } else if (tag instanceof LinkTag) {
                LinkTag link = (LinkTag) tag;
                if (link.getChild(0) instanceof ImageTag) {
                    ImageTag img = (ImageTag) link.getChild(0);
                    binUrlStr = img.getImageURL();
                }
            } else if (tag instanceof ScriptTag) {
                binUrlStr = tag.getAttribute(ATT_SRC);
            } else if (tag instanceof FrameTag) {

View Full Code Here

                if (ATT_IS_IMAGE.equalsIgnoreCase(tag.getAttribute(ATT_TYPE))) {
                    // then we need to download the binary
                    binUrlStr = tag.getAttribute(ATT_SRC);
                }
            } else if (tag instanceof LinkTag) {
                LinkTag link = (LinkTag) tag;
                if (link.getChild(0) instanceof ImageTag) {
                    ImageTag img = (ImageTag) link.getChild(0);
                    binUrlStr = img.getImageURL();
                }
            } else if (tag instanceof ScriptTag) {
                binUrlStr = tag.getAttribute(ATT_SRC);
            } else if (tag instanceof FrameTag) {

View Full Code Here

    protected URL[] extractLinks () throws ParserException
    {
        NodeFilter filter;
        NodeList list;
        Vector vector;
        LinkTag link;
        URL[] ret;


        mParser.reset ();
        filter = new NodeClassFilter (LinkTag.class);
        try
        {
            list = mParser.extractAllNodesThatMatch (filter);
        }
        catch (EncodingChangeException ece)
        {
            mParser.reset ();
            list = mParser.extractAllNodesThatMatch (filter);
        }
        vector = new Vector();
        for (int i = 0; i < list.size (); i++)
            try
            {
                link = (LinkTag)list.elementAt (i);
                vector.add(new URL (link.getLink ()));
            }
            catch (MalformedURLException murle)
            {
                //vector.remove (i);
                //i--;

View Full Code Here

        registerTag (new HeadingTag ());
        registerTag (new ImageTag ());
        registerTag (new InputTag ());
        registerTag (new JspTag ());
        registerTag (new LabelTag ());
        registerTag (new LinkTag ());
        registerTag (new MetaTag ());
        registerTag (new ObjectTag ());
        registerTag (new OptionTag ());
        registerTag (new ParagraphTag ());
        registerTag (new ProcessingInstructionTag ());

View Full Code Here

        // The first node should be a Text-  with the text - view these documents, you must have
        assertTrue("First node should be a Text",node[0] instanceof Text);
        Text stringNode = (Text)node[0];
        assertEquals("Text of the Text","view these documents, you must have ",stringNode.getText());
        assertTrue("Second node should be a link node",node[1] instanceof LinkTag);
        LinkTag linkNode = (LinkTag)node[1];
        assertEquals("Link is","http://www.adobe.com",linkNode.getLink());
        assertEquals("Link text is","Adobe \nAcrobat Reader",linkNode.getLinkText());


        assertTrue("Third node should be a string node",node[2] instanceof Text);
        Text stringNode2 = (Text)node[2];
        assertEquals("Contents of third node"," installed on your computer.",stringNode2.getText());
    }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.htmlparser.tags.LinkTag

com.almilli.movierentals.blockbuster.BlockbusterService

com.almilli.movierentals.netflix.NetFlixService

com.knowgate.hipermail.HtmlMimeBodyPart

de.jungblut.crawl.extraction.OutlinkExtractor

edu.umd.cloud9.webgraph.ClueExtractLinks$Map

edu.umd.cloud9.webgraph.TrecExtractLinks$Map

hudson.plugins.rubyMetrics.rcov.RcovParser

hudson.plugins.rubyMetrics.saikuro.SaikuroParser

modelcrawler.Page

net.sf.regain.crawler.preparator.HtmlPreparator

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.