Examples of org.htmlparser.tags.LinkTag.extractLink()

org.htmlparser.tags.LinkTag.extractLink()
Extract the link from the HREF attribute. @return The URL from the HREF attribute. This is absolute if the tag has a valid page.

            // Process any tag/node in your HTML
            String name = tag.getTagName();
            // For anchor tags whose href is an absolute http(s) URL, rewrite the link so that it
            // points at sRedirectorUrl followed by the Base64-encoded, then URL-encoded, original URL.
            // Links that do not start with "http://" or "https://" are left untouched.
            if ("a".equalsIgnoreCase(name)) {
              LinkTag lnk = (LinkTag) tag;
              String sUrl = lnk.extractLink();
                // NOTE(review): sUrl is dereferenced without a null check — confirm that
                // extractLink() cannot return null for this tag.
                if(sUrl.startsWith("http://") || sUrl.startsWith("https://")) {
                    lnk.setLink(sRedirectorUrl+Gadgets.URLEncode(Base64Encoder.encode(sUrl)));
                }
            }
        }

View Full Code Here


      // Iterate over all links found
      Iterator linksIter = links.iterator();
      while (linksIter.hasNext()) {
        LinkTag currTag = ((LinkTag) linksIter.next());
        String link = CrawlerToolkit.removeAnchor(currTag.extractLink());


        // find urls which do not end with an '/' but are a directory
        link = CrawlerToolkit.completeDirectory(link);


        //link = CrawlerToolkit.toAbsoluteUrl(link, rawDocument.getUrl());

View Full Code Here


      for (int i = 0; i < list.size(); i++)
      {
        LinkTag link = (LinkTag) list.elementAt(i);
        String anchor = link.getLinkText();
        String url = normalizeURL(link.extractLink());


        if (url == null)
        {
          continue;
        }

View Full Code Here

      }


      for(int i = 0; i < list.size(); i++) {
        LinkTag link = (LinkTag) list.elementAt(i);
        String anchor = link.getLinkText();
        String url = link.extractLink();


        if(url == null) {
          continue;
        }

View Full Code Here

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.