/**
 * Inspects a node and, when it is an {@link Element}, hands the attribute(s)
 * that can reference an external resource for that tag to
 * {@code extractAttribute}. A {@code base} tag instead updates
 * {@code baseUrl.url}. Whatever the tag, the element's style attribute is
 * then passed to {@code HtmlParsingUtils.extractStyleURLs}.
 *
 * @param node  the node to inspect; anything that is not an {@link Element} is ignored
 * @param depth not used by this implementation
 * @throws RuntimeException wrapping the {@link MalformedURLException} raised when
 *         the {@code base} tag's href cannot be combined with the current base URL
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof Element)) {
        return;
    }
    Element tag = (Element) node;
    // Locale.ROOT keeps the case folding locale-independent: with a Turkish
    // default locale "IMG".toLowerCase() would not equal "img".
    String tagName = tag.tagName().toLowerCase(java.util.Locale.ROOT);
    if (tagName.equals(TAG_BODY)) {
        extractAttribute(tag, ATT_BACKGROUND);
    } else if (tagName.equals(TAG_SCRIPT)) {
        extractAttribute(tag, ATT_SRC);
    } else if (tagName.equals(TAG_BASE)) {
        String baseref = tag.attr(ATT_HREF);
        try {
            // Bugzilla 30713: only rebase when the href is non-empty
            if (!StringUtils.isEmpty(baseref)) {
                baseUrl.url = ConversionUtils.makeRelativeURL(baseUrl.url, baseref);
            }
        } catch (MalformedURLException e1) {
            throw new RuntimeException(e1);
        }
    } else if (tagName.equals(TAG_IMAGE)) {
        extractAttribute(tag, ATT_SRC);
    } else if (tagName.equals(TAG_APPLET)) {
        extractAttribute(tag, ATT_CODE);
    } else if (tagName.equals(TAG_OBJECT)) {
        extractAttribute(tag, ATT_CODEBASE);
        extractAttribute(tag, ATT_DATA);
    } else if (tagName.equals(TAG_INPUT)) {
        // we check the input tag type for image
        if (ATT_IS_IMAGE.equalsIgnoreCase(tag.attr(ATT_TYPE))) {
            // then we need to download the binary
            extractAttribute(tag, ATT_SRC);
        }
    } else if (tagName.equals(TAG_FRAME) || tagName.equals(TAG_IFRAME)) {
        // Bug 51750
        extractAttribute(tag, ATT_SRC);
    } else if (tagName.equals(TAG_EMBED)) {
        extractAttribute(tag, ATT_SRC);
    } else if (tagName.equals(TAG_BGSOUND)) {
        extractAttribute(tag, ATT_SRC);
    } else if (tagName.equals(TAG_LINK)) {
        // Putting the string first means it works even if the attribute is null
        if (STYLESHEET.equalsIgnoreCase(tag.attr(ATT_REL))) {
            extractAttribute(tag, ATT_HREF);
        }
    } else {
        // Any other tag: look at its background attribute
        extractAttribute(tag, ATT_BACKGROUND);
    }
    // Now look for URLs in the STYLE attribute
    String styleTagStr = tag.attr(ATT_STYLE);
    if (styleTagStr != null) {
        HtmlParsingUtils.extractStyleURLs(baseUrl.url, urls, styleTagStr);
    }
}