// Capture group 2 of the attributes matcher; the branches below treat it as the
// value of the attribute named by attr.
String val = attributes.group(2);
// href is accepted on <a> only, and only when it validates as an http/https URL
// (or passes the mailto fallback below).
if ("a".equals(tag) && "href".equals(attr)) { // <a href="......">
String[] customSchemes = {"http", "https"};
if (new UrlValidator(customSchemes).isValid(val)) {
foundURL = true;
} else {
// Not a plain http/https URL: maybe it is a mailto link,
// e.g. <a href="mailto:pippo@pippo.com?subject=...."
// NOTE(review): toLowerCase() uses the default locale; under a Turkish locale an
// upper-case "MAILTO:" would not lower-case to "mailto:" - consider Locale.ROOT.
if (val.toLowerCase().startsWith("mailto:") && val.indexOf("@") >= 0) {
// Build a synthetic URL from everything after the first '@' (this includes any
// "?subject=..." suffix) and run it through the same http/https validator.
String val1 = "http://www." + val.substring(val.indexOf("@") + 1);
if (new UrlValidator(customSchemes).isValid(val1)) {
foundURL = true;
} else {
// Rejected mailto: record it and blank the value. There is no continue here, so
// processing goes on with the emptied val (its later use is outside this excerpt).
ret.invalidTags.add(attr + " " + val);
val = "";
}
} else {
// Neither a valid URL nor a mailto link: record it and blank the value.
ret.invalidTags.add(attr + " " + val);
val = "";
}
}
// src is accepted on <img> and <embed> only, with the same http/https check and no
// mailto fallback.
// NOTE(review): the invalidTags entries in this branch and the <a> branch omit the tag
// name, unlike the branches further down - confirm whether that is intentional.
} else if (tag.matches("img|embed") && "src".equals(attr)) { // <img src="......">
String[] customSchemes = {"http", "https"};
if (new UrlValidator(customSchemes).isValid(val)) {
foundURL = true;
} else {
ret.invalidTags.add(attr + " " + val);
val = "";
}
// href/src on any other tag is always rejected: record it and skip the rest of the
// current iteration of the enclosing loop (loop header not visible in this excerpt).
} else if ("href".equals(attr) || "src".equals(attr)) { // <tag src/href="......"> skipped
ret.invalidTags.add(tag + " " + attr + " " + val);
continue;
} else if (attr.matches("width|height")) { // <tag width/height="......">
// Accept only digits, optionally followed by '%'. String.matches() already requires
// the whole string to match, so the trailing '$' in the pattern is redundant.
if (!val.toLowerCase().matches("\\d+%|\\d+$")) { // test numeric values
ret.invalidTags.add(tag + " " + attr + " " + val);
continue;
}
} else if ("style".equals(attr)) { // <tag style="......">
// Walk every match of stylePattern in the style value and vet each one.
// Group 1 is read as the style name and group 2 as its value.
Matcher styles = stylePattern.matcher(val);
// Starts empty; presumably collects the declarations that pass the checks, but the
// code that fills it lies beyond this excerpt - TODO confirm.
String cleanStyle = "";
while (styles.find()) {
String styleName = styles.group(1).toLowerCase();
String styleValue = styles.group(2);
// Drop declarations whose value matches forbiddenStylePattern: record them and move
// on to the next match.
if (forbiddenStylePattern.matcher(styleValue).find()) {
ret.invalidTags.add(tag + " " + attr + " " + styleValue);
continue;
}
// If urlStylePattern finds a URL in the value (its group 1), that URL must validate
// as http/https; otherwise the declaration is recorded as invalid and dropped.
Matcher urlStyleMatcher = urlStylePattern.matcher(styleValue);
if (urlStyleMatcher.find()) {
String[] customSchemes = {"http", "https"};
String url = urlStyleMatcher.group(1);
if (!new UrlValidator(customSchemes).isValid(url)) {
ret.invalidTags.add(tag + " " + attr + " " + styleValue);
continue;
}
}