}, would return {@code
743744745746747748749750751752753
for (Element element : elements) { element.wrap(wrapHtml); } return body.html(); } else { // nothing to update return content; } }
771772773774775776777778779780781
if (elements.size() > 0) { for (Element element : elements) { element.remove(); } return body.html(); } else { // nothing changed return content; } }
10451046104710481049105010511052105310541055
String headingId = generateUniqueId(ids, headingSlug); heading.attr("id", headingId); } return body.html(); } else { // nothing to update return content; } }
717273747576777879
* @return */
public static String removeTag(String html, String tagName) {
    // Parse the input markup and work on the body element of the resulting document.
    Element bodyElement = Jsoup.parse(html).body();
    // Select the elements under the body that carry the given tag name and remove them.
    bodyElement.getElementsByTag(tagName).remove();
    // Hand back the body's HTML as it stands after the removal.
    return bodyElement.html();
}
}
273274275276277278279280281282283
{ Element td = elemLin.parent(); Elements elems = td.select("a:matches(Movie)"); if (elems.size() == 0) throw new DownloadFailedException(PbnTools.getStr("error.noMovie", td.html()), m_ow, false); Element movieElem = elems.get(0); String sOnClick = movieElem.attr("onclick"); if (sOnClick.length() == 0) throw new DownloadFailedException(PbnTools.getStr("error.noAttr", "onclick", movieElem.outerHtml()), m_ow, false);
4041424344454647484950
//The images are located in the 'script' part of the html Element script = doc.select("script").first(); Pattern p = Pattern.compile("\"cdnUrl\":\"(.*?)\","); Matcher m = p.matcher(script.html()); while (m.find()) { String imgUrl = m.group(1); //System.out.print(imgUrl + " -> ");
221222223224225226227228229230231
/**
 * Applies a one-off repair to a single known page.
 *
 * The repair runs only when the original file is named "node429.html" and
 * the page title contains "unix"; in every other case the method returns
 * immediately without touching anything.
 */
private void fixNode429() {
    // Guard: both conditions must hold, otherwise there is nothing to fix.
    if (! (origFile.getName().equals("node429.html") && pageTitle.contains("unix"))) return;
    SimpleLogger.debug("Fixing buggy heading");
    // NOTE(review): the lookup result is dereferenced without a null check —
    // confirm findFirstElement cannot return null for this selector.
    Element buggyParagraph = findFirstElement(Selector.NODE429_BUGGY_PARAGRAPH);
    // Pass the replacement heading markup to the located element.
    // Presumably jsoup's Element.html(String), which sets the inner HTML — TODO confirm.
    buggyParagraph.html("<h1><a>unix</a></h1>");
}

private void removeClutterAroundMainContent() {
    // Keep JavaScript for source code colouring ('prettyPrint' function) in some books
    // deleteNodes(Selector.SCRIPTS);
216217218219220221222223224225226
clean.outputSettings(new OutputSettings().escapeMode(EscapeMode.base).prettyPrint(false)); Element body = clean.body(); if (keepTextOnly) { content = body.text(); } else { content = body.html(); } } return content; }
111112113114115116117118119120121
} else if (node instanceof Element) { Element enode = (Element) node; String tagname = enode.tagName(); String text = cleanOutControlChars(enode.text()); String outerHTML = cleanOutControlChars(enode.outerHtml()); String innerHTML = cleanOutControlChars(enode.html()); Attributes attrs = enode.attributes(); Map<String, String> attributeMap = new HashMap<String, String>(); for (Attribute attr : attrs) { attributeMap.put(attr.getKey(), attr.getValue()); }
4647484950515253545556
String methodName = docFile.getName().substring(0, docFile.getName().indexOf('_')); //System.out.println(methodName); for (Iterator it = elm.iterator(); it.hasNext();) { Element ele = (Element) it.next(); msg = "<html><body> <strong><div style=\"width: 300px; text-justification: justify;\"></strong><table cellpadding=\"0\" cellspacing=\"0\" border=\"0\" class=\"ref-item\">" + ele.html() + "</table></div></html></body></html>"; //mat.replaceAll(""); msg = msg.replaceAll("img src=\"", "img src=\"" + p5Ref.toURI().toURL().toString() + "/"); //System.out.println(ele.text()); }