// Build the parser configuration up front; it is applied to both the tag
// balancer and the scanner via reset(...) below.
HTMLConfiguration config = new HTMLConfiguration();

// Keep element and attribute names in the case used by the input.
config.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
config.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");

// Treat the input as a document fragment.
config.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);

// Ask the scanner to report character references and built-in entity references.
config.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
config.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);

// Event pipeline: htmlScanner -> tagBalancer -> handler.
DocumentHandler handler = new DocumentHandler(source);
HTMLTagBalancer tagBalancer = new HTMLTagBalancer();
htmlScanner.setDocumentHandler(tagBalancer);
tagBalancer.setDocumentHandler(handler);

// Apply the configuration to both pipeline components.
tagBalancer.reset(config);
htmlScanner.reset(config);

// Input is read from the in-memory string through a character stream; no
// public id, system id, or base system id is supplied.
XMLInputSource inputSource = new XMLInputSource(null, null, null);
inputSource.setCharacterStream(new StringReader(source));
inputSource.setEncoding("UTF-8");