config.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
// Get notified of entity and character references
config.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
config.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);
tagBalancer.reset(config);
htmlScanner.reset(config);
XMLInputSource inputSource = new XMLInputSource(null, null, null);
inputSource.setEncoding("UTF-8");
inputSource.setCharacterStream(new StringReader(source));
try {
htmlScanner.setInputSource(inputSource);