// Build a filter chain for the HTML parser: an element remover followed by
// a writer that captures the filtered output in memory.
ElementRemover remover = new ElementRemover();
// NOTE(review): REMOVE_SCRIPT is defined outside this view; presumably it
// names the script element -- confirm against its declaration.
remover.removeElement(REMOVE_SCRIPT);
// In-memory sink handed to the writer filter below.
StringWriter contentWriter = new StringWriter();
// NOTE(review): this Writer is placed in an XMLDocumentFilter[] below, so it
// is presumably NekoHTML's filter Writer rather than java.io.Writer --
// confirm against the file's imports.
Writer writer = new Writer(contentWriter, CHAR_ENCODING);
// The remover is listed before the writer; NOTE(review): this assumes the
// parser applies filters in array order, so the writer only sees what the
// remover lets through -- confirm against the NekoHTML filter documentation.
XMLDocumentFilter[] filters = { remover, writer, };
XMLParserConfiguration parser = new HTMLConfiguration();
// Install the filter chain on the parser configuration.
parser.setProperty("http://cyberneko.org/html/properties/filters",
filters);
// The input is read from contentReader (declared outside this view) using
// CHAR_ENCODING; the first three arguments are passed as null.
XMLInputSource source = new XMLInputSource(null, null, null,
contentReader, CHAR_ENCODING);
try {
parser.parse(source);
} catch (XNIException e) {
// Parser-level failure: rethrow as NotIndexableException, passing the
// original exception along.
throw new NotIndexableException("Can not parse html -- ", e);
} catch (IOException e) {
// I/O failure during parsing: wrapped the same way as above.
throw new NotIndexableException("Can not parse html -- ", e);