// Step 1: configure the cleaner. `props` is created before this point (not
// visible here). Per the HtmlCleaner property names, these switches ask it to
// translate special HTML entities, emit reserved characters as numeric
// character references, and drop comments from the output.
// NOTE(review): exact semantics come from the HtmlCleaner library - confirm
// against the library version in use.
props.setTranslateSpecialEntities(true);
props.setTransResCharsToNCR(true);
props.setOmitComments(true);
// Step 2: run the raw input (`xml`) through HtmlCleaner to obtain a node tree,
// then serialize that tree back to a string using the same properties.
final TagNode tagNode = new HtmlCleaner(props).clean(xml);
final String cleansData = new CompactXmlSerializer(props).getAsString(tagNode);
// Step 3: prepare a namespace-aware SAX parser factory.
final SAXParserFactory spf = SAXParserFactory.newInstance();
spf.setNamespaceAware(true);
// `newSaxParser` is a helper defined elsewhere; presumably it wraps
// spf.newSAXParser() and its checked exceptions - TODO confirm.
final SAXParser saxParser = newSaxParser(spf);