super(htmlToXmlDef);
this.htmlToXmlDef = htmlToXmlDef;
}
public Variable execute(Scraper scraper, ScraperContext context) {
Variable body = getBodyTextContent(htmlToXmlDef, scraper, context);
HtmlCleaner cleaner = new HtmlCleaner();
CleanerProperties properties = cleaner.getProperties();
final ScriptEngine scriptEngine = scraper.getScriptEngine();
final String advancedXmlEscape = BaseTemplater.execute( htmlToXmlDef.getAdvancedXmlEscape(), scriptEngine);
if ( advancedXmlEscape != null) {
properties.setAdvancedXmlEscape(CommonUtil.isBooleanTrue(advancedXmlEscape) );
}
final String cdataForScriptAndStyle = BaseTemplater.execute( htmlToXmlDef.getUseCdataForScriptAndStyle(), scriptEngine);
if ( cdataForScriptAndStyle != null) {
properties.setUseCdataForScriptAndStyle(CommonUtil.isBooleanTrue(cdataForScriptAndStyle) );
}
final String specialEntities = BaseTemplater.execute( htmlToXmlDef.getTranslateSpecialEntities(), scriptEngine);
if ( specialEntities != null) {
properties.setTranslateSpecialEntities(CommonUtil.isBooleanTrue(specialEntities) );
}
final String recognizeUnicodeChars = BaseTemplater.execute(htmlToXmlDef.getRecognizeUnicodeChars(), scriptEngine);
if ( recognizeUnicodeChars != null) {
properties.setRecognizeUnicodeChars( CommonUtil.isBooleanTrue(recognizeUnicodeChars) );
}
final String omitUnknownTags = BaseTemplater.execute(htmlToXmlDef.getOmitUnknownTags(), scriptEngine);
if ( omitUnknownTags != null) {
properties.setOmitUnknownTags( CommonUtil.isBooleanTrue(omitUnknownTags) );
}
final String useEmptyElementTags = BaseTemplater.execute(htmlToXmlDef.getUseEmptyElementTags(), scriptEngine);
if ( useEmptyElementTags != null) {
properties.setUseEmptyElementTags( CommonUtil.isBooleanTrue(useEmptyElementTags) );
}
final String treatUnknownTagsAsContent = BaseTemplater.execute(htmlToXmlDef.getTreatUnknownTagsAsContent(), scriptEngine);
if ( treatUnknownTagsAsContent != null) {
properties.setTreatUnknownTagsAsContent( CommonUtil.isBooleanTrue(treatUnknownTagsAsContent) );
}
final String omitDeprecatedTags = BaseTemplater.execute(htmlToXmlDef.getOmitDeprecatedTags(), scriptEngine);
if ( omitDeprecatedTags != null) {
properties.setOmitDeprecatedTags( CommonUtil.isBooleanTrue(omitDeprecatedTags) );
}
final String treatDeprTagsAsContent = BaseTemplater.execute(htmlToXmlDef.getTreatDeprecatedTagsAsContent(), scriptEngine);
if ( treatDeprTagsAsContent != null) {
properties.setTreatDeprecatedTagsAsContent( CommonUtil.isBooleanTrue(treatDeprTagsAsContent) );
}
final String omitXmlDecl = BaseTemplater.execute(htmlToXmlDef.getOmitXmlDecl(), scriptEngine);
if ( omitXmlDecl != null) {
properties.setOmitXmlDeclaration( CommonUtil.isBooleanTrue(omitXmlDecl) );
}
final String omitComments = BaseTemplater.execute(htmlToXmlDef.getOmitComments(), scriptEngine);
if ( omitComments != null) {
properties.setOmitComments( CommonUtil.isBooleanTrue(omitComments) );
}
final String omitHtmlEnvelope = BaseTemplater.execute(htmlToXmlDef.getOmitHtmlEnvelope(), scriptEngine);
if ( omitHtmlEnvelope != null) {
properties.setOmitHtmlEnvelope( CommonUtil.isBooleanTrue(omitHtmlEnvelope) );
}
final String allowMultiWordAttributes = BaseTemplater.execute(htmlToXmlDef.getAllowMultiWordAttributes(), scriptEngine);
if ( allowMultiWordAttributes != null) {
properties.setAllowMultiWordAttributes( CommonUtil.isBooleanTrue(allowMultiWordAttributes) );
}
final String allowHtmlInsideAttributes = BaseTemplater.execute(htmlToXmlDef.getAllowHtmlInsideAttributes(), scriptEngine);
if ( allowHtmlInsideAttributes != null) {
properties.setAllowHtmlInsideAttributes( CommonUtil.isBooleanTrue(allowHtmlInsideAttributes) );
}
final String namespacesAware = BaseTemplater.execute(htmlToXmlDef.getNamespacesAware(), scriptEngine);
if ( namespacesAware != null) {
properties.setNamespacesAware( CommonUtil.isBooleanTrue(namespacesAware) );
} else {
properties.setNamespacesAware(false);
}
final String hyphenReplacement = BaseTemplater.execute(htmlToXmlDef.getHyphenReplacement(), scriptEngine);
if ( hyphenReplacement != null) {
properties.setHyphenReplacementInComment(hyphenReplacement);
}
final String pruneTags = BaseTemplater.execute(htmlToXmlDef.getPrunetags(), scriptEngine);
if ( pruneTags != null) {
properties.setPruneTags(pruneTags);
}
final String booleanAtts = BaseTemplater.execute(htmlToXmlDef.getBooleanAtts(), scriptEngine);
if ( booleanAtts != null) {
properties.setBooleanAttributeValues(booleanAtts);
}
String outputType = BaseTemplater.execute(htmlToXmlDef.getOutputType(), scriptEngine);
try {
TagNode node = cleaner.clean(body.toString());
String result;
if ( "simple".equalsIgnoreCase(outputType) ) {
result = new SimpleXmlSerializer(properties).getXmlAsString(node);
} else if ( "pretty".equalsIgnoreCase(outputType) ) {