Document document = getDOM();
return new DocumentReport( validator.validate(dURI, document, applyFix), document );
}
/**
 * Parses {@code input} into a DOM {@link Document} using a customized {@code DOMParser}.
 *
 * <p>The parser is subclassed so that every element it creates can be tagged, through
 * {@code setUserData} under the {@code ELEMENT_LOCATION} key, with an {@code ElementLocation}
 * built from the augmentation item registered under {@code AUGMENTATIONS_FEATURE}.
 *
 * @return the document produced by the parser.
 * @throws IOException declared for I/O failures raised while parsing the input.
 * @throws SAXException declared for parser configuration or parsing failures.
 * @throws TransformerException declared by the signature; nothing in this method throws it directly.
 */
private Document parse() throws IOException, SAXException, TransformerException {
    final DOMParser parser = new DOMParser() {

        /** Qualified name received by the most recent {@code startElement} call. */
        private QName currentQName;

        /** Augmentations received by the most recent {@code startElement} call. */
        private Augmentations currentAugmentations;

        /**
         * Creates the element node and, when it corresponds to the start tag being handled,
         * attaches its source location as user data.
         */
        @Override
        protected Element createElementNode(QName qName) {
            final Element created = super.createElementNode(qName);
            if (qName.equals(currentQName) && currentAugmentations != null) {
                final ElementLocation elementLocation = createElementLocation(
                        currentAugmentations.getItem(AUGMENTATIONS_FEATURE)
                );
                // A null location (missing or "synthesized" item) is stored as-is.
                created.setUserData(ELEMENT_LOCATION, elementLocation, null);
            }
            return created;
        }

        /**
         * Records the element name and augmentations, then delegates to the base parser.
         */
        @Override
        public void startElement(QName qName, XMLAttributes xmlAttributes, Augmentations augmentations)
        throws XNIException {
            // The state must be recorded BEFORE delegating: the base parser creates the element
            // node (createElementNode) while handling startElement. Recording it afterwards made
            // createElementNode compare against the previous element, so the first element and
            // any element following one with a different QName lost their location.
            currentQName = qName;
            currentAugmentations = augmentations;
            super.startElement(qName, xmlAttributes, augmentations);
        }

        /**
         * Converts an augmentation item into an {@code ElementLocation}.
         *
         * <p>The item's string form is split on {@code ':'} and fields 0, 1, 3 and 4 are parsed
         * as integers (field 2 is skipped). Presumably these are begin line, begin column,
         * end line and end column - confirm against the NekoHTML event info format.
         *
         * @param obj the augmentation item, may be {@code null}.
         * @return the location, or {@code null} if the item is {@code null}, is marked
         *         {@code "synthesized"}, or cannot be parsed (a warning is logged in that case).
         */
        private ElementLocation createElementLocation(Object obj) {
            if (obj == null) {
                return null;
            }
            String pattern = null;
            try {
                pattern = obj.toString();
                if ("synthesized".equals(pattern)) {
                    return null;
                }
                final String[] parts = pattern.split(":");
                return new ElementLocation(
                        Integer.parseInt(parts[0]),
                        Integer.parseInt(parts[1]),
                        Integer.parseInt(parts[3]),
                        Integer.parseInt(parts[4])
                );
            } catch (Exception e) {
                // Best effort: a malformed item must not abort parsing, the element simply
                // ends up without location data.
                logger.warn(
                        String.format("Unexpected string format for given augmentation: [%s]", pattern),
                        e
                );
                return null;
            }
        }
    };

    parser.setFeature("http://xml.org/sax/features/namespaces", false);
    parser.setFeature("http://cyberneko.org/html/features/scanner/script/strip-cdata-delims", true);
    parser.setFeature(AUGMENTATIONS_FEATURE, true);
    if (this.encoding != null) {
        parser.setProperty("http://cyberneko.org/html/properties/default-encoding", this.encoding);
    }

    /*
     * NOTE: the input stream is wrapped in a SpanCloserInputStream before being handed to the
     * CyberNeko parser, to ensure that inline HTML SPAN tags are handled correctly.
     * This fix is documented at issue #78.
     */
    parser.parse(new InputSource(new SpanCloserInputStream(input)));
    return parser.getDocument();
}