/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2007-2010 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.sax;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
import java.util.HashMap;
import nu.validator.htmlparser.common.CharacterHandler;
import nu.validator.htmlparser.common.DoctypeExpectation;
import nu.validator.htmlparser.common.DocumentModeHandler;
import nu.validator.htmlparser.common.Heuristics;
import nu.validator.htmlparser.common.TokenHandler;
import nu.validator.htmlparser.common.TransitionHandler;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
import nu.validator.htmlparser.impl.Tokenizer;
import nu.validator.htmlparser.impl.TreeBuilder;
import nu.validator.htmlparser.io.Driver;
import nu.validator.saxtree.Document;
import nu.validator.saxtree.DocumentFragment;
import nu.validator.saxtree.TreeParser;
import org.xml.sax.ContentHandler;
import org.xml.sax.DTDHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.DefaultHandler;
/**
* This class implements an HTML5 parser that exposes data through the SAX2
* interface.
*
* <p>By default, when using the constructor without arguments, this
* parser coerces XML 1.0-incompatible infosets into XML 1.0-compatible
* infosets. This corresponds to <code>ALTER_INFOSET</code> as the general
* XML violation policy. To make the parser support non-conforming HTML fully
* per the HTML 5 spec while on the other hand potentially violating the SAX2
* API contract, set the general XML violation policy to <code>ALLOW</code>.
* It is possible to treat XML 1.0 infoset violations as fatal by setting
* the general XML violation policy to <code>FATAL</code>.
*
* <p>By default, this parser doesn't do true streaming but buffers everything
* first. The parser can be made truly streaming by calling
* <code>setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL)</code>. This
* has the consequence that errors that require non-streamable recovery are
* treated as fatal.
*
* <p>By default, in order to make the parse events emulate the parse events
* for a DTDless XML document, the parser does not report the doctype through
* <code>LexicalHandler</code>. Doctype reporting through
* <code>LexicalHandler</code> can be turned on by calling
* <code>setReportingDoctype(true)</code>.
*
* @version $Id$
* @author hsivonen
*/
public class HtmlParser implements XMLReader {
// --- Parsing pipeline -------------------------------------------------
// Built on demand by lazyInit(). A null driver means "not built yet" or
// "invalidated by a setter"; lazyInit() rebuilds everything in that case.
private Driver driver = null;
private TreeBuilder<?> treeBuilder = null;
// lazyInit() sets exactly one of the next two and points treeBuilder at it:
// saxTreeBuilder when the streamability violation policy is ALLOW,
// saxStreamer otherwise.
private SAXStreamer saxStreamer = null; // work around javac bug
private SAXTreeBuilder saxTreeBuilder = null; // work around javac bug
// --- Handlers supplied by the caller ----------------------------------
private ContentHandler contentHandler = null;
private LexicalHandler lexicalHandler = null;
private DTDHandler dtdHandler = null;
private EntityResolver entityResolver = null;
private ErrorHandler errorHandler = null;
private DocumentModeHandler documentModeHandler = null;
// --- Configuration copied into the driver / tree builder by lazyInit() -
private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;
private boolean checkingNormalization = false;
private boolean scriptingEnabled = false;
private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>();
private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;
// Also consulted by newTokenizer() when choosing the tokenizer class.
private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;
private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;
private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;
// Selects between the buffering tree builder (ALLOW) and the streamer.
private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;
private boolean html4ModeCompatibleWithXhtml1Schemata = false;
private boolean mappingLangToXmlLang = false;
// NOTE(review): initialized to FATAL although setXmlnsPolicy() rejects FATAL;
// presumably overwritten by setXmlPolicy() in the constructor -- confirm.
private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL;
// NOTE(review): initialized to true although the class Javadoc describes
// doctype reporting as off by default -- confirm which is intended.
private boolean reportingDoctype = true;
// Error handler given to the tree builder; follows errorHandler unless
// replaced through setTreeBuilderErrorHandlerOverride().
private ErrorHandler treeBuilderErrorHandler = null;
private Heuristics heuristics = Heuristics.NONE;
// Passed to ErrorReportingTokenizer.setErrorProfile() by newTokenizer().
private HashMap<String, String> errorProfileMap = null;
private TransitionHandler transitionHandler = null;
/**
 * Instantiates the parser with a fatal XML violation policy.
 *
 * <p>Equivalent to calling the one-argument constructor with
 * <code>XmlViolationPolicy.FATAL</code>.
 */
public HtmlParser() {
this(XmlViolationPolicy.FATAL);
}
/**
 * Instantiates the parser and applies the given general XML violation
 * policy through <code>setXmlPolicy</code>.
 *
 * @param xmlPolicy the policy to apply
 */
public HtmlParser(XmlViolationPolicy xmlPolicy) {
    this.setXmlPolicy(xmlPolicy);
}
/**
 * Creates the tokenizer that will feed the given token handler.
 *
 * <p>A plain <code>Tokenizer</code> is returned only when no error handler
 * and no transition handler are set and the non-XML character policy is
 * <code>ALLOW</code>. In every other case an
 * <code>ErrorReportingTokenizer</code> configured with the current error
 * profile map is returned.
 *
 * @param handler the token handler the tokenizer reports to
 * @param newAttributesEachTime passed through to the tokenizer constructor
 * @return the tokenizer to wrap in a driver
 */
private Tokenizer newTokenizer(TokenHandler handler, boolean newAttributesEachTime) {
    boolean needsReporting = errorHandler != null
            || transitionHandler != null
            || contentNonXmlCharPolicy != XmlViolationPolicy.ALLOW;
    if (needsReporting) {
        ErrorReportingTokenizer reporting =
                new ErrorReportingTokenizer(handler, newAttributesEachTime);
        reporting.setErrorProfile(errorProfileMap);
        return reporting;
    }
    return new Tokenizer(handler, newAttributesEachTime);
}
/**
 * Builds the driver and tree builder on first use, or again after a
 * setter has discarded the driver, and copies the current configuration
 * into them. Does nothing when a driver already exists.
 *
 * <p>With a streamability violation policy of <code>ALLOW</code> a
 * buffering <code>SAXTreeBuilder</code> is used; otherwise a
 * <code>SAXStreamer</code> is used and rewinding is disabled on the driver.
 * This hides the choice of tree builder from the user of the class.
 */
private void lazyInit() {
    if (driver != null) {
        return;
    }
    final boolean buffering = (streamabilityViolationPolicy == XmlViolationPolicy.ALLOW);
    if (buffering) {
        saxTreeBuilder = new SAXTreeBuilder();
        treeBuilder = saxTreeBuilder;
        saxStreamer = null;
    } else {
        saxStreamer = new SAXStreamer();
        treeBuilder = saxStreamer;
        saxTreeBuilder = null;
    }
    // The buffering builder asks for fresh attribute holders each time;
    // the streamer does not.
    driver = new Driver(newTokenizer(treeBuilder, buffering));

    driver.setErrorHandler(errorHandler);
    driver.setTransitionHandler(transitionHandler);
    treeBuilder.setErrorHandler(treeBuilderErrorHandler);
    driver.setCheckingNormalization(checkingNormalization);
    driver.setCommentPolicy(commentPolicy);
    driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
    driver.setContentSpacePolicy(contentSpacePolicy);
    driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
    driver.setMappingLangToXmlLang(mappingLangToXmlLang);
    driver.setXmlnsPolicy(xmlnsPolicy);
    driver.setHeuristics(heuristics);
    for (CharacterHandler registered : characterHandlers) {
        driver.addCharacterHandler(registered);
    }

    treeBuilder.setDoctypeExpectation(doctypeExpectation);
    treeBuilder.setDocumentModeHandler(documentModeHandler);
    // Comments are only worth keeping when somebody listens for them.
    treeBuilder.setIgnoringComments(lexicalHandler == null);
    treeBuilder.setScriptingEnabled(scriptingEnabled);
    treeBuilder.setReportingDoctype(reportingDoctype);
    treeBuilder.setNamePolicy(namePolicy);

    if (saxStreamer != null) {
        ContentHandler sink = contentHandler;
        if (sink == null) {
            sink = new DefaultHandler();
        }
        saxStreamer.setContentHandler(sink);
        saxStreamer.setLexicalHandler(lexicalHandler);
        driver.setAllowRewinding(false);
    }
}
/**
 * Returns the content handler as last set by the caller (possibly
 * <code>null</code>).
 *
 * @return the content handler
 * @see org.xml.sax.XMLReader#getContentHandler()
 */
public ContentHandler getContentHandler() {
    return this.contentHandler;
}
/**
 * Returns the DTD handler as last set by the caller (possibly
 * <code>null</code>).
 *
 * @return the DTD handler
 * @see org.xml.sax.XMLReader#getDTDHandler()
 */
public DTDHandler getDTDHandler() {
    return this.dtdHandler;
}
/**
 * Returns the entity resolver as last set by the caller (possibly
 * <code>null</code>).
 *
 * @return the entity resolver
 * @see org.xml.sax.XMLReader#getEntityResolver()
 */
public EntityResolver getEntityResolver() {
    return this.entityResolver;
}
/**
 * Returns the error handler as last set by the caller (possibly
 * <code>null</code>).
 *
 * @return the error handler
 * @see org.xml.sax.XMLReader#getErrorHandler()
 */
public ErrorHandler getErrorHandler() {
    return this.errorHandler;
}
/**
 * Exposes the configuration of the emulated XML parser as well as
 * boolean-valued configuration without using non-<code>XMLReader</code>
 * getters directly.
 *
 * <dl>
 * <dt><code>http://xml.org/sax/features/external-general-entities</code></dt>
 * <dd><code>false</code></dd>
 * <dt><code>http://xml.org/sax/features/external-parameter-entities</code></dt>
 * <dd><code>false</code></dd>
 * <dt><code>http://xml.org/sax/features/is-standalone</code></dt>
 * <dd><code>true</code></dd>
 * <dt><code>http://xml.org/sax/features/lexical-handler/parameter-entities</code></dt>
 * <dd><code>false</code></dd>
 * <dt><code>http://xml.org/sax/features/namespaces</code></dt>
 * <dd><code>true</code></dd>
 * <dt><code>http://xml.org/sax/features/namespace-prefixes</code></dt>
 * <dd><code>false</code></dd>
 * <dt><code>http://xml.org/sax/features/resolve-dtd-uris</code></dt>
 * <dd><code>true</code></dd>
 * <dt><code>http://xml.org/sax/features/string-interning</code></dt>
 * <dd><code>true</code></dd>
 * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt>
 * <dd><code>isCheckingNormalization</code></dd>
 * <dt><code>http://xml.org/sax/features/use-attributes2</code></dt>
 * <dd><code>false</code></dd>
 * <dt><code>http://xml.org/sax/features/use-locator2</code></dt>
 * <dd><code>false</code></dd>
 * <dt><code>http://xml.org/sax/features/use-entity-resolver2</code></dt>
 * <dd><code>false</code></dd>
 * <dt><code>http://xml.org/sax/features/validation</code></dt>
 * <dd><code>false</code></dd>
 * <dt><code>http://xml.org/sax/features/xmlns-uris</code></dt>
 * <dd><code>false</code></dd>
 * <dt><code>http://xml.org/sax/features/xml-1.1</code></dt>
 * <dd><code>false</code></dd>
 * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt>
 * <dd><code>isHtml4ModeCompatibleWithXhtml1Schemata</code></dd>
 * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt>
 * <dd><code>isMappingLangToXmlLang</code></dd>
 * <dt><code>http://validator.nu/features/scripting-enabled</code></dt>
 * <dd><code>isScriptingEnabled</code></dd>
 * </dl>
 *
 * @param name
 *            feature URI string
 * @return a value per the list above
 * @throws SAXNotRecognizedException
 *             if <code>name</code> is not one of the URIs listed above; the
 *             message names the offending feature
 * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
 */
public boolean getFeature(String name) throws SAXNotRecognizedException,
        SAXNotSupportedException {
    if ("http://xml.org/sax/features/external-general-entities".equals(name)) {
        return false;
    } else if ("http://xml.org/sax/features/external-parameter-entities".equals(name)) {
        return false;
    } else if ("http://xml.org/sax/features/is-standalone".equals(name)) {
        return true;
    } else if ("http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)) {
        return false;
    } else if ("http://xml.org/sax/features/namespaces".equals(name)) {
        return true;
    } else if ("http://xml.org/sax/features/namespace-prefixes".equals(name)) {
        return false;
    } else if ("http://xml.org/sax/features/resolve-dtd-uris".equals(name)) {
        return true; // default value--applicable scenario never happens
    } else if ("http://xml.org/sax/features/string-interning".equals(name)) {
        // Matches setFeature(), which only accepts true for this feature.
        return true;
    } else if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) {
        return isCheckingNormalization(); // the checks aren't really per
        // XML 1.1
    } else if ("http://xml.org/sax/features/use-attributes2".equals(name)) {
        return false;
    } else if ("http://xml.org/sax/features/use-locator2".equals(name)) {
        return false;
    } else if ("http://xml.org/sax/features/use-entity-resolver2".equals(name)) {
        return false;
    } else if ("http://xml.org/sax/features/validation".equals(name)) {
        return false;
    } else if ("http://xml.org/sax/features/xmlns-uris".equals(name)) {
        return false;
    } else if ("http://xml.org/sax/features/xml-1.1".equals(name)) {
        return false;
    } else if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) {
        return isHtml4ModeCompatibleWithXhtml1Schemata();
    } else if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) {
        return isMappingLangToXmlLang();
    } else if ("http://validator.nu/features/scripting-enabled".equals(name)) {
        return isScriptingEnabled();
    } else {
        // Name the feature so the caller can tell which lookup failed.
        throw new SAXNotRecognizedException("Unrecognized feature: " + name);
    }
}
/**
 * Allows <code>XMLReader</code>-level access to non-boolean valued
 * getters.
 *
 * <p>
 * The properties are mapped as follows:
 *
 * <dl>
 * <dt><code>http://xml.org/sax/properties/document-xml-version</code></dt>
 * <dd><code>"1.0"</code></dd>
 * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt>
 * <dd><code>getLexicalHandler</code></dd>
 * <dt><code>http://validator.nu/properties/content-space-policy</code></dt>
 * <dd><code>getContentSpacePolicy</code></dd>
 * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt>
 * <dd><code>getContentNonXmlCharPolicy</code></dd>
 * <dt><code>http://validator.nu/properties/comment-policy</code></dt>
 * <dd><code>getCommentPolicy</code></dd>
 * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt>
 * <dd><code>getXmlnsPolicy</code></dd>
 * <dt><code>http://validator.nu/properties/name-policy</code></dt>
 * <dd><code>getNamePolicy</code></dd>
 * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt>
 * <dd><code>getStreamabilityViolationPolicy</code></dd>
 * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt>
 * <dd><code>getDocumentModeHandler</code></dd>
 * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt>
 * <dd><code>getDoctypeExpectation</code></dd>
 * <dt><code>http://validator.nu/properties/heuristics</code></dt>
 * <dd><code>getHeuristics</code></dd>
 * </dl>
 *
 * <p>
 * The SAX properties <code>declaration-handler</code>,
 * <code>dom-node</code> and <code>xml-string</code>, and the write-only
 * convenience property
 * <code>http://validator.nu/properties/xml-policy</code>, are recognized
 * but cannot be read.
 *
 * @param name
 *            property URI string
 * @return a value per the list above
 * @throws SAXNotSupportedException
 *             if the property is recognized but cannot be read
 * @throws SAXNotRecognizedException
 *             if the property is unknown; the message names the offending
 *             property
 * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
 */
public Object getProperty(String name) throws SAXNotRecognizedException,
        SAXNotSupportedException {
    if ("http://xml.org/sax/properties/declaration-handler".equals(name)) {
        throw new SAXNotSupportedException(
                "This parser does not support DeclHandler.");
    } else if ("http://xml.org/sax/properties/document-xml-version".equals(name)) {
        return "1.0"; // Emulating an XML 1.1 parser is not supported.
    } else if ("http://xml.org/sax/properties/dom-node".equals(name)) {
        throw new SAXNotSupportedException(
                "This parser does not walk the DOM.");
    } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) {
        return getLexicalHandler();
    } else if ("http://xml.org/sax/properties/xml-string".equals(name)) {
        throw new SAXNotSupportedException(
                "This parser does not expose the source as a string.");
    } else if ("http://validator.nu/properties/content-space-policy".equals(name)) {
        return getContentSpacePolicy();
    } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) {
        return getContentNonXmlCharPolicy();
    } else if ("http://validator.nu/properties/comment-policy".equals(name)) {
        return getCommentPolicy();
    } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) {
        return getXmlnsPolicy();
    } else if ("http://validator.nu/properties/name-policy".equals(name)) {
        return getNamePolicy();
    } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) {
        return getStreamabilityViolationPolicy();
    } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) {
        return getDocumentModeHandler();
    } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) {
        return getDoctypeExpectation();
    } else if ("http://validator.nu/properties/xml-policy".equals(name)) {
        throw new SAXNotSupportedException(
                "Cannot get a convenience setter.");
    } else if ("http://validator.nu/properties/heuristics".equals(name)) {
        return getHeuristics();
    } else {
        // Name the property so the caller can tell which lookup failed.
        throw new SAXNotRecognizedException("Unrecognized property: " + name);
    }
}
/**
 * Parses a full document. When the buffering tree builder is in use, the
 * document it produced (if any) is replayed to the content and lexical
 * handlers afterwards, even if tokenization threw.
 *
 * @param input the input to parse
 * @throws IOException if reading the input fails
 * @throws SAXException if parsing or event delivery fails
 * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource)
 */
public void parse(InputSource input) throws IOException, SAXException {
    lazyInit();
    try {
        treeBuilder.setFragmentContext(null);
        tokenize(input);
    } finally {
        Document built = (saxTreeBuilder == null) ? null : saxTreeBuilder.getDocument();
        if (built != null) {
            TreeParser replayer = new TreeParser(contentHandler, lexicalHandler);
            replayer.parse(built);
        }
    }
}
/**
 * Parses a fragment with HTML context.
 *
 * <p>When the buffering tree builder is in use, the fragment it produced
 * is replayed to the content and lexical handlers afterwards. The replay
 * is skipped when no fragment is available, in the same way
 * <code>parse(InputSource)</code> skips a missing document, so that an
 * exception thrown during tokenization is not replaced by a failure in
 * the <code>finally</code> block.
 *
 * @param input the input to parse
 * @param context the name of the context element (HTML namespace assumed);
 *            must not be <code>null</code>
 * @throws IOException if reading the input fails
 * @throws SAXException if parsing or event delivery fails
 */
public void parseFragment(InputSource input, String context)
        throws IOException, SAXException {
    lazyInit();
    try {
        treeBuilder.setFragmentContext(context.intern());
        tokenize(input);
    } finally {
        if (saxTreeBuilder != null) {
            DocumentFragment fragment = saxTreeBuilder.getDocumentFragment();
            // Presumably null when tokenization failed before the tree
            // builder started a fragment; nothing to replay in that case.
            if (fragment != null) {
                new TreeParser(contentHandler, lexicalHandler).parse(fragment);
            }
        }
    }
}
/**
 * Parses a fragment in the context of an element given by local name and
 * namespace.
 *
 * <p>When the buffering tree builder is in use, the fragment it produced
 * is replayed to the content and lexical handlers afterwards. The replay
 * is skipped when no fragment is available, in the same way
 * <code>parse(InputSource)</code> skips a missing document, so that an
 * exception thrown during tokenization is not replaced by a failure in
 * the <code>finally</code> block.
 *
 * @param input the input to parse
 * @param contextLocal the local name of the context element; must not be
 *            <code>null</code>
 * @param contextNamespace the namespace of the context element; must not
 *            be <code>null</code>
 * @throws IOException if reading the input fails
 * @throws SAXException if parsing or event delivery fails
 */
public void parseFragment(InputSource input, String contextLocal, String contextNamespace)
        throws IOException, SAXException {
    lazyInit();
    try {
        treeBuilder.setFragmentContext(contextLocal.intern(), contextNamespace.intern(), null, false);
        tokenize(input);
    } finally {
        if (saxTreeBuilder != null) {
            DocumentFragment fragment = saxTreeBuilder.getDocumentFragment();
            // Presumably null when tokenization failed before the tree
            // builder started a fragment; nothing to replay in that case.
            if (fragment != null) {
                new TreeParser(contentHandler, lexicalHandler).parse(fragment);
            }
        }
    }
}
/**
 * Hands the input to the driver, first obtaining a stream for it when the
 * caller supplied only a system identifier.
 *
 * <p>If the input has neither a byte stream nor a character stream, the
 * entity resolver (when set) is consulted. A <code>null</code> result from
 * the resolver means "use the default", and a resolved source that
 * carries either kind of stream is used as is. Only when there is still no
 * stream is the system identifier opened as a URL.
 *
 * @param is the input to tokenize
 * @throws IllegalArgumentException if <code>is</code> is <code>null</code>
 *             or has no byte stream, no character stream and no system
 *             identifier
 * @throws SAXException if the resolver or the driver reports one
 * @throws IOException if opening or reading the input fails
 * @throws MalformedURLException if the system identifier is not a valid URL
 */
private void tokenize(InputSource is) throws SAXException, IOException, MalformedURLException {
    if (is == null) {
        throw new IllegalArgumentException("Null input.");
    }
    if (is.getByteStream() == null && is.getCharacterStream() == null) {
        String systemId = is.getSystemId();
        if (systemId == null) {
            throw new IllegalArgumentException("No byte stream, no character stream nor URI.");
        }
        if (entityResolver != null) {
            InputSource resolved = entityResolver.resolveEntity(is.getPublicId(), systemId);
            // The resolver returns null to request default handling; keep
            // the caller's source instead of dereferencing null.
            if (resolved != null) {
                is = resolved;
            }
        }
        // Either stream is enough; fetch the URL only when both are missing.
        if (is.getByteStream() == null && is.getCharacterStream() == null) {
            // Prefer a system identifier substituted by the resolver.
            String location = is.getSystemId() != null ? is.getSystemId() : systemId;
            InputSource opened = new InputSource();
            opened.setSystemId(location);
            opened.setByteStream(new URL(location).openStream());
            is = opened;
        }
    }
    driver.tokenize(is);
}
/**
 * Parses the document identified by the given system identifier by
 * wrapping it in an <code>InputSource</code>.
 *
 * @param systemId the system identifier of the document
 * @throws IOException if reading the input fails
 * @throws SAXException if parsing fails
 * @see org.xml.sax.XMLReader#parse(java.lang.String)
 */
public void parse(String systemId) throws IOException, SAXException {
    InputSource source = new InputSource(systemId);
    parse(source);
}
/**
 * Sets the content handler. If a streamer currently exists it is updated
 * immediately, receiving a <code>DefaultHandler</code> in place of
 * <code>null</code>.
 *
 * @param handler the content handler, or <code>null</code>
 * @see org.xml.sax.XMLReader#setContentHandler(org.xml.sax.ContentHandler)
 */
public void setContentHandler(ContentHandler handler) {
    contentHandler = handler;
    if (saxStreamer == null) {
        return;
    }
    ContentHandler effective = contentHandler;
    if (effective == null) {
        effective = new DefaultHandler();
    }
    saxStreamer.setContentHandler(effective);
}
/**
 * Sets the lexical handler. If a tree builder already exists it is told
 * to ignore comments exactly when the handler is <code>null</code>, and an
 * existing streamer receives the new handler.
 *
 * @param handler the handler, or <code>null</code>
 */
public void setLexicalHandler(LexicalHandler handler) {
    lexicalHandler = handler;
    if (treeBuilder == null) {
        return;
    }
    boolean ignoreComments = (handler == null);
    treeBuilder.setIgnoringComments(ignoreComments);
    if (saxStreamer != null) {
        saxStreamer.setLexicalHandler(handler);
    }
}
/**
 * Stores the DTD handler so that it can be returned by
 * <code>getDTDHandler</code>.
 *
 * @param handler the DTD handler, or <code>null</code>
 * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
 */
public void setDTDHandler(DTDHandler handler) {
    this.dtdHandler = handler;
}
/**
 * Sets the entity resolver that is consulted when an input has a system
 * identifier but no stream.
 *
 * @param resolver the resolver, or <code>null</code>
 * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
 */
public void setEntityResolver(EntityResolver resolver) {
    this.entityResolver = resolver;
}
/**
 * Sets the error handler for both the driver and the tree builder, and
 * discards the current driver so that the pipeline is rebuilt on next use
 * (the choice of tokenizer depends on whether an error handler is set).
 *
 * @param handler the error handler, or <code>null</code>
 * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
 */
public void setErrorHandler(ErrorHandler handler) {
    this.errorHandler = handler;
    this.treeBuilderErrorHandler = handler;
    this.driver = null;
}
/**
 * Sets the transition handler and discards the current driver so that the
 * pipeline is rebuilt on next use (the choice of tokenizer depends on
 * whether a transition handler is set).
 *
 * @param handler the transition handler, or <code>null</code>
 */
public void setTransitionHandler(TransitionHandler handler) {
    this.transitionHandler = handler;
    this.driver = null;
}
/**
 * Replaces the error handler used by the tree builder only. The value is
 * remembered for the next pipeline build and, if a driver currently
 * exists, applied to the live tree builder right away.
 *
 * @param handler the error handler for the tree builder
 * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
 * @deprecated For Validator.nu internal use
 */
public void setTreeBuilderErrorHandlerOverride(ErrorHandler handler) {
    treeBuilderErrorHandler = handler;
    if (driver == null) {
        return;
    }
    treeBuilder.setErrorHandler(handler);
}
/**
 * Sets a boolean feature without having to use non-<code>XMLReader</code>
 * setters directly.
 *
 * <p>
 * The supported features are:
 *
 * <dl>
 * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt>
 * <dd><code>setCheckingNormalization</code></dd>
 * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt>
 * <dd><code>setHtml4ModeCompatibleWithXhtml1Schemata</code></dd>
 * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt>
 * <dd><code>setMappingLangToXmlLang</code></dd>
 * <dt><code>http://validator.nu/features/scripting-enabled</code></dt>
 * <dd><code>setScriptingEnabled</code></dd>
 * </dl>
 *
 * <p>
 * The remaining standard SAX features have a fixed value. Setting one of
 * them to the value it already has is accepted silently; asking for the
 * other value is rejected.
 *
 * @param name
 *            feature URI string
 * @param value
 *            the requested state
 * @throws SAXNotSupportedException
 *             if a fixed-value feature is set to the value it does not have
 * @throws SAXNotRecognizedException
 *             if the feature is unknown; the message names the offending
 *             feature
 * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
 */
public void setFeature(String name, boolean value)
        throws SAXNotRecognizedException, SAXNotSupportedException {
    if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) {
        setCheckingNormalization(value);
    } else if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) {
        setHtml4ModeCompatibleWithXhtml1Schemata(value);
    } else if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) {
        setMappingLangToXmlLang(value);
    } else if ("http://validator.nu/features/scripting-enabled".equals(name)) {
        setScriptingEnabled(value);
    } else if ("http://xml.org/sax/features/external-general-entities".equals(name)
            || "http://xml.org/sax/features/external-parameter-entities".equals(name)
            || "http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)
            || "http://xml.org/sax/features/namespace-prefixes".equals(name)
            || "http://xml.org/sax/features/use-attributes2".equals(name)
            || "http://xml.org/sax/features/use-locator2".equals(name)
            || "http://xml.org/sax/features/use-entity-resolver2".equals(name)
            || "http://xml.org/sax/features/validation".equals(name)
            || "http://xml.org/sax/features/xmlns-uris".equals(name)
            || "http://xml.org/sax/features/xml-1.1".equals(name)) {
        // Features that are always false: only false may be requested.
        if (value) {
            throw new SAXNotSupportedException("Cannot set " + name + ".");
        }
    } else if ("http://xml.org/sax/features/is-standalone".equals(name)
            || "http://xml.org/sax/features/namespaces".equals(name)
            || "http://xml.org/sax/features/resolve-dtd-uris".equals(name)
            || "http://xml.org/sax/features/string-interning".equals(name)) {
        // Features that are always true: only true may be requested.
        if (!value) {
            throw new SAXNotSupportedException("Cannot set " + name + ".");
        }
    } else {
        // Name the feature so the caller can tell which request failed.
        throw new SAXNotRecognizedException("Unrecognized feature: " + name);
    }
}
/**
 * Sets a non-boolean property without having to use non-<code>XMLReader</code>
 * setters directly.
 *
 * <dl>
 * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt>
 * <dd><code>setLexicalHandler</code></dd>
 * <dt><code>http://validator.nu/properties/content-space-policy</code></dt>
 * <dd><code>setContentSpacePolicy</code></dd>
 * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt>
 * <dd><code>setContentNonXmlCharPolicy</code></dd>
 * <dt><code>http://validator.nu/properties/comment-policy</code></dt>
 * <dd><code>setCommentPolicy</code></dd>
 * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt>
 * <dd><code>setXmlnsPolicy</code></dd>
 * <dt><code>http://validator.nu/properties/name-policy</code></dt>
 * <dd><code>setNamePolicy</code></dd>
 * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt>
 * <dd><code>setStreamabilityViolationPolicy</code></dd>
 * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt>
 * <dd><code>setDocumentModeHandler</code></dd>
 * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt>
 * <dd><code>setDoctypeExpectation</code></dd>
 * <dt><code>http://validator.nu/properties/xml-policy</code></dt>
 * <dd><code>setXmlPolicy</code></dd>
 * <dt><code>http://validator.nu/properties/heuristics</code></dt>
 * <dd><code>setHeuristics</code></dd>
 * </dl>
 *
 * <p>
 * The value is cast to the type the corresponding setter expects; a value
 * of the wrong type results in a <code>ClassCastException</code>.
 *
 * @param name
 *            property URI string
 * @param value
 *            the value to pass to the setter listed above
 * @throws SAXNotSupportedException
 *             if the property is recognized but cannot be set
 * @throws SAXNotRecognizedException
 *             if the property is unknown; the message names the offending
 *             property
 * @see org.xml.sax.XMLReader#setProperty(java.lang.String,
 *      java.lang.Object)
 */
public void setProperty(String name, Object value)
        throws SAXNotRecognizedException, SAXNotSupportedException {
    if ("http://xml.org/sax/properties/declaration-handler".equals(name)) {
        throw new SAXNotSupportedException(
                "This parser does not support DeclHandler.");
    } else if ("http://xml.org/sax/properties/document-xml-version".equals(name)) {
        throw new SAXNotSupportedException(
                "Can't set document-xml-version.");
    } else if ("http://xml.org/sax/properties/dom-node".equals(name)) {
        throw new SAXNotSupportedException("Can't set dom-node.");
    } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) {
        setLexicalHandler((LexicalHandler) value);
    } else if ("http://xml.org/sax/properties/xml-string".equals(name)) {
        throw new SAXNotSupportedException("Can't set xml-string.");
    } else if ("http://validator.nu/properties/content-space-policy".equals(name)) {
        setContentSpacePolicy((XmlViolationPolicy) value);
    } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) {
        setContentNonXmlCharPolicy((XmlViolationPolicy) value);
    } else if ("http://validator.nu/properties/comment-policy".equals(name)) {
        setCommentPolicy((XmlViolationPolicy) value);
    } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) {
        setXmlnsPolicy((XmlViolationPolicy) value);
    } else if ("http://validator.nu/properties/name-policy".equals(name)) {
        setNamePolicy((XmlViolationPolicy) value);
    } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) {
        setStreamabilityViolationPolicy((XmlViolationPolicy) value);
    } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) {
        setDocumentModeHandler((DocumentModeHandler) value);
    } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) {
        setDoctypeExpectation((DoctypeExpectation) value);
    } else if ("http://validator.nu/properties/xml-policy".equals(name)) {
        setXmlPolicy((XmlViolationPolicy) value);
    } else if ("http://validator.nu/properties/heuristics".equals(name)) {
        setHeuristics((Heuristics) value);
    } else {
        // Name the property so the caller can tell which request failed.
        throw new SAXNotRecognizedException("Unrecognized property: " + name);
    }
}
/**
 * Tells whether the source is being checked for NFC normalization.
 *
 * @return <code>true</code> if NFC normalization of source is being checked
 * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
 */
public boolean isCheckingNormalization() {
    return this.checkingNormalization;
}
/**
 * Turns checking of the NFC normalization of the source on or off. The
 * setting is stored and, if a driver currently exists, applied to it
 * immediately.
 *
 * @param enable <code>true</code> to check normalization
 * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
 */
public void setCheckingNormalization(boolean enable) {
    checkingNormalization = enable;
    if (driver == null) {
        return;
    }
    driver.setCheckingNormalization(enable);
}
/**
 * Sets the policy for consecutive hyphens in comments. The policy is
 * stored and, if a driver currently exists, applied to it immediately.
 *
 * @param commentPolicy the policy
 * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
 */
public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
    this.commentPolicy = commentPolicy;
    if (driver == null) {
        return;
    }
    driver.setCommentPolicy(commentPolicy);
}
/**
 * Sets the policy for non-XML characters except white space. The current
 * driver is discarded so that the pipeline is rebuilt on next use (the
 * choice of tokenizer depends on this policy).
 *
 * @param contentNonXmlCharPolicy the policy
 * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
 */
public void setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy) {
    this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
    this.driver = null;
}
/**
 * Sets the policy for non-XML white space. The policy is stored and, if a
 * driver currently exists, applied to it immediately.
 *
 * @param contentSpacePolicy the policy
 * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
 */
public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
    this.contentSpacePolicy = contentSpacePolicy;
    if (driver == null) {
        return;
    }
    driver.setContentSpacePolicy(contentSpacePolicy);
}
/**
 * Tells whether the parser treats scripting as enabled for the purposes
 * of noscript handling.
 *
 * @return <code>true</code> if enabled
 * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
 */
public boolean isScriptingEnabled() {
    return this.scriptingEnabled;
}
/**
 * Sets whether the parser treats scripting as enabled for the purposes of
 * noscript handling. The setting is stored and, if a tree builder already
 * exists, applied to it immediately.
 *
 * @param scriptingEnabled <code>true</code> to enable
 * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
 */
public void setScriptingEnabled(boolean scriptingEnabled) {
    this.scriptingEnabled = scriptingEnabled;
    if (treeBuilder == null) {
        return;
    }
    treeBuilder.setScriptingEnabled(scriptingEnabled);
}
/**
 * Returns the currently configured doctype expectation.
 *
 * @return the doctypeExpectation
 */
public DoctypeExpectation getDoctypeExpectation() {
    return this.doctypeExpectation;
}
/**
 * Sets the doctype expectation. The value is stored and, if a tree
 * builder already exists, applied to it immediately.
 *
 * @param doctypeExpectation
 *            the doctypeExpectation to set
 * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
 */
public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
    this.doctypeExpectation = doctypeExpectation;
    if (treeBuilder == null) {
        return;
    }
    treeBuilder.setDoctypeExpectation(doctypeExpectation);
}
/**
 * Returns the currently configured document mode handler (possibly
 * <code>null</code>).
 *
 * @return the documentModeHandler
 */
public DocumentModeHandler getDocumentModeHandler() {
    return this.documentModeHandler;
}
/**
 * Sets the document mode handler. The value is stored and, if a tree
 * builder already exists, applied to it immediately, so that a handler
 * set after the first parse takes effect on the next one. This matches
 * the way the doctype expectation and scripting flag are propagated.
 *
 * @param documentModeHandler
 *            the documentModeHandler to set
 * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
 */
public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
    this.documentModeHandler = documentModeHandler;
    // lazyInit() only copies this field while building the pipeline, so a
    // live tree builder has to be updated here.
    if (treeBuilder != null) {
        treeBuilder.setDocumentModeHandler(documentModeHandler);
    }
}
/**
 * Returns the currently configured streamability violation policy.
 *
 * @return the streamabilityViolationPolicy
 */
public XmlViolationPolicy getStreamabilityViolationPolicy() {
    return this.streamabilityViolationPolicy;
}
/**
 * Sets the streamability violation policy and discards the current driver
 * so that the pipeline is rebuilt on next use (the policy decides between
 * the buffering tree builder and the streamer).
 *
 * @param streamabilityViolationPolicy
 *            the streamabilityViolationPolicy to set
 */
public void setStreamabilityViolationPolicy(XmlViolationPolicy streamabilityViolationPolicy) {
    this.streamabilityViolationPolicy = streamabilityViolationPolicy;
    this.driver = null;
}
/**
 * Sets whether the HTML 4 mode reports boolean attributes in a way that
 * repeats the name in the value. The setting is stored and, if a driver
 * currently exists, applied to it immediately.
 *
 * @param html4ModeCompatibleWithXhtml1Schemata the new setting
 */
public void setHtml4ModeCompatibleWithXhtml1Schemata(
        boolean html4ModeCompatibleWithXhtml1Schemata) {
    this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
    if (driver == null) {
        return;
    }
    driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
}
/**
 * Returns the <code>Locator</code> during parse.
 *
 * <p>The locator belongs to the current driver. When no driver exists
 * (before the first parse, or after a setter has discarded the driver)
 * there is no locator and <code>null</code> is returned instead of
 * failing with a <code>NullPointerException</code>.
 *
 * @return the <code>Locator</code>, or <code>null</code> if no driver
 *         currently exists
 */
public Locator getDocumentLocator() {
    if (driver == null) {
        return null;
    }
    return driver.getDocumentLocator();
}
/**
 * Tells whether the HTML 4 mode reports boolean attributes in a way that
 * repeats the name in the value.
 *
 * @return the stored html4ModeCompatibleWithXhtml1Schemata flag
 */
public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
    return this.html4ModeCompatibleWithXhtml1Schemata;
}
/**
 * Sets whether <code>lang</code> is mapped to <code>xml:lang</code>. The
 * flag is stored on this parser and, if a driver exists, passed on to it.
 *
 * @param mappingLangToXmlLang
 *            the new value of the flag
 * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
 */
public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
    this.mappingLangToXmlLang = mappingLangToXmlLang;
    if (driver == null) {
        // No driver yet; only the stored flag is updated.
        return;
    }
    driver.setMappingLangToXmlLang(mappingLangToXmlLang);
}
/**
 * Tells whether <code>lang</code> is mapped to <code>xml:lang</code>.
 *
 * @return the stored mappingLangToXmlLang flag
 */
public boolean isMappingLangToXmlLang() {
    return this.mappingLangToXmlLang;
}
/**
 * Sets whether the <code>xmlns</code> attribute on the root element is
 * passed through. <code>FATAL</code> is not an allowed value. An accepted
 * policy is stored on this parser and, if a driver exists, passed on to
 * it.
 *
 * @param xmlnsPolicy
 *            the policy to use; must not be
 *            <code>XmlViolationPolicy.FATAL</code>
 * @throws IllegalArgumentException
 *             if <code>xmlnsPolicy</code> is
 *             <code>XmlViolationPolicy.FATAL</code>
 * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
 */
public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
    // Reject FATAL before touching any state.
    if (XmlViolationPolicy.FATAL == xmlnsPolicy) {
        throw new IllegalArgumentException("Can't use FATAL here.");
    }
    this.xmlnsPolicy = xmlnsPolicy;
    if (driver == null) {
        return;
    }
    driver.setXmlnsPolicy(xmlnsPolicy);
}
/**
 * Gets the xmlns policy currently stored on this parser.
 *
 * @return the stored xmlnsPolicy
 */
public XmlViolationPolicy getXmlnsPolicy() {
    return this.xmlnsPolicy;
}
/**
 * Gets the lexical handler currently stored on this parser.
 *
 * @return the stored lexicalHandler
 */
public LexicalHandler getLexicalHandler() {
    return this.lexicalHandler;
}
/**
 * Gets the comment policy currently stored on this parser.
 *
 * @return the stored commentPolicy
 */
public XmlViolationPolicy getCommentPolicy() {
    return this.commentPolicy;
}
/**
 * Gets the content non-XML character policy currently stored on this
 * parser.
 *
 * @return the stored contentNonXmlCharPolicy
 */
public XmlViolationPolicy getContentNonXmlCharPolicy() {
    return this.contentNonXmlCharPolicy;
}
/**
 * Gets the content space policy currently stored on this parser.
 *
 * @return the stored contentSpacePolicy
 */
public XmlViolationPolicy getContentSpacePolicy() {
    return this.contentSpacePolicy;
}
/**
 * Stores the reportingDoctype flag on this parser and, if a tree builder
 * already exists, forwards the new value to it straight away.
 *
 * @param reportingDoctype
 *            the new value of the flag
 * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
 */
public void setReportingDoctype(boolean reportingDoctype) {
    this.reportingDoctype = reportingDoctype;
    if (treeBuilder == null) {
        // No tree builder to forward to; only the stored flag changes.
        return;
    }
    treeBuilder.setReportingDoctype(reportingDoctype);
}
/**
 * Gets the reportingDoctype flag currently stored on this parser.
 *
 * @return the stored reportingDoctype flag
 */
public boolean isReportingDoctype() {
    return this.reportingDoctype;
}
/**
* Stores the error profile map on this parser. This method only records
* the reference; it does not push the map to any other object.
*
* @param errorProfileMap
* the error profile map to store
* @see nu.validator.htmlparser.impl.ErrorReportingTokenizer
*/
public void setErrorProfile(HashMap<String, String> errorProfileMap) {
this.errorProfileMap = errorProfileMap;
}
/**
 * Sets the policy for non-NCName element and attribute names. The policy
 * is stored on this parser; if a driver exists, it is then passed on to
 * both the driver and the tree builder.
 *
 * @param namePolicy
 *            the policy to use
 * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
 */
public void setNamePolicy(XmlViolationPolicy namePolicy) {
    this.namePolicy = namePolicy;
    if (driver == null) {
        // Without a driver, neither the driver nor the tree builder is touched.
        return;
    }
    driver.setNamePolicy(namePolicy);
    treeBuilder.setNamePolicy(namePolicy);
}
/**
 * Sets the encoding sniffing heuristics. The value is stored on this
 * parser and, if a driver exists, passed on to it as well.
 *
 * @param heuristics
 *            the heuristics to set
 * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
 */
public void setHeuristics(Heuristics heuristics) {
    this.heuristics = heuristics;
    if (driver == null) {
        // No driver yet; only the stored value is updated.
        return;
    }
    driver.setHeuristics(heuristics);
}
/**
 * Gets the heuristics currently stored on this parser.
 *
 * @return the stored heuristics
 */
public Heuristics getHeuristics() {
    return heuristics;
}
/**
 * Catch-all convenience method that sets the name, xmlns, content space,
 * content non-XML char and comment policies in one go. This does not
 * affect the streamability policy or doctype reporting.
 *
 * <p>Because the xmlns policy setter rejects <code>FATAL</code>, a
 * <code>FATAL</code> argument is applied to the xmlns policy as
 * <code>ALTER_INFOSET</code>; the other four policies receive the argument
 * unchanged.
 *
 * @param xmlPolicy
 *            the policy to apply
 */
public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
    XmlViolationPolicy effectiveXmlnsPolicy = xmlPolicy;
    if (xmlPolicy == XmlViolationPolicy.FATAL) {
        effectiveXmlnsPolicy = XmlViolationPolicy.ALTER_INFOSET;
    }
    setNamePolicy(xmlPolicy);
    setXmlnsPolicy(effectiveXmlnsPolicy);
    setContentSpacePolicy(xmlPolicy);
    setContentNonXmlCharPolicy(xmlPolicy);
    setCommentPolicy(xmlPolicy);
}
/**
 * Gets the policy for non-NCName element and attribute names that is
 * currently stored on this parser.
 *
 * @return the stored namePolicy
 */
public XmlViolationPolicy getNamePolicy() {
    return this.namePolicy;
}
/**
 * Does nothing. The argument is ignored.
 *
 * @param bogusXmlnsPolicy
 *            ignored
 * @deprecated This setter has no effect.
 */
@Deprecated
public void setBogusXmlnsPolicy(
        XmlViolationPolicy bogusXmlnsPolicy) {
    // Intentionally empty: kept only so existing callers still compile.
}
/**
 * Returns <code>XmlViolationPolicy.ALTER_INFOSET</code>.
 *
 * @deprecated The returned value is a constant and does not reflect any
 *             configurable state.
 * @return <code>XmlViolationPolicy.ALTER_INFOSET</code>
 */
@Deprecated
public XmlViolationPolicy getBogusXmlnsPolicy() {
    return XmlViolationPolicy.ALTER_INFOSET;
}
/**
 * Adds a character handler. The handler is appended to this parser's own
 * collection of character handlers and, if a driver exists, is also added
 * to the driver.
 *
 * @param characterHandler
 *            the handler to add
 */
public void addCharacterHandler(CharacterHandler characterHandler) {
    characterHandlers.add(characterHandler);
    if (driver == null) {
        // No driver yet; the handler is only recorded locally.
        return;
    }
    driver.addCharacterHandler(characterHandler);
}
}