Package nu.validator.htmlparser.sax

Source Code of nu.validator.htmlparser.sax.HtmlParser

/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2007-2010 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

package nu.validator.htmlparser.sax;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
import java.util.HashMap;

import nu.validator.htmlparser.common.CharacterHandler;
import nu.validator.htmlparser.common.DoctypeExpectation;
import nu.validator.htmlparser.common.DocumentModeHandler;
import nu.validator.htmlparser.common.Heuristics;
import nu.validator.htmlparser.common.TokenHandler;
import nu.validator.htmlparser.common.TransitionHandler;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.impl.ErrorReportingTokenizer;
import nu.validator.htmlparser.impl.Tokenizer;
import nu.validator.htmlparser.impl.TreeBuilder;
import nu.validator.htmlparser.io.Driver;
import nu.validator.saxtree.Document;
import nu.validator.saxtree.DocumentFragment;
import nu.validator.saxtree.TreeParser;

import org.xml.sax.ContentHandler;
import org.xml.sax.DTDHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.DefaultHandler;

/**
* This class implements an HTML5 parser that exposes data through the SAX2
* interface.
*
* <p>By default, when using the constructor without arguments, this parser
* treats XML 1.0 infoset violations as fatal. This corresponds to
* <code>FATAL</code> as the general XML violation policy. To make the parser
* coerce XML 1.0-incompatible infosets into XML 1.0-compatible infosets
* instead, set the general XML violation policy to
* <code>ALTER_INFOSET</code>. To make the parser support non-conforming HTML
* fully per the HTML 5 spec while on the other hand potentially violating the
* SAX2 API contract, set the general XML violation policy to
* <code>ALLOW</code>.
*
* <p>By default, this parser doesn't do true streaming but buffers everything
* first. The parser can be made truly streaming by calling
* <code>setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL)</code>. This
* has the consequence that errors that require non-streamable recovery are
* treated as fatal.
*
* <p>By default, the parser reports the doctype through
* <code>LexicalHandler</code>. In order to make the parse events emulate the
* parse events for a DTDless XML document, doctype reporting through
* <code>LexicalHandler</code> can be turned off by calling
* <code>setReportingDoctype(false)</code>.
*
* @version $Id$
* @author hsivonen
*/
public class HtmlParser implements XMLReader {

    // Drives the tokenizer. Created by lazyInit() and reset to null by the
    // setters that need the tokenizer/tree builder pair to be rebuilt.
    private Driver driver = null;

    // The tree builder currently in use; lazyInit() points it at either
    // saxTreeBuilder or saxStreamer.
    private TreeBuilder<?> treeBuilder = null;

    // Assigned by lazyInit() when the streamability violation policy is not
    // ALLOW; set to null by lazyInit() otherwise.
    private SAXStreamer saxStreamer = null; // work around javac bug

    // Assigned by lazyInit() when the streamability violation policy is
    // ALLOW; set to null by lazyInit() otherwise.
    private SAXTreeBuilder saxTreeBuilder = null; // work around javac bug

    // Receives the parse events; see setContentHandler().
    private ContentHandler contentHandler = null;

    // Optional lexical handler; comments are ignored by the tree builder
    // while this is null (see lazyInit() and setLexicalHandler()).
    private LexicalHandler lexicalHandler = null;

    // Stored by setDTDHandler() and returned by getDTDHandler().
    private DTDHandler dtdHandler = null;

    // Consulted by tokenize() when the input source has no stream.
    private EntityResolver entityResolver = null;

    // Error handler given to the driver; also one of the inputs that decide
    // which tokenizer class newTokenizer() instantiates.
    private ErrorHandler errorHandler = null;

    // The fields below hold configuration that lazyInit() copies to the
    // driver and the tree builder.
    private DocumentModeHandler documentModeHandler = null;

    private DoctypeExpectation doctypeExpectation = DoctypeExpectation.HTML;

    private boolean checkingNormalization = false;

    private boolean scriptingEnabled = false;

    // Handlers that lazyInit() adds to every newly created driver.
    private final List<CharacterHandler> characterHandlers = new LinkedList<CharacterHandler>();

    private XmlViolationPolicy contentSpacePolicy = XmlViolationPolicy.FATAL;

    // Besides being copied to the driver, this policy takes part in the
    // tokenizer class selection in newTokenizer().
    private XmlViolationPolicy contentNonXmlCharPolicy = XmlViolationPolicy.FATAL;

    private XmlViolationPolicy commentPolicy = XmlViolationPolicy.FATAL;

    private XmlViolationPolicy namePolicy = XmlViolationPolicy.FATAL;

    // ALLOW selects SAXTreeBuilder in lazyInit(); any other value selects
    // SAXStreamer.
    private XmlViolationPolicy streamabilityViolationPolicy = XmlViolationPolicy.ALLOW;

    private boolean html4ModeCompatibleWithXhtml1Schemata = false;

    private boolean mappingLangToXmlLang = false;

    private XmlViolationPolicy xmlnsPolicy = XmlViolationPolicy.FATAL;

    private boolean reportingDoctype = true;

    // Error handler given to the tree builder. setErrorHandler() overwrites
    // it; setTreeBuilderErrorHandlerOverride() sets it independently.
    private ErrorHandler treeBuilderErrorHandler = null;

    private Heuristics heuristics = Heuristics.NONE;

    // Passed to ErrorReportingTokenizer.setErrorProfile() in newTokenizer().
    private HashMap<String, String> errorProfileMap = null;

    // Given to the driver in lazyInit(); a non-null value also makes
    // newTokenizer() choose ErrorReportingTokenizer.
    private TransitionHandler transitionHandler = null;
   
    /**
     * Instantiates the parser with <code>XmlViolationPolicy.FATAL</code> as
     * the general XML violation policy by delegating to
     * <code>HtmlParser(XmlViolationPolicy)</code>.
     */
    public HtmlParser() {
        this(XmlViolationPolicy.FATAL);
    }
   
    /**
     * Instantiates the parser with a specific XML violation policy.
     * @param xmlPolicy the policy
     */
    public HtmlParser(XmlViolationPolicy xmlPolicy) {
        setXmlPolicy(xmlPolicy);
    }   

    private Tokenizer newTokenizer(TokenHandler handler, boolean newAttributesEachTime) {
        if (errorHandler == null && transitionHandler == null &&
            contentNonXmlCharPolicy == XmlViolationPolicy.ALLOW) {
            return new Tokenizer(handler, newAttributesEachTime);
        }
        ErrorReportingTokenizer tokenizer =
            new ErrorReportingTokenizer(handler, newAttributesEachTime);
        tokenizer.setErrorProfile(errorProfileMap);
        return tokenizer;
   }
   
    /**
     * This class wraps different tree builders depending on configuration. This
     * method does the work of hiding this from the user of the class.
     */
    private void lazyInit() {
        if (driver == null) {
            if (streamabilityViolationPolicy == XmlViolationPolicy.ALLOW) {
                this.saxTreeBuilder = new SAXTreeBuilder();
                this.treeBuilder = this.saxTreeBuilder;
                this.saxStreamer = null;
                this.driver = new Driver(newTokenizer(treeBuilder, true));
            } else {
                this.saxStreamer = new SAXStreamer();
                this.treeBuilder = this.saxStreamer;
                this.saxTreeBuilder = null;
                this.driver = new Driver(newTokenizer(treeBuilder, false));
            }
            this.driver.setErrorHandler(errorHandler);
            this.driver.setTransitionHandler(transitionHandler);
            this.treeBuilder.setErrorHandler(treeBuilderErrorHandler);
            this.driver.setCheckingNormalization(checkingNormalization);
            this.driver.setCommentPolicy(commentPolicy);
            this.driver.setContentNonXmlCharPolicy(contentNonXmlCharPolicy);
            this.driver.setContentSpacePolicy(contentSpacePolicy);
            this.driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
            this.driver.setMappingLangToXmlLang(mappingLangToXmlLang);
            this.driver.setXmlnsPolicy(xmlnsPolicy);
            this.driver.setHeuristics(heuristics);
            for (CharacterHandler characterHandler : characterHandlers) {
                this.driver.addCharacterHandler(characterHandler);
            }
            this.treeBuilder.setDoctypeExpectation(doctypeExpectation);
            this.treeBuilder.setDocumentModeHandler(documentModeHandler);
            this.treeBuilder.setIgnoringComments(lexicalHandler == null);
            this.treeBuilder.setScriptingEnabled(scriptingEnabled);
            this.treeBuilder.setReportingDoctype(reportingDoctype);
            this.treeBuilder.setNamePolicy(namePolicy);
            if (saxStreamer != null) {
                saxStreamer.setContentHandler(contentHandler == null ? new DefaultHandler()
                        : contentHandler);
                saxStreamer.setLexicalHandler(lexicalHandler);
                driver.setAllowRewinding(false);
            }
        }
    }

    /**
     * @see org.xml.sax.XMLReader#getContentHandler()
     */
    public ContentHandler getContentHandler() {
        return contentHandler;
    }

    /**
     * @see org.xml.sax.XMLReader#getDTDHandler()
     */
    public DTDHandler getDTDHandler() {
        return dtdHandler;
    }

    /**
     * @see org.xml.sax.XMLReader#getEntityResolver()
     */
    public EntityResolver getEntityResolver() {
        return entityResolver;
    }

    /**
     * @see org.xml.sax.XMLReader#getErrorHandler()
     */
    public ErrorHandler getErrorHandler() {
        return errorHandler;
    }

    /**
     * Exposes the configuration of the emulated XML parser as well as
     * boolean-valued configuration without using non-<code>XMLReader</code>
     * getters directly.
     *
     * <dl>
     * <dt><code>http://xml.org/sax/features/external-general-entities</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/external-parameter-entities</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/is-standalone</code></dt>
     * <dd><code>true</code></dd>
     * <dt><code>http://xml.org/sax/features/lexical-handler/parameter-entities</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/namespaces</code></dt>
     * <dd><code>true</code></dd>
     * <dt><code>http://xml.org/sax/features/namespace-prefixes</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/resolve-dtd-uris</code></dt>
     * <dd><code>true</code></dd>
     * <dt><code>http://xml.org/sax/features/string-interning</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt>
     * <dd><code>isCheckingNormalization</code></dd>
     * <dt><code>http://xml.org/sax/features/use-attributes2</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/use-locator2</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/use-entity-resolver2</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/validation</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/xmlns-uris</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://xml.org/sax/features/xml-1.1</code></dt>
     * <dd><code>false</code></dd>
     * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt>
     * <dd><code>isHtml4ModeCompatibleWithXhtml1Schemata</code></dd>
     * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt>
     * <dd><code>isMappingLangToXmlLang</code></dd>
     * <dt><code>http://validator.nu/features/scripting-enabled</code></dt>
     * <dd><code>isScriptingEnabled</code></dd>
     * </dl>
     *
     * @param name
     *            feature URI string
     * @return a value per the list above
     * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
     */
    public boolean getFeature(String name) throws SAXNotRecognizedException,
            SAXNotSupportedException {
        if ("http://xml.org/sax/features/external-general-entities".equals(name)) {
            return false;
        } else if ("http://xml.org/sax/features/external-parameter-entities".equals(name)) {
            return false;
        } else if ("http://xml.org/sax/features/is-standalone".equals(name)) {
            return true;
        } else if ("http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)) {
            return false;
        } else if ("http://xml.org/sax/features/namespaces".equals(name)) {
            return true;
        } else if ("http://xml.org/sax/features/namespace-prefixes".equals(name)) {
            return false;
        } else if ("http://xml.org/sax/features/resolve-dtd-uris".equals(name)) {
            return true; // default value--applicable scenario never happens
        } else if ("http://xml.org/sax/features/string-interning".equals(name)) {
            return true;
        } else if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) {
            return isCheckingNormalization(); // the checks aren't really per
            // XML 1.1
        } else if ("http://xml.org/sax/features/use-attributes2".equals(name)) {
            return false;
        } else if ("http://xml.org/sax/features/use-locator2".equals(name)) {
            return false;
        } else if ("http://xml.org/sax/features/use-entity-resolver2".equals(name)) {
            return false;
        } else if ("http://xml.org/sax/features/validation".equals(name)) {
            return false;
        } else if ("http://xml.org/sax/features/xmlns-uris".equals(name)) {
            return false;
        } else if ("http://xml.org/sax/features/xml-1.1".equals(name)) {
            return false;
        } else if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) {
            return isHtml4ModeCompatibleWithXhtml1Schemata();
        } else if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) {
            return isMappingLangToXmlLang();
        } else if ("http://validator.nu/features/scripting-enabled".equals(name)) {
            return isScriptingEnabled();
        } else {
            throw new SAXNotRecognizedException();
        }
    }

    /**
     * Allows <code>XMLReader</code>-level access to non-boolean valued
     * getters.
     *
     * <p>
     * The properties are mapped as follows:
     *
     * <dl>
     * <dt><code>http://xml.org/sax/properties/document-xml-version</code></dt>
     * <dd><code>"1.0"</code></dd>
     * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt>
     * <dd><code>getLexicalHandler</code></dd>
     * <dt><code>http://validator.nu/properties/content-space-policy</code></dt>
     * <dd><code>getContentSpacePolicy</code></dd>
     * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt>
     * <dd><code>getContentNonXmlCharPolicy</code></dd>
     * <dt><code>http://validator.nu/properties/comment-policy</code></dt>
     * <dd><code>getCommentPolicy</code></dd>
     * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt>
     * <dd><code>getXmlnsPolicy</code></dd>
     * <dt><code>http://validator.nu/properties/name-policy</code></dt>
     * <dd><code>getNamePolicy</code></dd>
     * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt>
     * <dd><code>getStreamabilityViolationPolicy</code></dd>
     * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt>
     * <dd><code>getDocumentModeHandler</code></dd>
     * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt>
     * <dd><code>getDoctypeExpectation</code></dd>
     * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt>
     * </dl>
     *
     * @param name
     *            property URI string
     * @return a value per the list above
     * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
     */
    public Object getProperty(String name) throws SAXNotRecognizedException,
            SAXNotSupportedException {
        if ("http://xml.org/sax/properties/declaration-handler".equals(name)) {
            throw new SAXNotSupportedException(
                    "This parser does not suppert DeclHandler.");
        } else if ("http://xml.org/sax/properties/document-xml-version".equals(name)) {
            return "1.0"; // Emulating an XML 1.1 parser is not supported.
        } else if ("http://xml.org/sax/properties/dom-node".equals(name)) {
            throw new SAXNotSupportedException(
                    "This parser does not walk the DOM.");
        } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) {
            return getLexicalHandler();
        } else if ("http://xml.org/sax/properties/xml-string".equals(name)) {
            throw new SAXNotSupportedException(
                    "This parser does not expose the source as a string.");
        } else if ("http://validator.nu/properties/content-space-policy".equals(name)) {
            return getContentSpacePolicy();
        } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) {
            return getContentNonXmlCharPolicy();
        } else if ("http://validator.nu/properties/comment-policy".equals(name)) {
            return getCommentPolicy();
        } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) {
            return getXmlnsPolicy();
        } else if ("http://validator.nu/properties/name-policy".equals(name)) {
            return getNamePolicy();
        } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) {
            return getStreamabilityViolationPolicy();
        } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) {
            return getDocumentModeHandler();
        } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) {
            return getDoctypeExpectation();
        } else if ("http://validator.nu/properties/xml-policy".equals(name)) {
            throw new SAXNotSupportedException(
                    "Cannot get a convenience setter.");
        } else if ("http://validator.nu/properties/heuristics".equals(name)) {
            return getHeuristics();
        } else {
            throw new SAXNotRecognizedException();
        }
    }

    /**
     * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource)
     */
    public void parse(InputSource input) throws IOException, SAXException {
        lazyInit();
        try {
            treeBuilder.setFragmentContext(null);
            tokenize(input);
        } finally {
            if (saxTreeBuilder != null) {
                Document document = saxTreeBuilder.getDocument();
                if (document != null) {
                    new TreeParser(contentHandler, lexicalHandler).parse(document);
                }
            }
        }
    }

    /**
     * Parses a fragment.
     *
     * @param input the input to parse
     * @param context the name of the context element
     * @throws IOException
     * @throws SAXException
     */
    public void parseFragment(InputSource input, String context)
            throws IOException, SAXException {
        lazyInit();
        try {
            treeBuilder.setFragmentContext(context.intern());
            tokenize(input);
        } finally {
            if (saxTreeBuilder != null) {
                DocumentFragment fragment = saxTreeBuilder.getDocumentFragment();
                new TreeParser(contentHandler, lexicalHandler).parse(fragment);
            }
        }
    }
   
    /**
     * @param is
     * @throws SAXException
     * @throws IOException
     * @throws MalformedURLException
     */
    private void tokenize(InputSource is) throws SAXException, IOException, MalformedURLException {
        if (is == null) {
            throw new IllegalArgumentException("Null input.");           
        }
        if (is.getByteStream() == null && is.getCharacterStream() == null) {
            String systemId = is.getSystemId();
            if (systemId == null) {
                throw new IllegalArgumentException("No byte stream, no character stream nor URI.");
            }
            if (entityResolver != null) {
                is = entityResolver.resolveEntity(is.getPublicId(), systemId);
            }
            if (is.getByteStream() == null || is.getCharacterStream() == null) {
                is = new InputSource();
                is.setSystemId(systemId);
                is.setByteStream(new URL(systemId).openStream());
            }
        }
        driver.tokenize(is);
    }

    /**
     * @see org.xml.sax.XMLReader#parse(java.lang.String)
     */
    public void parse(String systemId) throws IOException, SAXException {
        parse(new InputSource(systemId));
    }

    /**
     * @see org.xml.sax.XMLReader#setContentHandler(org.xml.sax.ContentHandler)
     */
    public void setContentHandler(ContentHandler handler) {
        contentHandler = handler;
        if (saxStreamer != null) {
            saxStreamer.setContentHandler(contentHandler == null ? new DefaultHandler()
                    : contentHandler);
        }
    }

    /**
     * Sets the lexical handler.
     * @param handler the hander.
     */
    public void setLexicalHandler(LexicalHandler handler) {
        lexicalHandler = handler;
        if (treeBuilder != null) {
            treeBuilder.setIgnoringComments(handler == null);
            if (saxStreamer != null) {
                saxStreamer.setLexicalHandler(handler);
            }
        }
    }

    /**
     * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
     */
    public void setDTDHandler(DTDHandler handler) {
        dtdHandler = handler;
    }

    /**
     * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
     */
    public void setEntityResolver(EntityResolver resolver) {
        entityResolver = resolver;
    }

    /**
     * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
     */
    public void setErrorHandler(ErrorHandler handler) {
        errorHandler = handler;
        treeBuilderErrorHandler = handler;
        driver = null;
    }

    public void setTransitionHandler(TransitionHandler handler) {
        transitionHandler = handler;
        driver = null;
    }
   
    /**
     * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
     * @deprecated For Validator.nu internal use
     */
    public void setTreeBuilderErrorHandlerOverride(ErrorHandler handler) {
        treeBuilderErrorHandler = handler;
        if (driver != null) {
            treeBuilder.setErrorHandler(handler);
        }
    }
   
    /**
     * Sets a boolean feature without having to use non-<code>XMLReader</code>
     * setters directly.
     *
     * <p>
     * The supported features are:
     *
     * <dl>
     * <dt><code>http://xml.org/sax/features/unicode-normalization-checking</code></dt>
     * <dd><code>setCheckingNormalization</code></dd>
     * <dt><code>http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata</code></dt>
     * <dd><code>setHtml4ModeCompatibleWithXhtml1Schemata</code></dd>
     * <dt><code>http://validator.nu/features/mapping-lang-to-xml-lang</code></dt>
     * <dd><code>setMappingLangToXmlLang</code></dd>
     * <dt><code>http://validator.nu/features/scripting-enabled</code></dt>
     * <dd><code>setScriptingEnabled</code></dd>
     * </dl>
     *
     * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
     */
    public void setFeature(String name, boolean value)
            throws SAXNotRecognizedException, SAXNotSupportedException {
        if ("http://xml.org/sax/features/external-general-entities".equals(name)) {
            if (value) {
                throw new SAXNotSupportedException("Cannot set " + name + ".");
            }
        } else if ("http://xml.org/sax/features/external-parameter-entities".equals(name)) {
            if (value) {
                throw new SAXNotSupportedException("Cannot set " + name + ".");
            }
        } else if ("http://xml.org/sax/features/is-standalone".equals(name)) {
            if (!value) {
                throw new SAXNotSupportedException("Cannot set " + name + ".");
            }
        } else if ("http://xml.org/sax/features/lexical-handler/parameter-entities".equals(name)) {
            if (value) {
                throw new SAXNotSupportedException("Cannot set " + name + ".");
            }
        } else if ("http://xml.org/sax/features/namespaces".equals(name)) {
            if (!value) {
                throw new SAXNotSupportedException("Cannot set " + name + ".");
            }
        } else if ("http://xml.org/sax/features/namespace-prefixes".equals(name)) {
            if (value) {
                throw new SAXNotSupportedException("Cannot set " + name + ".");
            }
        } else if ("http://xml.org/sax/features/resolve-dtd-uris".equals(name)) {
            if (!value) {
                throw new SAXNotSupportedException("Cannot set " + name + ".");
            }
        } else if ("http://xml.org/sax/features/string-interning".equals(name)) {
            if (!value) {
                throw new SAXNotSupportedException("Cannot set " + name + ".");
            }
        } else if ("http://xml.org/sax/features/unicode-normalization-checking".equals(name)) {
            setCheckingNormalization(value);
        } else if ("http://xml.org/sax/features/use-attributes2".equals(name)) {
            if (value) {
                throw new SAXNotSupportedException("Cannot set " + name + ".");
            }
        } else if ("http://xml.org/sax/features/use-locator2".equals(name)) {
            if (value) {
                throw new SAXNotSupportedException("Cannot set " + name + ".");
            }
        } else if ("http://xml.org/sax/features/use-entity-resolver2".equals(name)) {
            if (value) {
                throw new SAXNotSupportedException("Cannot set " + name + ".");
            }
        } else if ("http://xml.org/sax/features/validation".equals(name)) {
            if (value) {
                throw new SAXNotSupportedException("Cannot set " + name + ".");
            }
        } else if ("http://xml.org/sax/features/xmlns-uris".equals(name)) {
            if (value) {
                throw new SAXNotSupportedException("Cannot set " + name + ".");
            }
        } else if ("http://xml.org/sax/features/xml-1.1".equals(name)) {
            if (value) {
                throw new SAXNotSupportedException("Cannot set " + name + ".");
            }
        } else if ("http://validator.nu/features/html4-mode-compatible-with-xhtml1-schemata".equals(name)) {
            setHtml4ModeCompatibleWithXhtml1Schemata(value);
        } else if ("http://validator.nu/features/mapping-lang-to-xml-lang".equals(name)) {
            setMappingLangToXmlLang(value);
        } else if ("http://validator.nu/features/scripting-enabled".equals(name)) {
            setScriptingEnabled(value);
        } else {
            throw new SAXNotRecognizedException();
        }
    }

    /**
     * Sets a non-boolean property without having to use non-<code>XMLReader</code>
     * setters directly.
     *
     * <dl>
     * <dt><code>http://xml.org/sax/properties/lexical-handler</code></dt>
     * <dd><code>setLexicalHandler</code></dd>
     * <dt><code>http://validator.nu/properties/content-space-policy</code></dt>
     * <dd><code>setContentSpacePolicy</code></dd>
     * <dt><code>http://validator.nu/properties/content-non-xml-char-policy</code></dt>
     * <dd><code>setContentNonXmlCharPolicy</code></dd>
     * <dt><code>http://validator.nu/properties/comment-policy</code></dt>
     * <dd><code>setCommentPolicy</code></dd>
     * <dt><code>http://validator.nu/properties/xmlns-policy</code></dt>
     * <dd><code>setXmlnsPolicy</code></dd>
     * <dt><code>http://validator.nu/properties/name-policy</code></dt>
     * <dd><code>setNamePolicy</code></dd>
     * <dt><code>http://validator.nu/properties/streamability-violation-policy</code></dt>
     * <dd><code>setStreamabilityViolationPolicy</code></dd>
     * <dt><code>http://validator.nu/properties/document-mode-handler</code></dt>
     * <dd><code>setDocumentModeHandler</code></dd>
     * <dt><code>http://validator.nu/properties/doctype-expectation</code></dt>
     * <dd><code>setDoctypeExpectation</code></dd>
     * <dt><code>http://validator.nu/properties/xml-policy</code></dt>
     * <dd><code>setXmlPolicy</code></dd>
     * </dl>
     *
     * @see org.xml.sax.XMLReader#setProperty(java.lang.String,
     *      java.lang.Object)
     */
    public void setProperty(String name, Object value)
            throws SAXNotRecognizedException, SAXNotSupportedException {
        if ("http://xml.org/sax/properties/declaration-handler".equals(name)) {
            throw new SAXNotSupportedException(
                    "This parser does not suppert DeclHandler.");
        } else if ("http://xml.org/sax/properties/document-xml-version".equals(name)) {
            throw new SAXNotSupportedException(
                    "Can't set document-xml-version.");
        } else if ("http://xml.org/sax/properties/dom-node".equals(name)) {
            throw new SAXNotSupportedException("Can't set dom-node.");
        } else if ("http://xml.org/sax/properties/lexical-handler".equals(name)) {
            setLexicalHandler((LexicalHandler) value);
        } else if ("http://xml.org/sax/properties/xml-string".equals(name)) {
            throw new SAXNotSupportedException("Can't set xml-string.");
        } else if ("http://validator.nu/properties/content-space-policy".equals(name)) {
            setContentSpacePolicy((XmlViolationPolicy) value);
        } else if ("http://validator.nu/properties/content-non-xml-char-policy".equals(name)) {
            setContentNonXmlCharPolicy((XmlViolationPolicy) value);
        } else if ("http://validator.nu/properties/comment-policy".equals(name)) {
            setCommentPolicy((XmlViolationPolicy) value);
        } else if ("http://validator.nu/properties/xmlns-policy".equals(name)) {
            setXmlnsPolicy((XmlViolationPolicy) value);
        } else if ("http://validator.nu/properties/name-policy".equals(name)) {
            setNamePolicy((XmlViolationPolicy) value);
        } else if ("http://validator.nu/properties/streamability-violation-policy".equals(name)) {
            setStreamabilityViolationPolicy((XmlViolationPolicy) value);
        } else if ("http://validator.nu/properties/document-mode-handler".equals(name)) {
            setDocumentModeHandler((DocumentModeHandler) value);
        } else if ("http://validator.nu/properties/doctype-expectation".equals(name)) {
            setDoctypeExpectation((DoctypeExpectation) value);
        } else if ("http://validator.nu/properties/xml-policy".equals(name)) {
            setXmlPolicy((XmlViolationPolicy) value);
        } else if ("http://validator.nu/properties/heuristics".equals(name)) {
            setHeuristics((Heuristics) value);
        } else {
            throw new SAXNotRecognizedException();
        }
    }

    /**
     * Indicates whether NFC normalization of source is being checked.
     * @return <code>true</code> if NFC normalization of source is being checked.
     * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization()
     */
    public boolean isCheckingNormalization() {
        return checkingNormalization;
    }

    /**
     * Toggles the checking of the NFC normalization of source.
     * @param enable <code>true</code> to check normalization
     * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean)
     */
    public void setCheckingNormalization(boolean enable) {
        this.checkingNormalization = enable;
        if (driver != null) {
            driver.setCheckingNormalization(checkingNormalization);
        }
    }

    /**
     * Sets the policy for consecutive hyphens in comments.
     * @param commentPolicy the policy
     * @see nu.validator.htmlparser.impl.Tokenizer#setCommentPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
     */
    public void setCommentPolicy(XmlViolationPolicy commentPolicy) {
        this.commentPolicy = commentPolicy;
        if (driver != null) {
            driver.setCommentPolicy(commentPolicy);
        }
    }

    /**
     * Sets the policy for non-XML characters except white space.
     * @param contentNonXmlCharPolicy the policy
     * @see nu.validator.htmlparser.impl.Tokenizer#setContentNonXmlCharPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
     */
    public void setContentNonXmlCharPolicy(
            XmlViolationPolicy contentNonXmlCharPolicy) {
        this.contentNonXmlCharPolicy = contentNonXmlCharPolicy;
        driver = null;
    }

    /**
     * Sets the policy for non-XML white space.
     * @param contentSpacePolicy the policy
     * @see nu.validator.htmlparser.impl.Tokenizer#setContentSpacePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
     */
    public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy) {
        this.contentSpacePolicy = contentSpacePolicy;
        if (driver != null) {
            driver.setContentSpacePolicy(contentSpacePolicy);
        }
    }

    /**
     * Whether the parser considers scripting to be enabled for noscript treatment.
     *
     * @return <code>true</code> if enabled
     * @see nu.validator.htmlparser.impl.TreeBuilder#isScriptingEnabled()
     */
    public boolean isScriptingEnabled() {
        return scriptingEnabled;
    }

    /**
     * Sets whether the parser considers scripting to be enabled for noscript treatment.
     * @param scriptingEnabled <code>true</code> to enable
     * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean)
     */
    public void setScriptingEnabled(boolean scriptingEnabled) {
        this.scriptingEnabled = scriptingEnabled;
        if (treeBuilder != null) {
            treeBuilder.setScriptingEnabled(scriptingEnabled);
        }
    }

    /**
     * Returns the doctype expectation.
     *
     * @return the doctypeExpectation
     */
    public DoctypeExpectation getDoctypeExpectation() {
        return doctypeExpectation;
    }

    /**
     * Sets the doctype expectation.
     *
     * @param doctypeExpectation
     *            the doctypeExpectation to set
     * @see nu.validator.htmlparser.impl.TreeBuilder#setDoctypeExpectation(nu.validator.htmlparser.common.DoctypeExpectation)
     */
    public void setDoctypeExpectation(DoctypeExpectation doctypeExpectation) {
        this.doctypeExpectation = doctypeExpectation;
        if (treeBuilder != null) {
            treeBuilder.setDoctypeExpectation(doctypeExpectation);
        }
    }

    /**
     * Returns the document mode handler.
     *
     * @return the documentModeHandler
     */
    public DocumentModeHandler getDocumentModeHandler() {
        return documentModeHandler;
    }

    /**
     * Sets the document mode handler.
     *
     * @param documentModeHandler
     *            the documentModeHandler to set
     * @see nu.validator.htmlparser.impl.TreeBuilder#setDocumentModeHandler(nu.validator.htmlparser.common.DocumentModeHandler)
     */
    public void setDocumentModeHandler(DocumentModeHandler documentModeHandler) {
        this.documentModeHandler = documentModeHandler;
    }

    /**
     * Returns the streamabilityViolationPolicy.
     *
     * @return the streamabilityViolationPolicy
     */
    public XmlViolationPolicy getStreamabilityViolationPolicy() {
        return streamabilityViolationPolicy;
    }

    /**
     * Sets the streamabilityViolationPolicy.
     *
     * @param streamabilityViolationPolicy
     *            the streamabilityViolationPolicy to set
     */
    public void setStreamabilityViolationPolicy(
            XmlViolationPolicy streamabilityViolationPolicy) {
        this.streamabilityViolationPolicy = streamabilityViolationPolicy;
        driver = null;
    }

    /**
     * Whether the HTML 4 mode reports boolean attributes in a way that repeats
     * the name in the value.
     * @param html4ModeCompatibleWithXhtml1Schemata
     */
    public void setHtml4ModeCompatibleWithXhtml1Schemata(
            boolean html4ModeCompatibleWithXhtml1Schemata) {
        this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
        if (driver != null) {
            driver.setHtml4ModeCompatibleWithXhtml1Schemata(html4ModeCompatibleWithXhtml1Schemata);
        }
    }

    /**
     * Returns the <code>Locator</code> during parse.
     * @return the <code>Locator</code>
     */
    public Locator getDocumentLocator() {
        return driver.getDocumentLocator();
    }

    /**
     * Whether the HTML 4 mode reports boolean attributes in a way that repeats
     * the name in the value.
     *
     * @return the html4ModeCompatibleWithXhtml1Schemata
     */
    public boolean isHtml4ModeCompatibleWithXhtml1Schemata() {
        return html4ModeCompatibleWithXhtml1Schemata;
    }

    /**
     * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
     * @param mappingLangToXmlLang
     * @see nu.validator.htmlparser.impl.Tokenizer#setMappingLangToXmlLang(boolean)
     */
    public void setMappingLangToXmlLang(boolean mappingLangToXmlLang) {
        this.mappingLangToXmlLang = mappingLangToXmlLang;
        if (driver != null) {
            driver.setMappingLangToXmlLang(mappingLangToXmlLang);
        }
    }

    /**
     * Whether <code>lang</code> is mapped to <code>xml:lang</code>.
     *
     * @return the mappingLangToXmlLang
     */
    public boolean isMappingLangToXmlLang() {
        return mappingLangToXmlLang;
    }

    /**
     * Whether the <code>xmlns</code> attribute on the root element is
     * passed to through. (FATAL not allowed.)
     * @param xmlnsPolicy
     * @see nu.validator.htmlparser.impl.Tokenizer#setXmlnsPolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
     */
    public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy) {
        if (xmlnsPolicy == XmlViolationPolicy.FATAL) {
            throw new IllegalArgumentException("Can't use FATAL here.");
        }
        this.xmlnsPolicy = xmlnsPolicy;
        if (driver != null) {
            driver.setXmlnsPolicy(xmlnsPolicy);
        }
    }

    /**
     * Returns the xmlnsPolicy.
     *
     * @return the xmlnsPolicy
     */
    public XmlViolationPolicy getXmlnsPolicy() {
        return xmlnsPolicy;
    }

    /**
     * Returns the lexicalHandler.
     *
     * @return the lexicalHandler
     */
    public LexicalHandler getLexicalHandler() {
        return lexicalHandler;
    }

    /**
     * Returns the commentPolicy.
     *
     * @return the commentPolicy
     */
    public XmlViolationPolicy getCommentPolicy() {
        return commentPolicy;
    }

    /**
     * Returns the contentNonXmlCharPolicy.
     *
     * @return the contentNonXmlCharPolicy
     */
    public XmlViolationPolicy getContentNonXmlCharPolicy() {
        return contentNonXmlCharPolicy;
    }

    /**
     * Returns the contentSpacePolicy.
     *
     * @return the contentSpacePolicy
     */
    public XmlViolationPolicy getContentSpacePolicy() {
        return contentSpacePolicy;
    }

    /**
     * @param reportingDoctype
     * @see nu.validator.htmlparser.impl.TreeBuilder#setReportingDoctype(boolean)
     */
    public void setReportingDoctype(boolean reportingDoctype) {
        this.reportingDoctype = reportingDoctype;
        if (treeBuilder != null) {
            treeBuilder.setReportingDoctype(reportingDoctype);
        }
    }

    /**
     * Returns the reportingDoctype.
     *
     * @return the reportingDoctype
     */
    public boolean isReportingDoctype() {
        return reportingDoctype;
    }

    /**
     * @param errorProfile
     * @see nu.validator.htmlparser.impl.errorReportingTokenizer#setErrorProfile(set)
     */
    public void setErrorProfile(HashMap<String, String> errorProfileMap) {
        this.errorProfileMap = errorProfileMap;
    }

    /**
     * The policy for non-NCName element and attribute names.
     * @param namePolicy
     * @see nu.validator.htmlparser.impl.Tokenizer#setNamePolicy(nu.validator.htmlparser.common.XmlViolationPolicy)
     */
    public void setNamePolicy(XmlViolationPolicy namePolicy) {
        this.namePolicy = namePolicy;
        if (driver != null) {
            driver.setNamePolicy(namePolicy);
            treeBuilder.setNamePolicy(namePolicy);
        }
    }
   
    /**
     * Sets the encoding sniffing heuristics.
     *
     * @param heuristics the heuristics to set
     * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics)
     */
    public void setHeuristics(Heuristics heuristics) {
        this.heuristics = heuristics;
        if (driver != null) {
            driver.setHeuristics(heuristics);
        }
    }
   
    public Heuristics getHeuristics() {
        return this.heuristics;
    }

    /**
     * This is a catch-all convenience method for setting name, xmlns, content space,
     * content non-XML char and comment policies in one go. This does not affect the
     * streamability policy or doctype reporting.
     *
     * @param xmlPolicy
     */
    public void setXmlPolicy(XmlViolationPolicy xmlPolicy) {
        setNamePolicy(xmlPolicy);
        setXmlnsPolicy(xmlPolicy == XmlViolationPolicy.FATAL ? XmlViolationPolicy.ALTER_INFOSET : xmlPolicy);
        setContentSpacePolicy(xmlPolicy);
        setContentNonXmlCharPolicy(xmlPolicy);
        setCommentPolicy(xmlPolicy);
    }

    /**
     * The policy for non-NCName element and attribute names.
     *
     * @return the namePolicy
     */
    public XmlViolationPolicy getNamePolicy() {
        return namePolicy;
    }

    /**
     * Does nothing.
     * @deprecated
     */
    public void setBogusXmlnsPolicy(
            XmlViolationPolicy bogusXmlnsPolicy) {
    }

    /**
     * Returns <code>XmlViolationPolicy.ALTER_INFOSET</code>.
     * @deprecated
     * @return <code>XmlViolationPolicy.ALTER_INFOSET</code>
     */
    public XmlViolationPolicy getBogusXmlnsPolicy() {
        return XmlViolationPolicy.ALTER_INFOSET;
    }
   
    public void addCharacterHandler(CharacterHandler characterHandler) {
        this.characterHandlers.add(characterHandler);
        if (driver != null) {
            driver.addCharacterHandler(characterHandler);
        }
    }
}
TOP

Related Classes of nu.validator.htmlparser.sax.HtmlParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.