Package org.apache.xerces.parsers

Source Code of org.apache.xerces.parsers.AbstractDOMParser

/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 2001 The Apache Software Foundation. 
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in
*    the documentation and/or other materials provided with the
*    distribution.
*
* 3. The end-user documentation included with the redistribution,
*    if any, must include the following acknowledgment: 
*       "This product includes software developed by the
*        Apache Software Foundation (http://www.apache.org/)."
*    Alternately, this acknowledgment may appear in the software itself,
*    if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
*    not be used to endorse or promote products derived from this
*    software without prior written permission. For written
*    permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
*    nor may "Apache" appear in their name, without prior written
*    permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org.  For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/

package org.apache.xerces.parsers;

import org.apache.xerces.dom.DocumentImpl;
import org.apache.xerces.dom.EntityReferenceImpl;
import org.apache.xerces.dom.TextImpl;

import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLLocator;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLParserConfiguration;

import org.w3c.dom.Attr;
import org.w3c.dom.CDATASection;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Element;
import org.w3c.dom.EntityReference;
import org.w3c.dom.Node;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;

/**
* This is the base class of all DOM parsers. It implements the XNI
* callback methods to create the DOM tree. After a successful parse of
* an XML document, the DOM Document object can be queried using the
* <code>getDocument</code> method. The actual pipeline is defined in
* the parser configuration.
*
* @author Stubs generated by DesignDoc on Mon Sep 11 11:10:57 PDT 2000
* @author Arnaud Le Hors, IBM
* @author Andy Clark, IBM
*
* @version $Id: AbstractDOMParser.java,v 1.2 2001/08/23 00:35:31 lehors Exp $
*/
public abstract class AbstractDOMParser
    extends AbstractXMLDocumentParser {

    //
    // Constants
    //

    /**
     * Feature id: create entity ref nodes. Read in reset() and checked in
     * startEntity/endEntity.
     */
    protected static final String CREATE_ENTITY_REF_NODES =
        "http://apache.org/xml/features/dom/create-entity-ref-nodes";

    /**
     * Feature id: include ignorable whitespace. Read in reset() and checked
     * in ignorableWhitespace.
     */
    protected static final String INCLUDE_IGNORABLE_WHITESPACE =
        "http://apache.org/xml/features/dom/include-ignorable-whitespace";

    /**
     * Debug switch: when true, buildAttrEntityRefs prints a trace of the
     * text splitting to System.out.
     */
    private static final boolean DEBUG_ENTITY_REF = false;
   
    //
    // Data
    //

    // features

    /** Create entity reference nodes (cached feature state, set in reset()). */
    protected boolean fCreateEntityRefNodes;

    /** Include ignorable whitespace (cached feature state, set in reset()). */
    protected boolean fIncludeIgnorableWhitespace;

    // dom information

    /** The document being built; returned by getDocument(). */
    protected Document fDocument;

    /** The default Xerces document implementation, if used. */
    protected DocumentImpl fDocumentImpl;

    /** Current node: the node that new children are appended to. */
    protected Node fCurrentNode;

    // state

    /** True if inside document (between startDocument and endDocument). */
    protected boolean fInDocument;

    /** True if inside CDATA section (between startCDATA and endCDATA). */
    protected boolean fInCDATASection;

    // data
   
    /** Attribute QName: scratch object filled per attribute in startElement. */
    private QName fAttrQName = new QName();

    //
    // Constructors
    //

    /** Default constructor. */
    protected AbstractDOMParser(XMLParserConfiguration config) {
        super(config);

        // add recognized features
        final String[] recognizedFeatures = {
            CREATE_ENTITY_REF_NODES,
            INCLUDE_IGNORABLE_WHITESPACE,
        };
        fConfiguration.addRecognizedFeatures(recognizedFeatures);

        // set default values
        fConfiguration.setFeature(CREATE_ENTITY_REF_NODES, true);
        fConfiguration.setFeature(INCLUDE_IGNORABLE_WHITESPACE, true);

    } // <init>(XMLParserConfiguration)

    //
    // Public methods
    //

    /** Returns the DOM document object. */
    public Document getDocument() {
        return fDocument;
    } // getDocument():Document

    //
    // XMLDocumentParser methods
    //

    /**
     * Resets the parser state.
     *
     * @throws SAXException Thrown on initialization error.
     */
    public void reset() throws XNIException {
        super.reset();

        // get feature state
        fCreateEntityRefNodes = fConfiguration.getFeature(CREATE_ENTITY_REF_NODES);
        fIncludeIgnorableWhitespace = fConfiguration.getFeature(INCLUDE_IGNORABLE_WHITESPACE);

        // reset dom information
        fDocument = null;
        fCurrentNode = null;

        // reset state information
        fInDocument = false;
        fInDTD = false;
        fInCDATASection = false;

    } // reset()

    //
    // XMLDocumentHandler methods
    //

    /**
     * This method notifies of the start of an entity. The DTD has the
     * pseudo-name of "[dtd]; parameter entity names start with '%'; and
     * general entity names are just the entity name.
     * <p>
     * <strong>Note:</strong> Since the DTD is an entity, the handler
     * will be notified of the start of the DTD entity by calling the
     * startEntity method with the entity name "[dtd]" <em>before</em> calling
     * the startDTD method.
     * <p>
     * <strong>Note:</strong> This method is not called for entity references
     * appearing as part of attribute values.
     *
     * @param name     The name of the entity.
     * @param publicId The public identifier of the entity if the entity
     *                 is external, null otherwise.
     * @param systemId The system identifier of the entity if the entity
     *                 is external, null otherwise.
     * @param encoding The auto-detected IANA encoding name of the entity
     *                 stream. This value will be null in those situations
     *                 where the entity encoding is not auto-detected (e.g.
     *                 internal parameter entities).
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    public void startEntity(String name, String publicId, String systemId,
                            String baseSystemId,
                            String encoding) throws XNIException {

        if (fInDocument && !fInDTD && fCreateEntityRefNodes) {
            EntityReference entityRef = fDocument.createEntityReference(name);
            fCurrentNode.appendChild(entityRef);
            fCurrentNode = entityRef;
        }

    } // startEntity(String,String,String,String)

    /**
     * A comment.
     *
     * @param text The text in the comment.
     *
     * @throws XNIException Thrown by application to signal an error.
     */
    public void comment(XMLString text) throws XNIException {

        Comment comment = fDocument.createComment(text.toString());
        fCurrentNode.appendChild(comment);

    } // comment(XMLString)

    /**
     * A processing instruction. Processing instructions consist of a
     * target name and, optionally, text data. The data is only meaningful
     * to the application.
     * <p>
     * Typically, a processing instruction's data will contain a series
     * of pseudo-attributes. These pseudo-attributes follow the form of
     * element attributes but are <strong>not</strong> parsed or presented
     * to the application as anything other than text. The application is
     * responsible for parsing the data.
     *
     * @param target The target.
     * @param data   The data or null if none specified.
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    public void processingInstruction(String target, XMLString data)
        throws XNIException {

        ProcessingInstruction pi = fDocument.createProcessingInstruction(target, data.toString());
        fCurrentNode.appendChild(pi);

    } // processingInstruction(String,XMLString)

    /**
     * The start of the document.
     *
     * @param systemId The system identifier of the entity if the entity
     *                 is external, null otherwise.
     * @param encoding The auto-detected IANA encoding name of the entity
     *                 stream. This value will be null in those situations
     *                 where the entity encoding is not auto-detected (e.g.
     *                 internal entities or a document entity that is
     *                 parsed from a java.io.Reader).
     *    
     * @throws XNIException Thrown by handler to signal an error.
     */
    public void startDocument(XMLLocator locator, String encoding)
        throws XNIException {

        fInDocument = true;
        fDocument = new DocumentImpl();
        fDocumentImpl = (DocumentImpl)fDocument;
        fCurrentNode = fDocument;
        // set DOM error checking off
        fDocumentImpl.setErrorChecking(false);

    } // startDocument(String,String)

    /**
     * Notifies of the presence of the DOCTYPE line in the document.
     *
     * @param rootElement The name of the root element.
     * @param publicId    The public identifier if an external DTD or null
     *                    if the external DTD is specified using SYSTEM.
     * @param systemId    The system identifier if an external DTD, null
     *                    otherwise.
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    public void doctypeDecl(String rootElement, String publicId, String systemId)
        throws XNIException {
       
        DocumentImpl docimpl = (DocumentImpl)fDocument;
        DocumentType doctype = docimpl.createDocumentType(rootElement, publicId, systemId);
        fCurrentNode.appendChild(doctype);

    } // doctypeDecl(String,String,String)

    /**
     * The start of an element. If the document specifies the start element
     * by using an empty tag, then the startElement method will immediately
     * be followed by the endElement method, with no intervening methods.
     *
     * @param element    The name of the element.
     * @param attributes The element attributes.
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    public void startElement(QName element, XMLAttributes attributes)
        throws XNIException {

        Element elementNode = element.prefix != null
                            ? fDocument.createElementNS(element.uri, element.rawname)
                            : fDocument.createElement(element.rawname);
        int attrCount = attributes.getLength();
        for (int i = 0; i < attrCount; i++) {
            attributes.getName(i, fAttrQName);
            Attr attr = fAttrQName.prefix != null
                      ? fDocument.createAttributeNS(fAttrQName.uri, fAttrQName.rawname)
                      : fDocument.createAttribute(fAttrQName.rawname);
            attr.setNodeValue(attributes.getValue(i));
            // REVISIT: Handle entities in attribute value.
            elementNode.setAttributeNode(attr);

            // build entity references
            int entityCount = attributes.getEntityCount(i);
            if (entityCount > 0) {
                Text text = (Text)attr.getFirstChild();
                buildAttrEntityRefs(text, attributes, i, entityCount, 0, 0);
            }
        }
        fCurrentNode.appendChild(elementNode);
        fCurrentNode = elementNode;

    } // startElement(QName,XMLAttributes)

    /**
     * Character content.
     *
     * @param text The content.
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    public void characters(XMLString text) throws XNIException {

        if (fInCDATASection) {
            CDATASection cdataSection = (CDATASection)fCurrentNode;
            cdataSection.appendData(text.toString());
        }
        else if (!fInDTD) {
            Node child = fCurrentNode.getLastChild();
            if (child != null && child.getNodeType() == Node.TEXT_NODE) {
                Text textNode = (Text)child;
                textNode.appendData(text.toString());
            }
            else {
                Text textNode = fDocument.createTextNode(text.toString());
                fCurrentNode.appendChild(textNode);
            }
        }

    } // characters(XMLString)

    /**
     * Ignorable whitespace. For this method to be called, the document
     * source must have some way of determining that the text containing
     * only whitespace characters should be considered ignorable. For
     * example, the validator can determine if a length of whitespace
     * characters in the document are ignorable based on the element
     * content model.
     *
     * @param text The ignorable whitespace.
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    public void ignorableWhitespace(XMLString text) throws XNIException {

        if (!fIncludeIgnorableWhitespace) {
            return;
        }

        if (fInCDATASection) {
            CDATASection cdataSection = (CDATASection)fCurrentNode;
            cdataSection.appendData(text.toString());
            return;
        }
       
        Node child = fCurrentNode.getLastChild();
        if (child != null && child.getNodeType() == Node.TEXT_NODE) {
            Text textNode = (Text)child;
            textNode.appendData(text.toString());
        }
        else {
            Text textNode = fDocument.createTextNode(text.toString());
            if (fDocumentImpl != null) {
                TextImpl textNodeImpl = (TextImpl)textNode;
                textNodeImpl.setIgnorableWhitespace(true);
            }
            fCurrentNode.appendChild(textNode);
        }

    } // ignorableWhitespace(XMLString)

    /**
     * The end of an element.
     *
     * @param element The name of the element.
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    public void endElement(QName element) throws XNIException {

        fCurrentNode = fCurrentNode.getParentNode();

    } // endElement(QName)

    /**
     * The end of a namespace prefix mapping. This method will only be
     * called when namespace processing is enabled.
     *
     * @param prefix The namespace prefix.
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    public void endPrefixMapping(String prefix) throws XNIException {
    } // endPrefixMapping(String)

    /**
     * The start of a CDATA section.
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    public void startCDATA() throws XNIException {

        fInCDATASection = true;
        CDATASection cdataSection = fDocument.createCDATASection("");
        fCurrentNode.appendChild(cdataSection);
        fCurrentNode = cdataSection;

    } // startCDATA()

    /**
     * The end of a CDATA section.
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    public void endCDATA() throws XNIException {

        fInCDATASection = false;
        fCurrentNode = fCurrentNode.getParentNode();

    } // endCDATA()

    /**
     * The end of the document.
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    public void endDocument() throws XNIException {

        // set DOM error checking back on
        if (fDocumentImpl != null) {
            fDocumentImpl.setErrorChecking(true);
        }
        fInDocument = false;
        fCurrentNode = null;

    } // endDocument()

    /**
     * This method notifies the end of an entity. The DTD has the pseudo-name
     * of "[dtd]; parameter entity names start with '%'; and general entity
     * names are just the entity name.
     * <p>
     * <strong>Note:</strong> Since the DTD is an entity, the handler
     * will be notified of the end of the DTD entity by calling the
     * endEntity method with the entity name "[dtd]" <em>after</em> calling
     * the endDTD method.
     * <p>
     * <strong>Note:</strong> This method is not called for entity references
     * appearing as part of attribute values.
     *
     * @param name The name of the entity.
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    public void endEntity(String name) throws XNIException {

        if (fInDocument && !fInDTD && fCreateEntityRefNodes) {
            fCurrentNode = fCurrentNode.getParentNode();
        }

    } // endEntity(String)

    //
    // Protected methods
    //

    /**
     * Builds entity references in attribute values. This method is
     * recursive because entity references can contain entity
     * references.
     * <p>
     * For each entity that lies inside <code>text</code>, the text node is
     * split into the part before the entity, the entity's own text and
     * the remainder. The entity's text is replaced in the tree by an
     * entity reference node and re-attached as that node's child; the
     * loop then continues with the remainder.
     *
     * @param text        The text node that needs to be split.
     * @param attributes  The attribute information.
     * @param attrIndex   The attribute index.
     * @param entityCount The number of entities. This is passed as
     *                    a convenience so that this method doesn't
     *                    have to call XMLAttributes#getEntityCount.
     *                    The caller already has the entity count so
     *                    it's kind of a waste to make each invocation
     *                    of this method query it again.
     * @param entityIndex The entity index that this method invocation
     *                    should start building from.
     * @param textOffset  The offset at which the start of this text
     *                    should be considered. We need this to adjust
     *                    the offset since the characters in the current
     *                    text string are indexed from zero.
     *
     * @return Returns the number of entities built by this method.
     */
    protected int buildAttrEntityRefs(Text text, XMLAttributes attributes,
                                      int attrIndex,
                                      int entityCount, int entityIndex,
                                      int textOffset) {

        // iterate over entities
        // NOTE: textString is read once, before the loop, and is only used
        //       by the debug trace below; textLength is not used at all.
        String textString = text.getNodeValue();
        int textLength = textString.length();
        int i = entityIndex;
        while (i < entityCount) {

            // get entity information
            // (the offset is compared against textOffset below, so it is
            // presumably relative to the whole attribute value -- confirm
            // against XMLAttributes)
            String entityName = attributes.getEntityName(attrIndex, i);
            int entityOffset = attributes.getEntityOffset(attrIndex, i);
            int entityLength = attributes.getEntityLength(attrIndex, i);

            // debug trace: text as passed in, and the entity being handled
            if (DEBUG_ENTITY_REF) {
                System.out.println("==>"+textString);
                System.out.println(i+". &"+entityName+";");
            }
            // is this entity not in this text? stop when the current text
            // node is empty or the entity starts at or beyond its end
            int tempLength= text.getNodeValue().length();
            if ( tempLength == 0 || entityOffset >= textOffset +  tempLength) {
                break;
            }
        
            // split text into 3 parts; first part remains the
            // text node that was passed into this method
            // (text1 = entity text, text2 = everything after the entity)
            Text text1 = text.splitText(entityOffset - textOffset);
            Text text2 = text1.splitText(entityLength);

            if (DEBUG_ENTITY_REF) {
                System.out.println(text.getNodeValue()+"->"+text1.getNodeValue()+"->"+text2.getNodeValue());
            }

            // create entity reference; it is made writable here so that
            // children can be added, and read-only again further down
            EntityReference entityRef = fDocument.createEntityReference(entityName);
            ((EntityReferenceImpl)entityRef).setReadOnly(false, false);

            // insert entity ref into tree and append middle text
            Node parent = text.getParentNode();
            parent.replaceChild(entityRef, text1);
            entityRef.appendChild(text1);

            // see if there are any nested entity refs: the next entity is
            // nested when it starts before the current entity ends
            if (i < entityCount - 1) {
                int nextEntityOffset = attributes.getEntityOffset(attrIndex, i + 1);
                if (nextEntityOffset < entityOffset + entityLength) {
                    // NOTE: Notice that we're incrementing the entity
                    //       index variable. Since the following call will
                    //       "consume" some of the entities.
                    i += buildAttrEntityRefs(text1, attributes, attrIndex, entityCount, i + 1, entityOffset);
                }
            }
            ((EntityReferenceImpl)entityRef).setReadOnly(true, false);

            // adjust text node: text now holds only the part before the
            // entity, so the new offset is the position just after the
            // entity, which is where text2 starts
            textOffset += text.getLength() + entityLength;
            text = text2;
           
            // increment and keep going
            i++;
        }
       
        // return number of entities we handled
        return i - entityIndex;

    } // buildAttrEntityRefs(Text,XMLAttributes,int,int,int,int):int

} // class AbstractDOMParser
TOP

Related Classes of org.apache.xerces.parsers.AbstractDOMParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.