// Source code of net.sf.saxon.event.ReceivingContentHandler (including its inner class LocalLocator)

package net.sf.saxon.event;


import net.sf.saxon.Configuration;
import net.sf.saxon.FeatureKeys;
import net.sf.saxon.expr.ExpressionLocation;
import net.sf.saxon.om.NameChecker;
import net.sf.saxon.om.NamePool;
import net.sf.saxon.om.NodeInfo;
import net.sf.saxon.om.StandardNames;
import net.sf.saxon.tinytree.CharSlice;
import net.sf.saxon.tinytree.CompressedWhitespace;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.type.ValidationException;
import net.sf.saxon.value.Whitespace;
import org.xml.sax.*;
import org.xml.sax.ext.Attributes2;
import org.xml.sax.ext.LexicalHandler;


import javax.xml.transform.Result;
import javax.xml.transform.TransformerException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;


/**
  * ReceivingContentHandler is a glue class that provides a standard SAX ContentHandler
  * interface to a Saxon Receiver. To achieve this it needs to map names supplied
  * as strings to numeric name codes, for which purpose it needs access to a name
  * pool. The class also performs the function of assembling adjacent text nodes.
  * <p>The class was previously named ContentEmitter.</p>
  * <p>If the input stream contains the processing instructions assigned by JAXP to switch
  * disable-output-escaping on or off, these will be reflected in properties set in the corresponding
  * characters events. In this case adjacent text nodes will not be combined.</p>
  * @author Michael H. Kay
  */


public class ReceivingContentHandler
        implements ContentHandler, LexicalHandler, DTDHandler //, SaxonLocator, SourceLocationProvider
{
    // name pool obtained from the Configuration; used to allocate name codes and namespace codes
    private NamePool pool;
    // pipeline configuration supplied via setPipelineConfiguration()
    private PipelineConfiguration pipe;
    // the Receiver to which the translated events are passed
    private Receiver receiver;
    private boolean inDTD = false;  // true while processing the DTD
    private Locator locator;        // a SAX Locator (null until setDocumentLocator() is called)
    // adapter that exposes the SAX locator to the pipeline as a location provider
    private LocalLocator localLocator = new LocalLocator();


    // buffer for accumulating character data, until the next markup event is received;
    // charsUsed is the number of characters currently held, and slice is a CharSlice over
    // the buffer whose length is set to charsUsed when the text is flushed


    private char[] buffer = new char[512];
    private int charsUsed = 0;
    private CharSlice slice = new CharSlice(buffer, 0, 0);


    // array for accumulating namespace information: the namespace codes notified by
    // startPrefixMapping() since the last element start; namespacesUsed is the number of entries in use


    private int[] namespaces = new int[20];
    private int namespacesUsed = 0;


    // determine whether ignorable whitespace is ignored


    private boolean ignoreIgnorable = false;


    // determine whether DTD attribute types are retained


    private boolean retainDTDAttributeTypes = false;


    // determine whether DTD attribute value defaults should be suppressed


    private boolean suppressDTDAttributeDefaults = false;


    // indicate that escaping is allowed to be disabled using the JAXP-defined processing instructions


    private boolean allowDisableOutputEscaping = false;


    // indicate that escaping is disabled for the text currently being accumulated


    private boolean escapingDisabled = false;


    /**
     * A local cache is used to avoid allocating namecodes for the same name more than once.
     * This reduces contention on the NamePool. This is a two-level hashmap: the first level
     * has the namespace URI as its key, and returns a HashMap which maps lexical QNames to integer
     * namecodes. The second-level map for the null namespace (URI "") is additionally held
     * directly in noNamespaceMap, so that names in no namespace need only one lookup.
     */


    private HashMap<String, HashMap<Object, Integer>> cache =
            new HashMap<String, HashMap<Object, Integer>>(10);
    private HashMap<Object, Integer> noNamespaceMap;


//    private static Class attributes2class;
//    private static Method isSpecifiedMethod;




    /**
    * Create a ReceivingContentHandler and initialise variables
    */


    public ReceivingContentHandler() {
    }


    /**
     * Set the ReceivingContentHandler to its initial state, except for the local name cache,
     * which is retained
     */


    public void reset() {
        pipe = null;
        pool = null;
        receiver = null;
        ignoreIgnorable = false;
        retainDTDAttributeTypes = false;
        charsUsed = 0;
        slice.setLength(0);
        namespacesUsed = 0;
        locator = null;
        allowDisableOutputEscaping = false;
        escapingDisabled = false;
    }


    /**
     * Set the receiver to which events are passed. ReceivingContentHandler is essentially a translator
     * that takes SAX events as input and produces Saxon Receiver events as output; these Receiver events
     * are passed to the supplied Receiver
     * @param receiver the Receiver of events
     */


    public void setReceiver(Receiver receiver) {
    this.receiver = receiver;
        //receiver = new TracingFilter(receiver);
  }


    /**
     * Set the pipeline configuration
     * @param pipe the pipeline configuration. This holds a reference to the Saxon configuration, as well as
     * information that can vary from one pipeline to another, for example the LocationProvider which resolves
     * the location of events in a source document
     */


    public void setPipelineConfiguration(PipelineConfiguration pipe) {
        this.pipe = pipe;
        pipe.setLocationProvider(localLocator);
        Configuration config = pipe.getConfiguration();
        pool = config.getNamePool();
        ignoreIgnorable = pipe.getParseOptions().getStripSpace() != Whitespace.NONE;
        retainDTDAttributeTypes = config.isRetainDTDAttributeTypes();
        suppressDTDAttributeDefaults = !pipe.isExpandAttributeDefaults();
        Boolean b = (Boolean)config.getConfigurationProperty(FeatureKeys.USE_PI_DISABLE_OUTPUT_ESCAPING);
        allowDisableOutputEscaping = b.booleanValue();
    }


    /**
     * Get the pipeline configuration
     * @return the pipeline configuration as supplied to
    {@link #setPipelineConfiguration(PipelineConfiguration)}
     */


    public PipelineConfiguration getPipelineConfiguration() {
        return pipe;
    }


    /**
     * Get the Configuration object
     * @return the Saxon configuration
     */


    public Configuration getConfiguration() {
        return pipe.getConfiguration();
    }


    /**
     * Set whether "ignorable whitespace" should be ignored. This method is effective only
     * if called after setPipelineConfiguration, since the default value is taken from the
     * configuration.
     * @param ignore true if ignorable whitespace (whitespace in element content that is notified
     * via the {@link #ignorableWhitespace(char[], int, int)} method) should be ignored, false if
     * it should be treated as ordinary text.
     */


    public void setIgnoreIgnorableWhitespace(boolean ignore) {
        ignoreIgnorable = ignore;
    }


    /**
     * Determine whether "ignorable whitespace" is ignored. This returns the value that was set
     * using {@link #setIgnoreIgnorableWhitespace} if that has been called; otherwise the value
     * from the configuration.
     * @return true if ignorable whitespace is being ignored
     */


    public boolean isIgnoringIgnorableWhitespace() {
        return ignoreIgnorable;
    }


    /**
     * Receive notification of the beginning of a document.
     */


    public void startDocument () throws SAXException {
        //System.err.println("ReceivingContentHandler#startDocument");
        try {
            charsUsed = 0;
            namespacesUsed = 0;
            pipe.setLocationProvider(localLocator);
            receiver.setPipelineConfiguration(pipe);
            receiver.open();
            receiver.startDocument(0);
        } catch (XPathException err) {
            throw new SAXException(err);
        }
    }


    /**
    * Receive notification of the end of a document
    */


    public void endDocument () throws SAXException {
        // System.err.println("RCH: end document");
        try {
            flush();
            receiver.endDocument();
            receiver.close();
        } catch (ValidationException err) {
            err.setLocator(locator);
            throw new SAXException(err);
        } catch (XPathException err) {
            throw new SAXException(err);
        }
    }


    /**
     * Supply a locator that can be called to give information about location in the source document
     * being parsed.
    */


    public void setDocumentLocator (Locator locator) {
      this.locator = locator;
    }


    /**
    * Notify a namespace prefix to URI binding
    */


    public void startPrefixMapping(String prefix, String uri) throws SAXException {
        //System.err.println("StartPrefixMapping " + prefix + "=" + uri);
        if (prefix.equals("xmlns")) {
            // the binding xmlns:xmlns="http://www.w3.org/2000/xmlns/"
            // should never be reported, but it's been known to happen
            return;
        }
      if (namespacesUsed >= namespaces.length) {
        int[] n2 = new int[namespacesUsed * 2];
        System.arraycopy(namespaces, 0, n2, 0, namespacesUsed);
        namespaces = n2;
      }
      namespaces[namespacesUsed++] = pool.allocateNamespaceCode(prefix, uri);
    }


    /**
    * Notify that a namespace binding is going out of scope
    */


    public void endPrefixMapping(String prefix) throws SAXException {}


    /**
    * Notify an element start event, including all the associated attributes
    */


    public void startElement (String uri, String localname, String rawname, Attributes atts)
    throws SAXException
    {
//        System.err.println("ReceivingContentHandler#startElement " +
//                uri + "," + localname + "," + rawname +
//                " at line " + locator.getLineNumber() + " of " + locator.getSystemId());
        //for (int a=0; a<atts.getLength(); a++) {
        //     System.err.println("  Attribute " + atts.getURI(a) + "/" + atts.getLocalName(a) + "/" + atts.getQName(a));
        //}
        try {
            flush();


        int nameCode = getNameCode(uri, localname, rawname);
        receiver.startElement(nameCode, StandardNames.XS_UNTYPED, 0, ReceiverOptions.NAMESPACE_OK);


        for (int n=0; n<namespacesUsed; n++) {
            receiver.namespace(namespaces[n], 0);
        }


        for (int a=0; a<atts.getLength(); a++) {
                int properties = ReceiverOptions.NAMESPACE_OK;
                String qname = atts.getQName(a);
                if (qname.startsWith("xmlns") && (qname.equals("xmlns") || qname.startsWith("xmlns:"))) {
                    // We normally configure the parser so that it doesn't notify namespaces as attributes.
                    // But when running as a TransformerHandler, we have no control over the feature settings
                    // of the sender of the events. So we filter them out, just in case. There might be cases
                    // where we ought not just to ignore them, but to handle them as namespace events, but
                    // we'll cross that bridge when we come to it.
                    continue;
                }
                // Note JDK15 dependency on Attributes2.isSpecified()
                if (suppressDTDAttributeDefaults
                        && atts instanceof Attributes2
                        && !((Attributes2)atts).isSpecified(qname)) {
                    continue;
                }
//                if (suppressDTDAttributeDefaults /*&& attributes2class == null*/ ) {
//                    try {
//                        attributes2class = getConfiguration().getClass("org.xml.sax.ext.Attributes2", false, null);
//                        //noinspection RedundantArrayCreation
//                        isSpecifiedMethod = attributes2class.getMethod("isSpecified", new Class[]{String.class});
//                    } catch (XPathException e) {
//                        suppressDTDAttributeDefaults = false;
//                        attributes2class = null;
//                    } catch (NoSuchMethodException e) {
//                        suppressDTDAttributeDefaults = false;
//                        attributes2class = null;
//                    }
//
//                    if (suppressDTDAttributeDefaults) {
//                        if (atts instanceof Attributes2) {
//                            try {
//                                //noinspection RedundantArrayCreation
//                                Boolean specified = (Boolean)isSpecifiedMethod.invoke(atts, new Object[]{qname});
//                                //if (!((Attributes2)atts).isSpecified(a)) {
//                                if (!specified.booleanValue()) {
//                                    // skip this attribute
//                                    continue;
//                                }
//                            } catch (IllegalAccessException e) {
//                                suppressDTDAttributeDefaults = false;
//                            } catch (InvocationTargetException e) {
//                                suppressDTDAttributeDefaults = false;
//                            }
//                        } else {
//                            // XML parser doesn't report whether attributes were defaulted, so we give up
//                            suppressDTDAttributeDefaults = false;
//                        }
//                    }
//                }
                int attCode = getNameCode(atts.getURI(a), atts.getLocalName(a), atts.getQName(a));
            String type = atts.getType(a);
            int typeCode = StandardNames.XS_UNTYPED_ATOMIC;
                if (retainDTDAttributeTypes) {
                    if (type.equals("CDATA")) {
                        // common case, no action
                    } else if (type.equals("ID")) {
                        typeCode = StandardNames.XS_ID;
                    } else if (type.equals("IDREF")) {
                        typeCode = StandardNames.XS_IDREF;
                    } else if (type.equals("IDREFS")) {
                        typeCode = StandardNames.XS_IDREFS;
                    } else if (type.equals("NMTOKEN")) {
                        typeCode = StandardNames.XS_NMTOKEN;
                    } else if (type.equals("NMTOKENS")) {
                        typeCode = StandardNames.XS_NMTOKENS;
                    } else if (type.equals("ENTITY")) {
                        typeCode = StandardNames.XS_ENTITY;
                    } else if (type.equals("ENTITIES")) {
                        typeCode = StandardNames.XS_ENTITIES;
                    }
                } else {
                    if (type.equals("CDATA")) {
                        // common case, do nothing
                    } else if (type.equals("ID")) {
                        typeCode = StandardNames.XS_ID | NodeInfo.IS_DTD_TYPE;
                    } else if (type.equals("IDREF")) {
                        typeCode = StandardNames.XS_IDREF | NodeInfo.IS_DTD_TYPE;
                    } else if (type.equals("IDREFS")) {
                        typeCode = StandardNames.XS_IDREFS | NodeInfo.IS_DTD_TYPE;
                    }
                }


            receiver.attribute(attCode, typeCode, atts.getValue(a), 0, properties);
        }


        receiver.startContent();


            namespacesUsed = 0;
        } catch (ValidationException err) {
            if (err.getLineNumber() == -1) {
                err.setLocator(locator);
            }
            throw new SAXException(err);
        } catch (XPathException err) {
            throw new SAXException(err);
        }
    }


    /**
     * Get the NamePool name code associated with a name appearing in the document
     * @param uri the namespace URI
     * @param localname the local part of the name
     * @param rawname the lexical QName
     * @return the NamePool name code, newly allocated if necessary
     * @throws SAXException if the information supplied by the SAX parser is insufficient
     */


    private int getNameCode(String uri, String localname, String rawname) throws SAXException {
        // System.err.println("URI=" + uri + " local=" + " raw=" + rawname);
        // The XML parser isn't required to report the rawname (qname), though all known parsers do.
        // If none is provided, we give up
        if (rawname.length() == 0) {
            throw new SAXException("Saxon requires an XML parser that reports the QName of each element");
        }
        // It's also possible (especially when using a TransformerHandler) that the parser
        // has been configured to report the QName rather than the localname+URI
        if (localname.length() == 0) {
            throw new SAXException("Parser configuration problem: namespace reporting is not enabled");
        }


        // Following code maintains a local cache to remember all the namecodes that have been
        // allocated, which reduces contention on the NamePool. It also avoids parsing the lexical QName
        // when the same name is used repeatedly. We also get a tiny improvement by avoiding the first hash
        // table lookup for names in the null namespace.


        HashMap<Object, Integer> map2 = (uri.length() == 0 ? noNamespaceMap : cache.get(uri));
        if (map2 == null) {
            map2 = new HashMap<Object, Integer>(50);
            cache.put(uri, map2);
            if (uri.length() == 0) {
                noNamespaceMap = map2;
            }
        }


        Integer n = map2.get(rawname);
        // we use the rawname (qname) rather than the local name because we want a namecode rather than
        // a fingerprint - that is, the prefix matters.
        if (n == null) {
            String prefix = NameChecker.getPrefix(rawname);
            int nc = pool.allocate(prefix, uri, localname);
            n = new Integer(nc);
            map2.put(rawname, n);
            return nc;
        } else {
            return n.intValue();
        }


    }






    /**
    * Report the end of an element (the close tag)
    */


    public void endElement (String uri, String localname, String rawname) throws SAXException {
        //System.err.println("ReceivingContentHandler#End element " + rawname);
        try {
            flush();
            receiver.endElement();
        } catch (ValidationException err) {
            err.maybeSetLocation(ExpressionLocation.makeFromSax(locator));
            if (!err.hasBeenReported()) {
                try {
                    pipe.getErrorListener().fatalError(err);
                } catch (TransformerException e) {
                    //
                }
            }
            err.setHasBeenReported(true);
            throw new SAXException(err);
        } catch (XPathException err) {
            throw new SAXException(err);
        }
    }


    /**
     * Report character data. Note that contiguous character data may be reported as a sequence of
     * calls on this method, with arbitrary boundaries
    */


    public void characters (char ch[], int start, int length) throws SAXException {
        // System.err.println("characters (" + length + ")");
        // need to concatenate chunks of text before we can decide whether a node is all-white


        while (charsUsed + length > buffer.length) {
            char[] newbuffer = new char[buffer.length*2];
            System.arraycopy(buffer, 0, newbuffer, 0, charsUsed);
            buffer = newbuffer;
            slice = new CharSlice(buffer, 0, 0);
        }
        System.arraycopy(ch, start, buffer, charsUsed, length);
        charsUsed += length;
    }


    /**
     * Report character data classified as "Ignorable whitespace", that is, whitespace text nodes
     * appearing as children of elements with an element-only content model
    */


    public void ignorableWhitespace (char ch[], int start, int length) throws SAXException {
        if (!ignoreIgnorable) {
            characters(ch, start, length);
        }
    }


    /**
    * Notify the existence of a processing instruction
    */


    public void processingInstruction (String name, String remainder) throws SAXException {
        try {
            flush();
            if (!inDTD) {
                if (name==null) {
                  // trick used by the old James Clark xp parser to notify a comment
                  comment(remainder.toCharArray(), 0, remainder.length());
                } else {
                    // some parsers allow through PI names containing colons
                    if (!getConfiguration().getNameChecker().isValidNCName(name)) {
                        throw new SAXException("Invalid processing instruction name (" + name + ')');
                    }
                    if (allowDisableOutputEscaping) {
                        if (name.equals(Result.PI_DISABLE_OUTPUT_ESCAPING)) {
                            //flush();
                            escapingDisabled = true;
                            return;
                        } else if (name.equals(Result.PI_ENABLE_OUTPUT_ESCAPING)) {
                            //flush();
                            escapingDisabled = false;
                            return;
                        }
                    }
                    receiver.processingInstruction(name, Whitespace.removeLeadingWhitespace(remainder), 0, 0);
                }
            }
        } catch (XPathException err) {
            throw new SAXException(err);
        }
    }


    /**
     * Notify the existence of a comment. Note that in SAX this is part of LexicalHandler interface
     * rather than the ContentHandler interface.
    */


    public void comment (char ch[], int start, int length) throws SAXException {
        try {
            flush();
            if (!inDTD) {
              receiver.comment(new CharSlice(ch, start, length), 0, 0);
            }
        } catch (XPathException err) {
            throw new SAXException(err);
        }
    }


    /**
    * Flush buffer for accumulated character data
    */


    private void flush() throws XPathException {
        if (charsUsed > 0) {
            slice.setLength(charsUsed);
            CharSequence cs = CompressedWhitespace.compress(slice);
            receiver.characters(cs, 0,
                    escapingDisabled ? ReceiverOptions.DISABLE_ESCAPING : ReceiverOptions.WHOLE_TEXT_NODE);
            charsUsed = 0;
            escapingDisabled = false;
        }
    }


    /**
     * Notify a skipped entity. Saxon ignores this event
     */


    public void skippedEntity(String name) throws SAXException {}


    // Methods of the LexicalHandler interface. Apart from startDTD() and endDTD(), these are no-ops


  /**
   * Register the start of the DTD. Saxon ignores the DTD; however, it needs to know when the DTD starts and
     * ends so that it can ignore comments in the DTD, which are reported like any other comment, but which
     * are skipped because they are not part of the XPath data model
  */


    public void startDTD (String name, String publicId, String systemId) throws SAXException {
    inDTD = true;
    }


  /**
  * Register the end of the DTD. Comments in the DTD are skipped because they
  * are not part of the XPath data model
  */


    public void endDTD () throws SAXException {
    inDTD = false;
    }


    public void startEntity (String name) throws SAXException {}


    public void endEntity (String name)  throws SAXException {}


    public void startCDATA () throws SAXException {}


    public void endCDATA ()  throws SAXException {}


    //////////////////////////////////////////////////////////////////////////////
    // Implement DTDHandler interface
    //////////////////////////////////////////////////////////////////////////////




    public void notationDecl(       String name,
                                    String publicId,
                                    String systemId) throws SAXException
    {}




    public void unparsedEntityDecl( String name,
                                    String publicId,
                                    String systemId,
                                    String notationName) throws SAXException {
        // Some (non-conformant) SAX parsers report the systemId as written.
        // We need to turn it into an absolute URL.


        String uri = systemId;
        if (locator != null) {
            try {
                URI suppliedURI = new URI(systemId);
                if (!suppliedURI.isAbsolute()) {
                    String baseURI = locator.getSystemId();
                    if (baseURI != null) {   // See bug 2167979
                        URI absoluteURI = new URI(baseURI).resolve(systemId);
                        uri = absoluteURI.toString();
                    }
                }
            } catch (URISyntaxException err) {
                uri = systemId; // fallback
            }
        }
        try {
            receiver.setUnparsedEntity(name, uri, publicId);
        } catch (XPathException err) {
            throw new SAXException(err);
        }
    }








    private class LocalLocator implements SaxonLocator, SourceLocationProvider {


        // This class is needed to bridge a SAX Locator to a JAXP SourceLocator


        /**
         * Return the system identifier for the current document event.
         * @return A string containing the system identifier, or
         *         null if none is available.
         */


        public String getSystemId() {
            return (locator == null ? null : locator.getSystemId());
        }


        /**
         * Return the public identifier for the current document event.
         * @return A string containing the public identifier, or
         *         null if none is available.
         */


        public String getPublicId() {
            return (locator==null ? null : locator.getPublicId());
        }


        /**
         * Return the line number where the current document event ends.
         * @return The line number, or -1 if none is available.
         */


        public int getLineNumber() {
            return (locator==null ? -1 : locator.getLineNumber());
        }


        /**
         * Return the character position where the current document event ends.
         * @return The column number, or -1 if none is available.
         */


        public int getColumnNumber() {
            return (locator==null ? -1 : locator.getColumnNumber());
        }


        /**
         * Get the line number within the document or module containing a particular location
         *
         * @param locationId identifier of the location in question (as passed down the Receiver pipeline)
         * @return the line number within the document or module.
         */


        public int getLineNumber(long locationId) {
            return (locator==null ? -1 : locator.getLineNumber());
        }


        public int getColumnNumber(long locationId) {
            return (locator==null ? -1 : locator.getColumnNumber());
        } 


        /**
         * Get the URI of the document or module containing a particular location
         *
         * @param locationId identifier of the location in question (as passed down the Receiver pipeline)
         * @return the URI of the document or module.
         */


        public String getSystemId(long locationId) {
            return (locator == null ? null : locator.getSystemId());
        }
    }


}   // end of class ReceivingContentHandler


//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file.
//
// The Initial Developer of the Original Code is Michael H. Kay.
//
// Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
//
// Contributor(s): none.
//
// Source Code of net.sf.saxon.event.ReceivingContentHandler$LocalLocator
//
// Related Classes of net.sf.saxon.event.ReceivingContentHandler$LocalLocator