package org.pdf4j.saxon.dom;
import org.pdf4j.saxon.AugmentedSource;
import org.pdf4j.saxon.Configuration;
import org.pdf4j.saxon.event.Builder;
import org.pdf4j.saxon.event.PipelineConfiguration;
import org.pdf4j.saxon.event.Sender;
import org.pdf4j.saxon.om.Validation;
import org.pdf4j.saxon.tinytree.TinyBuilder;
import org.pdf4j.saxon.tinytree.TinyDocumentImpl;
import org.pdf4j.saxon.trans.XPathException;
import org.pdf4j.saxon.value.Whitespace;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.xml.sax.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.Source;
import java.io.File;
import java.io.IOException;
/**
 * This class implements the JAXP DocumentBuilder interface, allowing a Saxon TinyTree to be
 * constructed using standard JAXP parsing interfaces. The returned DOM document node is a wrapper
 * over the Saxon TinyTree structure. Note that although this wrapper
 * implements the DOM interfaces, it is read-only, and all attempts to update it will throw
 * an exception. No schema validation is requested by this class; DTD validation is requested
 * only if {@link #setValidating} has been called with the value true.
 */
public class DocumentBuilderImpl extends DocumentBuilder {

    private Configuration config;
    private EntityResolver entityResolver;
    private ErrorHandler errorHandler;
    private boolean xIncludeAware;
    private boolean validating;
    private int stripSpace = Whitespace.UNSPECIFIED;

    /**
     * Set the Saxon Configuration to be used by the document builder.
     * This non-JAXP method must be called if the resulting document is to be used
     * within a Saxon query or transformation. If no Configuration is supplied,
     * Saxon creates a Configuration on the first call to the {@link #parse} method,
     * and subsequent calls reuse the same Configuration.
     *
     * <p>As an alternative to calling this method, a Configuration can be supplied by calling
     * <code>setAttribute(FeatureKeys.CONFIGURATION, config)</code> on the <code>DocumentBuilderFactory</code>
     * object, where <code>config</code> can be obtained by calling
     * <code>getAttribute(FeatureKeys.CONFIGURATION)</code> on the <code>TransformerFactory</code>.</p>
     *
     * @param config the Configuration to be used by subsequent calls to {@link #parse}
     * @since Saxon 8.8
     */
    public void setConfiguration(Configuration config) {
        this.config = config;
    }

    /**
     * Get the Saxon Configuration to be used by the document builder. This is
     * a non-JAXP method.
     *
     * @return the Configuration previously supplied to {@link #setConfiguration},
     * or the Configuration created automatically by Saxon on the first call to the
     * {@link #parse} method, or null if no Configuration has been supplied and
     * the {@link #parse} method has not been called.
     * @since Saxon 8.8
     */
    public Configuration getConfiguration() {
        return config;
    }

    /**
     * Indicates whether or not this document builder is configured to
     * understand namespaces.
     *
     * @return true if this document builder is configured to understand
     * namespaces. This implementation always returns true.
     */
    public boolean isNamespaceAware() {
        return true;
    }

    /**
     * Determine whether the document builder should perform DTD validation.
     *
     * @param state set to true to request DTD validation
     */
    public void setValidating(boolean state) {
        validating = state;
    }

    /**
     * Indicates whether or not this document builder is configured to
     * validate XML documents against a DTD.
     *
     * @return true if this parser is configured to validate
     * XML documents against a DTD; false otherwise.
     */
    public boolean isValidating() {
        return validating;
    }

    /**
     * Create a new Document Node.
     *
     * @return never returns normally
     * @throws UnsupportedOperationException (always). The only way to build a document using this DocumentBuilder
     * implementation is by using the parse() method.
     */
    public Document newDocument() {
        throw new UnsupportedOperationException("The only way to build a document using this DocumentBuilder is with the parse() method");
    }

    /**
     * Parse the content of the given input source as an XML document
     * and return a new DOM {@link Document} object.
     *
     * <p>Note: for this document to be usable as part of a Saxon query or transformation,
     * the document should be built within the {@link Configuration} in which that query
     * or transformation is running. This can be achieved using the non-JAXP
     * {@link #setConfiguration} method.</p>
     *
     * <p>If an EntityResolver or ErrorHandler has been supplied, an XMLReader is obtained
     * from the Configuration, the resolver and/or handler are registered with it, and that
     * reader is used for the parse. The XInclude, DTD validation and space-stripping settings
     * of this builder are passed to the parse by wrapping the source in an
     * {@link AugmentedSource}.</p>
     *
     * @param in InputSource containing the content to be parsed.
     * @exception SAXException If any parse errors occur.
     * @throws IllegalArgumentException if <code>in</code> is null
     * @return A new DOM Document object.
     */
    public Document parse(InputSource in) throws SAXException {
        if (in == null) {
            // Required by the JAXP contract, and consistent with parse(File) in this class
            throw new IllegalArgumentException("InputSource cannot be null");
        }
        try {
            Builder builder = new TinyBuilder();
            if (config == null) {
                config = new Configuration();
            }
            PipelineConfiguration pipe = config.makePipelineConfiguration();
            builder.setPipelineConfiguration(pipe);

            SAXSource source = new SAXSource(in);
            if (entityResolver != null || errorHandler != null) {
                XMLReader reader = source.getXMLReader();
                if (reader == null) {
                    reader = config.getSourceParser();
                }
                if (entityResolver != null) {
                    reader.setEntityResolver(entityResolver);
                }
                if (errorHandler != null) {
                    reader.setErrorHandler(errorHandler);
                }
                // Attach the configured reader to the source; without this the resolver and
                // handler are set on an object that takes no part in the parse.
                source.setXMLReader(reader);
            }
            source.setSystemId(in.getSystemId());

            Source ss = source;
            if (xIncludeAware) {
                ss = AugmentedSource.makeAugmentedSource(ss);
                ((AugmentedSource)ss).setXIncludeAware(true);
            }
            if (validating) {
                ss = AugmentedSource.makeAugmentedSource(ss);
                ((AugmentedSource)ss).setDTDValidationMode(Validation.STRICT);
            }
            if (stripSpace != Whitespace.UNSPECIFIED) {
                ss = AugmentedSource.makeAugmentedSource(ss);
                ((AugmentedSource)ss).setStripSpace(stripSpace);
            }

            // Send the (possibly augmented) source, not the bare SAXSource, so that the
            // options set above actually reach the parser.
            new Sender(pipe).send(ss, builder);
            TinyDocumentImpl doc = (TinyDocumentImpl)builder.getCurrentRoot();
            builder.reset();
            return (Document)DocumentOverNodeInfo.wrap(doc);
        } catch (XPathException err) {
            throw new SAXException(err);
        }
    }

    /**
     * Parse the content of the given file as an XML document
     * and return a new DOM {@link Document} object.
     * An <code>IllegalArgumentException</code> is thrown if the
     * <code>File</code> is <code>null</code>.
     *
     * <p><i>This implementation differs from the parent implementation
     * by using a correct algorithm for filename-to-uri conversion.</i></p>
     *
     * @param f The file containing the XML to parse.
     * @exception java.io.IOException If any IO errors occur.
     * @exception SAXException If any parse errors occur.
     * @return A new DOM Document object.
     */
    public Document parse(File f) throws SAXException, IOException {
        if (f == null) {
            throw new IllegalArgumentException("File cannot be null");
        }
        String uri = f.toURI().toString();
        InputSource in = new InputSource(uri);
        return parse(in);
    }

    /**
     * Specify the {@link EntityResolver} to be used to resolve
     * entities present in the XML document to be parsed. If this is
     * <code>null</code>, this class registers no EntityResolver with the parser.
     *
     * @param er The <code>EntityResolver</code> to be used to resolve entities
     * present in the XML document to be parsed.
     */
    public void setEntityResolver(EntityResolver er) {
        entityResolver = er;
    }

    /**
     * Specify the {@link ErrorHandler} to be used by the parser.
     * If this is <code>null</code>, this class registers no ErrorHandler with the parser.
     *
     * @param eh The <code>ErrorHandler</code> to be used by the parser.
     */
    public void setErrorHandler(ErrorHandler eh) {
        errorHandler = eh;
    }

    /**
     * Obtain an instance of a {@link DOMImplementation} object.
     *
     * <p>This implementation delegates to {@link #newDocument}, which always throws
     * <code>UnsupportedOperationException</code>, so this method currently never
     * returns normally.</p>
     *
     * @return never returns normally in this implementation
     * @throws UnsupportedOperationException (always), because {@link #newDocument} is not supported
     */
    public DOMImplementation getDOMImplementation() {
        return newDocument().getImplementation();
    }

    /**
     * <p>Set state of XInclude processing.</p>
     *
     * <p>If XInclude markup is found in the document instance, should it be
     * processed as specified in <a href="http://www.w3.org/TR/xinclude/">
     * XML Inclusions (XInclude) Version 1.0</a>.</p>
     *
     * <p>XInclude processing defaults to <code>false</code>.</p>
     *
     * @param state Set XInclude processing to <code>true</code> or
     * <code>false</code>
     */
    public void setXIncludeAware(boolean state) {
        xIncludeAware = state;
    }

    /**
     * <p>Get the XInclude processing mode for this parser.</p>
     *
     * @return the value most recently supplied to {@link #setXIncludeAware}, or
     * false if that method has not been called on this object.
     * @see javax.xml.parsers.DocumentBuilderFactory#setXIncludeAware(boolean)
     * @since JAXP 1.5, Saxon 8.9
     */
    public boolean isXIncludeAware() {
        return xIncludeAware;
    }

    /**
     * Set the space-stripping action to be applied to the source document
     *
     * @param stripAction one of {@link org.pdf4j.saxon.value.Whitespace#IGNORABLE},
     * {@link org.pdf4j.saxon.value.Whitespace#ALL}, or {@link org.pdf4j.saxon.value.Whitespace#NONE}
     * @since 8.9
     */
    public void setStripSpace(int stripAction) {
        stripSpace = stripAction;
    }

    /**
     * Get the space-stripping action to be applied to the source document
     *
     * @return the value supplied to {@link #setStripSpace}, or
     * {@link org.pdf4j.saxon.value.Whitespace#UNSPECIFIED} if no value has been set
     * @since 8.9
     */
    public int getStripSpace() {
        return stripSpace;
    }
}
//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file.
//
// The Initial Developer of the Original Code is Michael H. Kay
//
// Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
//
// Contributor(s): none
//