package org.pdf4j.saxon.functions;
import org.pdf4j.saxon.*;
import org.pdf4j.saxon.event.Builder;
import org.pdf4j.saxon.event.PipelineConfiguration;
import org.pdf4j.saxon.event.Receiver;
import org.pdf4j.saxon.event.Sender;
import org.pdf4j.saxon.expr.*;
import org.pdf4j.saxon.om.DocumentInfo;
import org.pdf4j.saxon.om.Item;
import org.pdf4j.saxon.om.NodeInfo;
import org.pdf4j.saxon.om.SequenceIterator;
import org.pdf4j.saxon.sort.DocumentOrderIterator;
import org.pdf4j.saxon.sort.GlobalOrderComparer;
import org.pdf4j.saxon.trans.Err;
import org.pdf4j.saxon.trans.XPathException;
import org.pdf4j.saxon.value.AtomicValue;
import org.pdf4j.saxon.value.Cardinality;
import org.pdf4j.saxon.value.SingletonNode;
import org.pdf4j.saxon.value.Whitespace;
import javax.xml.transform.Source;
import javax.xml.transform.SourceLocator;
import javax.xml.transform.TransformerException;
import javax.xml.transform.URIResolver;
import javax.xml.transform.dom.DOMSource;
import java.net.URI;
import java.net.URISyntaxException;
/**
* Implements the XSLT document() function
*/
public class Document extends SystemFunction implements XSLTFunction {
private String expressionBaseURI = null;
private boolean readOnce = false; // TODO: implement this. The idea is that if streaming copy cannot be
// used, then document projection will be used instead, for this specific call on the document() function.
/**
 * Mark the document(s) selected by this call as being read once only (or as documents that
 * should be treated as if they are read once only). The intended meaning is that (a) the
 * document need not be held in memory once all references to it go out of scope, and (b) a
 * second attempt to read it may yield a new copy, with different node identities and potentially
 * different content. Such a document is also eligible for document projection.
 *
 * <p>This method only records the flag.</p>
 *
 * @param once true if this document is to be treated as being read once only
 */
public void setReadOnce(boolean once) {
    this.readOnce = once;
}
/**
 * Report the read-once flag previously recorded by {@link #setReadOnce(boolean)}.
 *
 * @return true if the document has been marked as being read once only; false otherwise
 * (false is the initial value)
 */
public boolean isReadOnce() {
    return this.readOnce;
}
/**
 * Check the arguments and capture the static base URI of the expression.
 *
 * <p>The work is done only while no base URI has been captured yet. A later call supplies an
 * environment pointing to the containing xsl:template, which has a different base URI (and in a
 * simplified stylesheet, has no base URI), so that call must not overwrite the value.</p>
 *
 * @param visitor the expression visitor giving access to the static context and configuration
 * @throws XPathException if the superclass argument check fails
 */
public void checkArguments(ExpressionVisitor visitor) throws XPathException {
    if (expressionBaseURI != null) {
        // already initialised by an earlier call: keep the base URI captured then
        return;
    }
    super.checkArguments(visitor);
    expressionBaseURI = visitor.getStaticContext().getBaseURI();
    // the order of the hrefs is irrelevant here, so ask for the unsorted form of the first argument
    argument[0] = ExpressionTool.unsorted(visitor.getConfiguration().getOptimizer(), argument[0], false);
}
/**
 * Determine the static cardinality of the result from the cardinality of the first argument.
 *
 * @return {@link StaticProperty#ALLOWS_ZERO_OR_MORE} if the first argument may deliver more than
 * one item, otherwise {@link StaticProperty#ALLOWS_ZERO_OR_ONE}
 */
public int computeCardinality() {
    // may have to revise this if the argument can be a list-valued element or attribute
    boolean manyHrefs = Cardinality.allowsMany(argument[0].getCardinality());
    return manyHrefs
            ? StaticProperty.ALLOWS_ZERO_OR_MORE
            : StaticProperty.ALLOWS_ZERO_OR_ONE;
}
/**
 * Get the base URI that was captured from the static context by
 * {@link #checkArguments(ExpressionVisitor)}.
 *
 * @return the captured base URI, or null if none has been captured
 */
public String getStaticBaseURI() {
    return this.expressionBaseURI;
}
/**
 * Get the static properties of this expression (other than its type). The result is
 * bit-significant. These properties are used for optimizations. In general, if a
 * property bit is set, it is true, but if it is unset, the value is unknown.
 *
 * @return the combination of ORDERED_NODESET, PEER_NODESET and NON_CREATIVE
 */
public int computeSpecialProperties() {
    // Declaring it as a peer node-set expression avoids sorting of expressions such as
    // document(XXX)/a/b/c
    int properties = StaticProperty.ORDERED_NODESET | StaticProperty.PEER_NODESET;
    // The document() function might appear to be creative: but it isn't, because multiple calls
    // with the same arguments will produce identical results.
    properties |= StaticProperty.NON_CREATIVE;
    return properties;
}
/**
 * preEvaluate: the document() function can be evaluated at compile time if (a) it has a single
 * argument that can be evaluated without a dynamic context (typically a string literal), and
 * (b) the option {@link FeatureKeys#PRE_EVALUATE_DOC_FUNCTION} is set.
 *
 * <p>Pre-evaluation is strictly best-effort: whenever it cannot be completed (the argument is
 * the empty sequence, the href carries a fragment identifier, the document cannot be loaded, or
 * any exception occurs) this expression itself is returned so that evaluation is retried at
 * run-time. The method never returns null.</p>
 *
 * @param visitor an expression visitor
 * @return a literal holding the pre-loaded document node, or this expression unchanged
 */
public Expression preEvaluate(ExpressionVisitor visitor) {
    Configuration config = visitor.getConfiguration();
    if (getNumberOfArguments() == 1 &&
            ((Boolean)config.getConfigurationProperty(FeatureKeys.PRE_EVALUATE_DOC_FUNCTION)).booleanValue()) {
        try {
            // no dynamic context is available at compile time, hence the null context
            AtomicValue hrefVal = (AtomicValue)argument[0].evaluateItem(null);
            if (hrefVal == null) {
                // empty argument: nothing to pre-load; leave the call in place rather than
                // handing a null expression back to the caller
                return this;
            }
            String href = hrefVal.getStringValue();
            if (href.indexOf('#') >= 0) {
                // preLoadDoc does not support fragment identifiers
                return this;
            }
            NodeInfo item = Document.preLoadDoc(href, expressionBaseURI, config, this);
            if (item != null) {
                return new Literal(new SingletonNode(item));
            }
        } catch (Exception err) {
            // ignore the exception and try again at run-time
            return this;
        }
    }
    return this;
}
/**
 * Add a representation of this expression to a PathMap. The PathMap captures a map of the nodes
 * visited by an expression in a source tree. The work is delegated to
 * {@code addDocToPathMap}.
 *
 * @param pathMap the PathMap to which the expression should be added
 * @param pathMapNodeSet the set of nodes in the path map that are affected
 * @return the pathMapNode representing the focus established by this expression, in the case
 * where this expression is the first operand of a path expression or filter expression
 */
public PathMap.PathMapNodeSet addToPathMap(PathMap pathMap, PathMap.PathMapNodeSet pathMapNodeSet) {
    PathMap.PathMapNodeSet focus = addDocToPathMap(pathMap, pathMapNodeSet);
    return focus;
}
/**
 * Copy this expression. The copy is obtained from the superclass and then given the same
 * static base URI and read-once flag as this expression.
 *
 * @return the copy of the original expression
 */
public Expression copy() {
    Document duplicate = (Document)super.copy();
    duplicate.readOnce = this.readOnce;
    duplicate.expressionBaseURI = this.expressionBaseURI;
    return duplicate;
}
/**
 * iterate() handles evaluation of the function: each item delivered by the first argument is
 * mapped to a document node (see {@link #makeDoc}).
 *
 * <p>When a second argument is present, the base URI of the node it selects is used for every
 * href. Otherwise the base URI is chosen per item by the mapping function.</p>
 *
 * @param context the dynamic evaluation context
 * @return an iterator over the resulting nodes; if the first argument may deliver several items
 * the result is passed through a {@link DocumentOrderIterator}
 * @throws XPathException if evaluating either argument fails
 */
public SequenceIterator iterate(XPathContext context) throws XPathException {
    Expression hrefExpression = argument[0];
    SequenceIterator hrefs = hrefExpression.iterate(context);

    String explicitBase = null;
    if (argument.length == 2) {
        // we can trust the type checking: it must be a node
        NodeInfo baseNode = (NodeInfo)argument[1].evaluateItem(context);
        explicitBase = baseNode.getBaseURI();
    }

    DocumentMappingFunction mapper = new DocumentMappingFunction(context);
    mapper.baseURI = explicitBase;
    mapper.stylesheetURI = expressionBaseURI;
    mapper.locator = this;
    ItemMappingIterator documents = new ItemMappingIterator(hrefs, mapper);

    if (!Cardinality.allowsMany(hrefExpression.getCardinality())) {
        return documents;
    }
    // this is to make sure we eliminate duplicates: two href's might be the same
    return new DocumentOrderIterator(documents, GlobalOrderComparer.getInstance());
}
/**
 * Mapping function that turns one href item into the document it identifies, by calling
 * {@link Document#makeDoc}.
 */
private static class DocumentMappingFunction implements ItemMappingFunction {
    /** Explicit base URI; when non-null it is used for every item. */
    public String baseURI;
    /** Fallback base URI, used for items that are not nodes. */
    public String stylesheetURI;
    /** Location information passed on to makeDoc for error reporting. */
    public SourceLocator locator;
    /** The dynamic context passed on to makeDoc. */
    public XPathContext context;

    public DocumentMappingFunction(XPathContext context) {
        this.context = context;
    }

    /**
     * Load the document identified by one href item.
     *
     * @param item the href item; its string value is used as the (relative) URI
     * @return the node returned by makeDoc
     * @throws XPathException if makeDoc fails
     */
    public Item map(Item item) throws XPathException {
        String effectiveBase;
        if (baseURI != null) {
            effectiveBase = baseURI;
        } else if (item instanceof NodeInfo) {
            // no explicit base: a node supplies its own base URI
            effectiveBase = ((NodeInfo)item).getBaseURI();
        } else {
            effectiveBase = stylesheetURI;
        }
        return makeDoc(item.getStringValue(), effectiveBase, context, locator);
    }
}
/**
 * Supporting routine to load one external document given a URI (href) and a baseURI. This is used
 * in the normal case when a document is loaded at run-time (that is, when a Controller is available).
 *
 * <p>Processing steps: any fragment identifier is split off the href; the remaining href is made
 * absolute to form the document key; the global document pool and then the controller's document
 * pool are searched for that key; failing that, a Source is obtained from the URIResolver, the
 * tree is built, and the new document is registered with the controller. In every successful
 * case the fragment identifier (if any) is applied to the document that was found or built.</p>
 *
 * @param href the relative URI, optionally followed by '#' and a fragment identifier
 * @param baseURI the base URI; may be null if none is available
 * @param c the dynamic XPath context
 * @param locator used to identify the location of the instruction in event of error
 * @return the root of the constructed document, or the selected element within the document
 * if a fragment identifier was supplied; null if loading failed and the controller treated
 * the failure as recoverable
 * @throws XPathException if the fragment identifier is not a valid NCName (XTRE1160), if the
 * document was written during the same transformation (XTRE1500), or if loading fails
 * (FODC0002) and the controller does not recover from the error
 */
public static NodeInfo makeDoc(String href, String baseURI, XPathContext c, SourceLocator locator)
        throws XPathException {
    Configuration config = c.getConfiguration();

    // If the href contains a fragment identifier, strip it out now
    int hash = href.indexOf('#');
    String fragmentId = null;
    if (hash >= 0) {
        if (hash == href.length() - 1) {
            // # sign at end - just ignore it
            href = href.substring(0, hash);
        } else {
            fragmentId = href.substring(hash + 1);
            href = href.substring(0, hash);
            if (!config.getNameChecker().isValidNCName(fragmentId)) {
                XPathException de = new XPathException("The fragment identifier " + Err.wrap(fragmentId) + " is not a valid NCName");
                de.setErrorCode("XTRE1160");
                de.setXPathContext(c);
                throw de;
            }
        }
    }

    Controller controller = c.getController();

    // Resolve relative URI
    String documentKey;
    URIResolver resolver = controller.getURIResolver();
    if (resolver == null) {
        resolver = controller.getStandardURIResolver();
    }
    if (resolver instanceof RelativeURIResolver) {
        // If this is the case, the URIResolver is responsible for absolutization as well as dereferencing
        try {
            documentKey = ((RelativeURIResolver)resolver).makeAbsolute(href, baseURI);
        } catch (TransformerException e) {
            documentKey = '/' + href;
            baseURI = "";
        }
    } else {
        // Saxon takes charge of absolutization, leaving the user URIResolver to handle dereferencing only
        if (baseURI == null) {    // no base URI available
            try {
                // the href might be an absolute URL
                documentKey = (new URI(href)).toString();
            } catch (URISyntaxException err) {
                // it isn't; but the URI resolver might know how to cope
                documentKey = '/' + href;
                baseURI = "";
            }
        } else if (href.length() == 0) {
            // common case in XSLT, which java.net.URI#resolve() does not handle correctly
            documentKey = baseURI;
        } else {
            try {
                URI uri = new URI(baseURI).resolve(href);
                documentKey = uri.toString();
            } catch (URISyntaxException err) {
                documentKey = baseURI + "/../" + href;
            } catch (IllegalArgumentException err) {
                documentKey = baseURI + "/../" + href;
            }
        }
    }

    // see if the document is already loaded; a fragment identifier must be honoured
    // regardless of which pool supplies the document
    DocumentInfo doc = config.getGlobalDocumentPool().find(documentKey);
    if (doc != null) {
        return getFragment(doc, fragmentId, c);
    }

    doc = controller.getDocumentPool().find(documentKey);
    if (doc != null) {
        return getFragment(doc, fragmentId, c);
    }

    // check that the document was not written by this transformation
    if (!controller.checkUniqueOutputDestination(documentKey)) {
        XPathException err = new XPathException(
                "Cannot read a document that was written during the same transformation: " + documentKey);
        err.setXPathContext(c);
        err.setErrorCode("XTRE1500");
        throw err;
    }

    try {
        // Get a Source from the URIResolver
        Source source;
        if (resolver instanceof RelativeURIResolver) {
            try {
                source = ((RelativeURIResolver)resolver).dereference(documentKey);
            } catch (Exception ex) {
                XPathException de = new XPathException("Exception thrown by URIResolver", ex);
                if (controller.getConfiguration().isTraceExternalFunctions()) {
                    ex.printStackTrace();
                }
                de.setLocator(locator);
                throw de;
            }
        } else {
            try {
                source = resolver.resolve(href, baseURI);
            } catch (Exception ex) {
                XPathException de = new XPathException("Exception thrown by URIResolver", ex);
                if (controller.getConfiguration().isTraceExternalFunctions()) {
                    ex.printStackTrace();
                }
                de.setLocator(locator);
                throw de;
            }
        }

        // if a user URI resolver returns null, try the standard one
        // (Note, the standard URI resolver never returns null)
        if (source == null && !(resolver instanceof NonDelegatingURIResolver)) {
            resolver = controller.getStandardURIResolver();
            if (resolver instanceof RelativeURIResolver) {
                source = ((RelativeURIResolver)resolver).dereference(documentKey);
            } else {
                source = resolver.resolve(href, baseURI);
            }
        }

        source = config.getSourceResolver().resolveSource(source, config);

        DocumentInfo newdoc;
        if (source instanceof NodeInfo || source instanceof DOMSource) {
            // the source is already a tree: let the controller prepare it
            NodeInfo startNode = controller.prepareInputTree(source);
            newdoc = startNode.getDocumentRoot();
        } else {
            // build a new tree, applying XSLT whitespace stripping
            Builder b = controller.makeBuilder();
            Receiver s = b;
            source = AugmentedSource.makeAugmentedSource(source);
            ((AugmentedSource)source).setStripSpace(Whitespace.XSLT);
            if (controller.getExecutable().stripsInputTypeAnnotations()) {
                s = controller.getConfiguration().getAnnotationStripper(s);
            }
            // apply document projection when a usable path map root exists for this document
            PathMap map = controller.getPathMapForDocumentProjection();
            if (map != null) {
                PathMap.PathMapRoot pathRoot = map.getRootForDocument(documentKey);
                if (pathRoot != null && !pathRoot.isReturnable() && !pathRoot.hasUnknownDependencies()) {
                    ((AugmentedSource)source).addFilter(config.makeDocumentProjector(pathRoot));
                }
            }
            new Sender(b.getPipelineConfiguration()).send(source, s);
            newdoc = (DocumentInfo)b.getCurrentRoot();
            b.reset();
            if (source instanceof AugmentedSource && ((AugmentedSource)source).isPleaseCloseAfterUse()) {
                ((AugmentedSource)source).close();
            }
        }
        controller.registerDocument(newdoc, documentKey);
        // a document that has been read must not subsequently be written
        controller.addUnavailableOutputDestination(documentKey);
        return getFragment(newdoc, fragmentId, c);

    } catch (TransformerException err) {
        XPathException xerr = XPathException.makeXPathException(err);
        xerr.setLocator(locator);
        xerr.setErrorCode("FODC0002");
        try {
            controller.recoverableError(xerr);
        } catch (XPathException err2) {
            // report the original failure rather than whatever recoverableError threw
            throw xerr;
        }
        // the error was recovered: there is no document to return
        return null;
    }
}
/**
 * Supporting routine to load one external document given a URI (href) and a baseURI. This is used
 * when the document is pre-loaded at compile time. The document is looked up in, and when newly
 * built added to, the global document pool of the configuration.
 *
 * @param href the relative URI. This must not contain a fragment identifier
 * @param baseURI the base URI; may be null if none is available
 * @param config the Saxon configuration
 * @param locator used to identify the location of the instruction in event of error. May be null.
 * @return the root of the document, either found in the global document pool or newly built
 * @throws XPathException if href contains a fragment identifier, or if the document cannot be
 * resolved or built (in which case the error code is FODC0002 and the locator is set)
 */
public static NodeInfo preLoadDoc(String href, String baseURI, Configuration config, SourceLocator locator)
        throws XPathException {

    int hash = href.indexOf('#');
    if (hash >= 0) {
        throw new XPathException("Fragment identifier not supported for preloaded documents");
    }

    // Resolve relative URI
    String documentKey;
    URIResolver resolver = config.getURIResolver();
    if (resolver instanceof RelativeURIResolver) {
        // the resolver takes charge of absolutization
        try {
            documentKey = ((RelativeURIResolver)resolver).makeAbsolute(href, baseURI);
        } catch (TransformerException e) {
            documentKey = '/' + href;
            baseURI = "";
        }
    } else {
        if (baseURI == null) {    // no base URI available
            try {
                // the href might be an absolute URL
                documentKey = (new URI(href)).toString();
            } catch (URISyntaxException err) {
                // it isn't; but the URI resolver might know how to cope
                documentKey = '/' + href;
                baseURI = "";
            }
        } else if (href.length() == 0) {
            // common case in XSLT, which java.net.URI#resolve() does not handle correctly
            documentKey = baseURI;
        } else {
            try {
                URI uri = new URI(baseURI).resolve(href);
                documentKey = uri.toString();
            } catch (URISyntaxException err) {
                documentKey = baseURI + "/../" + href;
            } catch (IllegalArgumentException err) {
                documentKey = baseURI + "/../" + href;
            }
        }
    }

    // see if the document is already loaded
    DocumentInfo doc = config.getGlobalDocumentPool().find(documentKey);
    if (doc != null) {
        return doc;
    }

    try {
        // Get a Source from the URIResolver
        URIResolver r = resolver;
        Source source = null;
        if (r != null) {
            try {
                source = r.resolve(href, baseURI);
            } catch (Exception ex) {
                XPathException de = new XPathException("Exception thrown by URIResolver", ex);
                if (config.isTraceExternalFunctions()) {
                    ex.printStackTrace();
                }
                de.setLocator(locator);
                throw de;
            }
        }

        // if a user URI resolver returns null, try the standard one
        // (Note, the standard URI resolver never returns null)
        if (source == null && !(r instanceof NonDelegatingURIResolver)) {
            r = config.getSystemURIResolver();
            source = r.resolve(href, baseURI);
        }

        source = config.getSourceResolver().resolveSource(source, config);

        DocumentInfo newdoc = config.buildDocument(source);
        config.getGlobalDocumentPool().add(newdoc, documentKey);
        return newdoc;

    } catch (TransformerException err) {
        XPathException xerr = XPathException.makeXPathException(err);
        xerr.setLocator(locator);
        xerr.setErrorCode("FODC0002");
        // throw the decorated exception, so that the locator and error code reach the caller
        throw xerr;
    }
}
/**
 * Copy the documents identified by this expression to a given Receiver. This method is used only
 * when it is known that the documents are being copied, because there is then no problem about
 * node identity.
 *
 * <p>The base URI for each href is chosen as follows: the base URI of the node selected by the
 * second argument if there is one; otherwise the href's own base URI if the href is a node;
 * otherwise the static base URI of this expression.</p>
 *
 * @param context the XPath dynamic context
 * @param out the destination to which the documents will be sent
 * @throws XPathException if evaluating an argument or sending a document fails
 */
public void sendDocuments(XPathContext context, Receiver out) throws XPathException {
    SequenceIterator hrefs = argument[0].iterate(context);

    String explicitBaseURI = null;
    if (argument.length == 2) {
        // we can trust the type checking: it must be a node
        explicitBaseURI = ((NodeInfo)argument[1].evaluateItem(context)).getBaseURI();
    }

    Item href;
    while ((href = hrefs.next()) != null) {
        String base = explicitBaseURI;
        if (base == null) {
            base = (href instanceof NodeInfo)
                    ? ((NodeInfo)href).getBaseURI()
                    : expressionBaseURI;
        }
        sendDoc(href.getStringValue(), base, context, this, out);
    }
}
/**
 * Supporting routine to push one external document given a URI (href) and a baseURI to a given
 * Receiver. This method cannot handle fragment identifiers.
 *
 * <p>If the controller's document pool already holds a document under the resolved key, that
 * document is sent; otherwise a Source is obtained from the controller's URIResolver, falling
 * back to the standard URIResolver when the former is absent or returns null.</p>
 *
 * @param href the relative URI
 * @param baseURL the base URI; may be null if none is available
 * @param c the XPath dynamic context
 * @param locator used to identify the location of the instruction in case of error
 * @param out the destination where the document is to be sent
 * @throws XPathException if the document cannot be resolved or sent; the error code is
 * FODC0005 unless the underlying exception already carries one
 */
public static void sendDoc(String href, String baseURL, XPathContext c,
                           SourceLocator locator, Receiver out) throws XPathException {

    PipelineConfiguration pipe = out.getPipelineConfiguration();
    if (pipe == null) {
        // the receiver has no pipeline yet: create one and attach it
        pipe = c.getController().makePipelineConfiguration();
        out.setPipelineConfiguration(pipe);
    }

    // Resolve relative URI
    String documentKey;
    if (baseURL == null) {    // no base URI available
        try {
            // the href might be an absolute URL
            documentKey = new URI(href).toString();
        } catch (URISyntaxException err) {
            // it isn't; but the URI resolver might know how to cope
            documentKey = "/" + href;
            baseURL = "";
        }
    } else if (href.length() != 0) {
        try {
            documentKey = new URI(baseURL).resolve(href).toString();
        } catch (URISyntaxException err) {
            documentKey = baseURL + "/../" + href;
        } catch (IllegalArgumentException err) {
            documentKey = baseURL + "/../" + href;
        }
    } else {
        // common case in XSLT, which java.net.URI#resolve() does not handle correctly
        documentKey = baseURL;
    }

    Controller controller = c.getController();

    // see if the document is already loaded
    Source source = controller.getDocumentPool().find(documentKey);
    if (source == null) {
        try {
            // Get a Source from the URIResolver
            URIResolver userResolver = controller.getURIResolver();
            if (userResolver != null) {
                source = userResolver.resolve(href, baseURL);
            }
            // if a user URI resolver returns null, try the standard one
            // (Note, the standard URI resolver never returns null)
            if (source == null) {
                source = controller.getStandardURIResolver().resolve(href, baseURL);
            }
            if (source instanceof NodeInfo || source instanceof DOMSource) {
                source = controller.prepareInputTree(source).getDocumentRoot();
            }
        } catch (TransformerException err) {
            XPathException xerr = XPathException.makeXPathException(err);
            xerr.setLocator(locator);
            if (xerr.getErrorCodeLocalPart() == null) {
                xerr.setErrorCode("FODC0005");
            }
            throw xerr;
        }
    }

    // apply XSLT whitespace stripping, and strip type annotations if the executable asks for it
    Source augmented = AugmentedSource.makeAugmentedSource(source);
    ((AugmentedSource)augmented).setStripSpace(Whitespace.XSLT);
    Receiver destination = out;
    if (controller.getExecutable().stripsInputTypeAnnotations()) {
        destination = controller.getConfiguration().getAnnotationStripper(destination);
    }

    try {
        new Sender(pipe).send(augmented, destination);
    } catch (XPathException e) {
        e.maybeSetLocation(locator);
        if (e.getErrorCodeLocalPart() == null) {
            e.setErrorCode("FODC0005");
        }
        throw e;
    }
}
/**
 * Resolve the fragment identifier within a URI Reference.
 * Only "bare names" XPointers are recognized, that is, a fragment identifier
 * that matches an ID attribute value within the target document.
 *
 * @param doc the document node
 * @param fragmentId the fragment identifier (an ID value within the document); null means
 * that no fragment was requested
 * @param context the XPath dynamic context
 * @return the document itself if fragmentId is null, or if fragmentId is not a valid NCName
 * and the resulting error was recovered; otherwise the result of looking up the ID in the
 * document (expected to be null if no such element is found)
 * @throws XPathException with code XTRE1160 if fragmentId is not a valid NCName and the
 * controller does not recover from the error
 */
private static NodeInfo getFragment(DocumentInfo doc, String fragmentId, XPathContext context)
        throws XPathException {
    // TODO: we only support one kind of fragment identifier. The rules say
    // that the interpretation of the fragment identifier depends on media type,
    // but we aren't getting the media type from the URIResolver.
    if (fragmentId == null) {
        return doc;
    }

    boolean wellFormed = context.getConfiguration().getNameChecker().isValidNCName(fragmentId);
    if (wellFormed) {
        return doc.selectID(fragmentId);
    }

    XPathException err = new XPathException("Invalid fragment identifier in URI");
    err.setXPathContext(context);
    err.setErrorCode("XTRE1160");
    try {
        context.getController().recoverableError(err);
    } catch (XPathException dynamicError) {
        // report the fragment error itself rather than whatever recoverableError threw
        throw err;
    }
    // the error was recovered: fall back to the whole document
    return doc;
}
}
//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file.
//
// The Initial Developer of the Original Code is Michael H. Kay.
//
// Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
//
// Contributor(s): none.
//