/*
* Copyright 2003-2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package groovy.xml;
import groovy.lang.Closure;
import groovy.lang.GroovyRuntimeException;
import groovy.lang.Writable;
import groovy.util.Node;
import groovy.util.XmlNodePrinter;
import groovy.util.slurpersupport.GPathResult;
import org.codehaus.groovy.runtime.InvokerHelper;
import org.codehaus.groovy.runtime.StringGroovyMethods;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import java.io.*;
import java.net.URL;
/**
* Used for pretty printing XML content and other XML related utilities.
*
* @author Paul King
*/
public class XmlUtil {
/**
* Return a pretty String version of the Element.
*
* @param element the Element to serialize
* @return the pretty String representation of the Element
*/
public static String serialize(Element element) {
StringWriter sw = new StringWriter();
serialize(new DOMSource(element), sw);
return sw.toString();
}
/**
* Write a pretty version of the Element to the OutputStream.
*
* @param element the Element to serialize
* @param os the OutputStream to write to
*/
public static void serialize(Element element, OutputStream os) {
Source source = new DOMSource(element);
serialize(source, os);
}
/**
* Write a pretty version of the Element to the Writer.
*
* @param element the Element to serialize
* @param w the Writer to write to
*/
public static void serialize(Element element, Writer w) {
Source source = new DOMSource(element);
serialize(source, w);
}
/**
* Return a pretty String version of the Node.
*
* @param node the Node to serialize
* @return the pretty String representation of the Node
*/
public static String serialize(Node node) {
return serialize(asString(node));
}
/**
* Write a pretty version of the Node to the OutputStream.
*
* @param node the Node to serialize
* @param os the OutputStream to write to
*/
public static void serialize(Node node, OutputStream os) {
serialize(asString(node), os);
}
/**
* Write a pretty version of the Node to the Writer.
*
* @param node the Node to serialize
* @param w the Writer to write to
*/
public static void serialize(Node node, Writer w) {
serialize(asString(node), w);
}
/**
* Return a pretty version of the GPathResult.
*
* @param node a GPathResult to serialize to a String
* @return the pretty String representation of the GPathResult
*/
public static String serialize(GPathResult node) {
return serialize(asString(node));
}
/**
* Write a pretty version of the GPathResult to the OutputStream.
*
* @param node a GPathResult to serialize
* @param os the OutputStream to write to
*/
public static void serialize(GPathResult node, OutputStream os) {
serialize(asString(node), os);
}
/**
* Write a pretty version of the GPathResult to the Writer.
*
* @param node a GPathResult to serialize
* @param w the Writer to write to
*/
public static void serialize(GPathResult node, Writer w) {
serialize(asString(node), w);
}
/**
* Return a pretty String version of the XML content produced by the Writable.
*
* @param writable the Writable to serialize
* @return the pretty String representation of the content from the Writable
*/
public static String serialize(Writable writable) {
return serialize(asString(writable));
}
/**
* Write a pretty version of the XML content produced by the Writable to the OutputStream.
*
* @param writable the Writable to serialize
* @param os the OutputStream to write to
*/
public static void serialize(Writable writable, OutputStream os) {
serialize(asString(writable), os);
}
/**
* Write a pretty version of the XML content produced by the Writable to the Writer.
*
* @param writable the Writable to serialize
* @param w the Writer to write to
*/
public static void serialize(Writable writable, Writer w) {
serialize(asString(writable), w);
}
/**
* Return a pretty version of the XML content contained in the given String.
*
* @param xmlString the String to serialize
* @return the pretty String representation of the original content
*/
public static String serialize(String xmlString) {
StringWriter sw = new StringWriter();
serialize(asStreamSource(xmlString), sw);
return sw.toString();
}
/**
* Write a pretty version of the given XML string to the OutputStream.
*
* @param xmlString the String to serialize
* @param os the OutputStream to write to
*/
public static void serialize(String xmlString, OutputStream os) {
serialize(asStreamSource(xmlString), os);
}
/**
* Write a pretty version of the given XML string to the Writer.
*
* @param xmlString the String to serialize
* @param w the Writer to write to
*/
public static void serialize(String xmlString, Writer w) {
serialize(asStreamSource(xmlString), w);
}
/**
* Factory method to create a SAXParser configured to validate according to a particular schema language and
* optionally providing the schema sources to validate with.
* The created SAXParser will be namespace-aware and not validate against DTDs.
*
* @param schemaLanguage the schema language used, e.g. XML Schema or RelaxNG (as per the String representation in javax.xml.XMLConstants)
* @param schemas the schemas to validate against
* @return the created SAXParser
* @throws SAXException
* @throws ParserConfigurationException
* @see #newSAXParser(String, boolean, boolean, Source...)
* @since 1.8.7
*/
public static SAXParser newSAXParser(String schemaLanguage, Source... schemas) throws SAXException, ParserConfigurationException {
return newSAXParser(schemaLanguage, true, false, schemas);
}
/**
* Factory method to create a SAXParser configured to validate according to a particular schema language and
* optionally providing the schema sources to validate with.
*
* @param schemaLanguage the schema language used, e.g. XML Schema or RelaxNG (as per the String representation in javax.xml.XMLConstants)
* @param namespaceAware will the parser be namespace aware
* @param validating will the parser also validate against DTDs
* @param schemas the schemas to validate against
* @return the created SAXParser
* @throws SAXException
* @throws ParserConfigurationException
* @since 1.8.7
*/
public static SAXParser newSAXParser(String schemaLanguage, boolean namespaceAware, boolean validating, Source... schemas) throws SAXException, ParserConfigurationException {
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setValidating(validating);
factory.setNamespaceAware(namespaceAware);
if (schemas.length != 0) {
SchemaFactory schemaFactory = SchemaFactory.newInstance(schemaLanguage);
factory.setSchema(schemaFactory.newSchema(schemas));
}
SAXParser saxParser = factory.newSAXParser();
if (schemas.length == 0) {
saxParser.setProperty("http://java.sun.com/xml/jaxp/properties/schemaLanguage", schemaLanguage);
}
return saxParser;
}
/**
* Factory method to create a SAXParser configured to validate according to a particular schema language and
* a File containing the schema to validate against.
* The created SAXParser will be namespace-aware and not validate against DTDs.
*
* @param schemaLanguage the schema language used, e.g. XML Schema or RelaxNG (as per the String representation in javax.xml.XMLConstants)
* @param schema a file containing the schema to validate against
* @return the created SAXParser
* @throws SAXException
* @throws ParserConfigurationException
* @see #newSAXParser(String, boolean, boolean, File)
* @since 1.8.7
*/
public static SAXParser newSAXParser(String schemaLanguage, File schema) throws SAXException, ParserConfigurationException {
return newSAXParser(schemaLanguage, true, false, schema);
}
/**
* Factory method to create a SAXParser configured to validate according to a particular schema language and
* a File containing the schema to validate against.
*
* @param schemaLanguage the schema language used, e.g. XML Schema or RelaxNG (as per the String representation in javax.xml.XMLConstants)
* @param namespaceAware will the parser be namespace aware
* @param validating will the parser also validate against DTDs
* @param schema a file containing the schema to validate against
* @return the created SAXParser
* @throws SAXException
* @throws ParserConfigurationException
* @since 1.8.7
*/
public static SAXParser newSAXParser(String schemaLanguage, boolean namespaceAware, boolean validating, File schema) throws SAXException, ParserConfigurationException {
SchemaFactory schemaFactory = SchemaFactory.newInstance(schemaLanguage);
return newSAXParser(namespaceAware, validating, schemaFactory.newSchema(schema));
}
/**
* Factory method to create a SAXParser configured to validate according to a particular schema language and
* an URL pointing to the schema to validate against.
* The created SAXParser will be namespace-aware and not validate against DTDs.
*
* @param schemaLanguage the schema language used, e.g. XML Schema or RelaxNG (as per the String representation in javax.xml.XMLConstants)
* @param schema a URL pointing to the schema to validate against
* @return the created SAXParser
* @throws SAXException
* @throws ParserConfigurationException
* @see #newSAXParser(String, boolean, boolean, URL)
* @since 1.8.7
*/
public static SAXParser newSAXParser(String schemaLanguage, URL schema) throws SAXException, ParserConfigurationException {
return newSAXParser(schemaLanguage, true, false, schema);
}
/**
* Factory method to create a SAXParser configured to validate according to a particular schema language and
* an URL pointing to the schema to validate against.
*
* @param schemaLanguage the schema language used, e.g. XML Schema or RelaxNG (as per the String representation in javax.xml.XMLConstants)
* @param namespaceAware will the parser be namespace aware
* @param validating will the parser also validate against DTDs
* @param schema a URL pointing to the schema to validate against
* @return the created SAXParser
* @throws SAXException
* @throws ParserConfigurationException
* @since 1.8.7
*/
public static SAXParser newSAXParser(String schemaLanguage, boolean namespaceAware, boolean validating, URL schema) throws SAXException, ParserConfigurationException {
SchemaFactory schemaFactory = SchemaFactory.newInstance(schemaLanguage);
return newSAXParser(namespaceAware, validating, schemaFactory.newSchema(schema));
}
/**
* Escape the following characters {@code " ' & < >} with their XML entities, e.g.
* {@code "bread" & "butter"} becomes {@code "bread" & "butter"}.
* Notes:<ul>
* <li>Supports only the five basic XML entities (gt, lt, quot, amp, apos)</li>
* <li>Does not escape control characters</li>
* <li>Does not support DTDs or external entities</li>
* <li>Does not treat surrogate pairs specially</li>
* <li>Does not perform Unicode validation on its input</li>
* </ul>
*
* @param orig the original String
* @return A new string in which all characters that require escaping
* have been replaced with the corresponding XML entities.
* @see #escapeControlCharacters(String)
*/
public static String escapeXml(String orig) {
return StringGroovyMethods.collectReplacements(orig, new Closure<String>(null) {
public String doCall(Character arg) {
switch (arg) {
case '&':
return "&";
case '<':
return "<";
case '>':
return ">";
case '"':
return """;
case '\'':
return "'";
}
return null;
}
});
}
/**
* Escape control characters (below 0x20) with their XML entities, e.g.
* carriage return ({@code Ctrl-M or \r}) becomes {@code }
* Notes:<ul>
* <li>Does not escape non-ascii characters above 0x7e</li>
* <li>Does not treat surrogate pairs specially</li>
* <li>Does not perform Unicode validation on its input</li>
* </ul>
*
* @param orig the original String
* @return A new string in which all characters that require escaping
* have been replaced with the corresponding XML entities.
* @see #escapeXml(String)
*/
public static String escapeControlCharacters(String orig) {
return StringGroovyMethods.collectReplacements(orig, new Closure<String>(null) {
public String doCall(Character arg) {
if (arg < 31) {
return "&#" + (int) arg + ";";
}
return null;
}
});
}
private static SAXParser newSAXParser(boolean namespaceAware, boolean validating, Schema schema1) throws ParserConfigurationException, SAXException {
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setValidating(validating);
factory.setNamespaceAware(namespaceAware);
factory.setSchema(schema1);
return factory.newSAXParser();
}
private static String asString(Node node) {
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
XmlNodePrinter nodePrinter = new XmlNodePrinter(pw);
nodePrinter.setPreserveWhitespace(true);
nodePrinter.print(node);
return sw.toString();
}
private static String asString(GPathResult node) {
// little bit of hackery to avoid Groovy dependency in this file
try {
Object builder = ((Class) Class.forName("groovy.xml.StreamingMarkupBuilder")).newInstance();
InvokerHelper.setProperty(builder, "encoding", "UTF-8");
Writable w = (Writable) InvokerHelper.invokeMethod(builder, "bindNode", node);
return "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + w.toString();
} catch (Exception e) {
return "Couldn't convert node to string because: " + e.getMessage();
}
}
// TODO: replace with stream-based version
private static String asString(Writable writable) {
if (writable instanceof GPathResult) {
return asString((GPathResult) writable); //GROOVY-4285
}
Writer sw = new StringWriter();
try {
writable.writeTo(sw);
} catch (IOException e) {
// ignore
}
return sw.toString();
}
private static StreamSource asStreamSource(String xmlString) {
return new StreamSource(new StringReader(xmlString));
}
private static void serialize(Source source, OutputStream os) {
try {
serialize(source, new StreamResult(new OutputStreamWriter(os, "UTF-8")));
}
catch (UnsupportedEncodingException e) {
// ignore
}
}
private static void serialize(Source source, Writer w) {
serialize(source, new StreamResult(w));
}
private static void serialize(Source source, StreamResult target) {
TransformerFactory factory = TransformerFactory.newInstance();
setIndent(factory, 2);
try {
Transformer transformer = factory.newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
transformer.setOutputProperty(OutputKeys.MEDIA_TYPE, "text/xml");
transformer.transform(source, target);
}
catch (TransformerException e) {
throw new GroovyRuntimeException(e.getMessage());
}
}
private static void setIndent(TransformerFactory factory, int indent) {
// TODO: support older parser attribute values as well
try {
factory.setAttribute("indent-number", indent);
} catch (IllegalArgumentException e) {
// ignore for factories that don't support this
}
}
}