/*
* Copyright 2003-2007 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package groovy.util;
import groovy.util.slurpersupport.GPathResult;
import groovy.util.slurpersupport.Node;
import groovy.util.slurpersupport.NodeChild;
import groovy.xml.FactorySupport;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Map;
import java.util.Stack;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.DTDHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
/**
 * A SAX content handler that builds a tree of {@link Node} objects from the events it
 * receives and hands the result back wrapped in a {@link GPathResult}.
 * <p>
 * The usual entry points are the <code>parse</code> methods and {@link #parseText(String)},
 * which install this object as the content handler of the underlying {@link XMLReader},
 * run the parse and return the document. An instance keeps the state of the parse in
 * progress (current node, element stack, pending character data) in its fields.
 *
 * @author John Wilson
 */
public class XmlSlurper extends DefaultHandler {
    /** The reader that produces the SAX events consumed by this handler. */
    private final XMLReader reader;
    /** The element currently being built; the root element once it has been closed. */
    private Node currentNode = null;
    /** Parents of {@link #currentNode}; the bottom entry is the null parent of the root. */
    private final Stack stack = new Stack();
    /** Character data collected since the last element boundary. */
    private final StringBuffer charBuffer = new StringBuffer();
    /** Prefix to namespace URI mappings reported by the parser; shared with returned results. */
    private final Map namespaceTagHints = new Hashtable();
    /** Whether whitespace-only character data is kept as a child of the enclosing element. */
    private boolean keepWhitespace = false;

    /**
     * Creates a non-validating, namespace aware slurper.
     *
     * @throws ParserConfigurationException if a parser cannot be created
     * @throws SAXException if the underlying SAX parser cannot be set up
     */
    public XmlSlurper() throws ParserConfigurationException, SAXException {
        this(false, true);
    }

    /**
     * Creates a slurper backed by a freshly created SAX parser.
     *
     * @param validating     passed to {@link SAXParserFactory#setValidating(boolean)}
     * @param namespaceAware passed to {@link SAXParserFactory#setNamespaceAware(boolean)}
     * @throws ParserConfigurationException if a parser cannot be created
     * @throws SAXException if the underlying SAX parser cannot be set up
     */
    public XmlSlurper(final boolean validating, final boolean namespaceAware) throws ParserConfigurationException, SAXException {
        SAXParserFactory factory = FactorySupport.createSaxParserFactory();
        factory.setNamespaceAware(namespaceAware);
        factory.setValidating(validating);
        this.reader = factory.newSAXParser().getXMLReader();
    }

    /**
     * Creates a slurper that uses the supplied reader as its source of SAX events.
     *
     * @param reader the reader used by the parse methods and the delegated XMLReader methods
     */
    public XmlSlurper(final XMLReader reader) {
        this.reader = reader;
    }

    /**
     * Creates a slurper that uses the XMLReader of the supplied parser.
     *
     * @param parser the parser whose {@link SAXParser#getXMLReader()} is used
     * @throws SAXException if the reader cannot be obtained from the parser
     */
    public XmlSlurper(final SAXParser parser) throws SAXException {
        this(parser.getXMLReader());
    }

    /**
     * @param keepWhitespace
     *
     * If true then whitespace before elements is kept.
     * The default is to discard the whitespace.
     */
    public void setKeepWhitespace(boolean keepWhitespace) {
        this.keepWhitespace = keepWhitespace;
    }

    /**
     * Hands over the document built from the SAX events consumed so far.
     * <p>
     * The current node is cleared as part of this call. The parse methods call this
     * method themselves, so calling it after one of them, or calling it a second time
     * without new SAX events in between, yields a result that wraps a null node.
     *
     * @return The GPathResult instance created by consuming a stream of SAX events
     */
    public GPathResult getDocument() {
        try {
            return new NodeChild(this.currentNode, null, this.namespaceTagHints);
        } finally {
            // Release the tree so this handler does not keep the document alive.
            this.currentNode = null;
        }
    }

    /**
     * Parse the content of the specified input source into a GPathResult object
     *
     * @param input the source to read the XML from
     * @return An object which supports GPath expressions
     * @throws IOException if reading the input fails
     * @throws SAXException if the XML cannot be parsed
     */
    public GPathResult parse(final InputSource input) throws IOException, SAXException {
        this.reader.setContentHandler(this);
        this.reader.parse(input);
        return getDocument();
    }

    /**
     * Parses the content of the given file as XML turning it into a GPathResult object.
     * The stream opened on the file is closed before this method returns, whether or
     * not parsing succeeds.
     *
     * @param file the file to read the XML from
     * @return An object which supports GPath expressions
     * @throws IOException if the file cannot be opened, read or closed
     * @throws SAXException if the XML cannot be parsed
     */
    public GPathResult parse(final File file) throws IOException, SAXException {
        final FileInputStream stream = new FileInputStream(file);
        try {
            final InputSource input = new InputSource(stream);
            // NOTE(review): plain concatenation does not escape the path and may not be a
            // well-formed URI on every platform; kept as-is for compatibility - confirm
            // whether file.toURI() would be acceptable to callers.
            input.setSystemId("file://" + file.getAbsolutePath());
            return parse(input);
        } finally {
            // The parser is not guaranteed to close the stream (for example when it
            // fails part way through), so always release the file handle here.
            stream.close();
        }
    }

    /**
     * Parse the content of the specified input stream into a GPathResult object.
     * Note that using this method will not provide the parser with any URI
     * for which to find DTDs etc
     *
     * @param input the stream to read the XML from; it is not closed by this method itself
     * @return An object which supports GPath expressions
     * @throws IOException if reading the input fails
     * @throws SAXException if the XML cannot be parsed
     */
    public GPathResult parse(final InputStream input) throws IOException, SAXException {
        return parse(new InputSource(input));
    }

    /**
     * Parse the content of the specified reader into a GPathResult object.
     * Note that using this method will not provide the parser with any URI
     * for which to find DTDs etc
     *
     * @param in the reader to read the XML from; it is not closed by this method itself
     * @return An object which supports GPath expressions
     * @throws IOException if reading the input fails
     * @throws SAXException if the XML cannot be parsed
     */
    public GPathResult parse(final Reader in) throws IOException, SAXException {
        return parse(new InputSource(in));
    }

    /**
     * Parse the content of the specified URI into a GPathResult object
     *
     * @param uri used as the system identifier of the input source handed to the parser
     * @return An object which supports GPath expressions
     * @throws IOException if reading the input fails
     * @throws SAXException if the XML cannot be parsed
     */
    public GPathResult parse(final String uri) throws IOException, SAXException {
        return parse(new InputSource(uri));
    }

    /**
     * A helper method to parse the given text as XML
     *
     * @param text the XML document as a string
     * @return An object which supports GPath expressions
     * @throws IOException if reading the text fails
     * @throws SAXException if the XML cannot be parsed
     */
    public GPathResult parseText(final String text) throws IOException, SAXException {
        return parse(new StringReader(text));
    }

    // Delegated XMLReader methods
    //------------------------------------------------------------------------

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#getDTDHandler()
     */
    public DTDHandler getDTDHandler() {
        return this.reader.getDTDHandler();
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#getEntityResolver()
     */
    public EntityResolver getEntityResolver() {
        return this.reader.getEntityResolver();
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#getErrorHandler()
     */
    public ErrorHandler getErrorHandler() {
        return this.reader.getErrorHandler();
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
     */
    public boolean getFeature(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
        return this.reader.getFeature(uri);
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
     */
    public Object getProperty(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
        return this.reader.getProperty(uri);
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
     */
    public void setDTDHandler(final DTDHandler dtdHandler) {
        this.reader.setDTDHandler(dtdHandler);
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
     */
    public void setEntityResolver(final EntityResolver entityResolver) {
        this.reader.setEntityResolver(entityResolver);
    }

    /**
     * Resolves entities using the supplied URL as the base for relative URLs.
     * This replaces any entity resolver previously set on the underlying reader.
     *
     * @param base
     * The URL used to resolve relative URLs
     */
    public void setEntityBaseUrl(final URL base) {
        this.reader.setEntityResolver(new EntityResolver() {
            public InputSource resolveEntity(final String publicId, final String systemId) throws IOException {
                // The public id is ignored; only the system id is resolved against the base.
                return new InputSource(new URL(base, systemId).openStream());
            }
        });
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
     */
    public void setErrorHandler(final ErrorHandler errorHandler) {
        this.reader.setErrorHandler(errorHandler);
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
     */
    public void setFeature(final String uri, final boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
        this.reader.setFeature(uri, value);
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)
     */
    public void setProperty(final String uri, final Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
        this.reader.setProperty(uri, value);
    }

    // ContentHandler interface
    //-------------------------------------------------------------------------

    /* (non-Javadoc)
     * @see org.xml.sax.ContentHandler#startDocument()
     */
    public void startDocument() throws SAXException {
        this.currentNode = null;
        this.charBuffer.setLength(0);
        // A previous parse that was aborted by an exception leaves unpopped parents behind;
        // drop them so they do not keep the old tree reachable. The namespace hints are
        // deliberately left alone because results already returned share that map.
        this.stack.clear();
    }

    /* (non-Javadoc)
     * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String, java.lang.String)
     */
    public void startPrefixMapping(final String tag, final String uri) throws SAXException {
        this.namespaceTagHints.put(tag, uri);
    }

    /* (non-Javadoc)
     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
     */
    public void startElement(final String namespaceURI, final String localName, final String qName, final Attributes atts) throws SAXException {
        // Any text collected so far belongs to the enclosing element, ahead of this child.
        addCdata();

        final Map attributes = new HashMap();
        final Map attributeNamespaces = new HashMap();

        for (int i = atts.getLength() - 1; i != -1; i--) {
            if (atts.getURI(i).length() == 0) {
                // No namespace: key the attribute by its qualified name.
                attributes.put(atts.getQName(i), atts.getValue(i));
            } else {
                // Namespaced: key by local name and remember the namespace separately.
                attributes.put(atts.getLocalName(i), atts.getValue(i));
                attributeNamespaces.put(atts.getLocalName(i), atts.getURI(i));
            }
        }

        // Elements follow the same naming rule as attributes: qName without a namespace,
        // local name with one.
        final Node newElement;
        if (namespaceURI.length() == 0) {
            newElement = new Node(this.currentNode, qName, attributes, attributeNamespaces, namespaceURI);
        } else {
            newElement = new Node(this.currentNode, localName, attributes, attributeNamespaces, namespaceURI);
        }

        if (this.currentNode != null) {
            this.currentNode.addChild(newElement);
        }

        // For the root element this pushes null, which endElement uses to keep the root current.
        this.stack.push(this.currentNode);
        this.currentNode = newElement;
    }

    /* (non-Javadoc)
     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
     */
    public void characters(final char[] ch, final int start, final int length) throws SAXException {
        // Text may arrive in several chunks; it is flushed at the next element boundary.
        this.charBuffer.append(ch, start, length);
    }

    /* (non-Javadoc)
     * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
     */
    public void endElement(final String namespaceURI, final String localName, final String qName) throws SAXException {
        addCdata();

        final Object oldCurrentNode = this.stack.pop();

        // A null parent means the root element has just been closed: leave the root as the
        // current node so that getDocument() can return it.
        if (oldCurrentNode != null) {
            this.currentNode = (Node)oldCurrentNode;
        }
    }

    /* (non-Javadoc)
     * @see org.xml.sax.ContentHandler#endDocument()
     */
    public void endDocument() throws SAXException {
    }

    // Implementation methods
    //-------------------------------------------------------------------------

    /**
     * Flushes the pending character data, if any, into the current node as a text child.
     * The buffer is always emptied; the text is only added when it contains something
     * other than whitespace or when keepWhitespace is set.
     */
    private void addCdata() {
        if (this.charBuffer.length() != 0) {
            //
            // The element boundary being processed is preceded by character data.
            // Add it to the body of the current node unless it is all whitespace and
            // keepWhitespace is false (the default setting).
            // Note that, according to the XML spec, we should preserve the CDATA if it's all whitespace
            // but for the sort of work I'm doing ignoring the whitespace is preferable
            //
            final String cdata = this.charBuffer.toString();

            this.charBuffer.setLength(0);
            if (this.keepWhitespace || cdata.trim().length() != 0) {
                this.currentNode.addChild(cdata);
            }
        }
    }
}