/*
* Copyright (c) 1998-2011 Caucho Technology -- all rights reserved
*
* This file is part of Resin(R) Open Source
*
* Each copy or derived work must preserve the copyright notice and this
* notice unmodified.
*
* Resin Open Source is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Resin Open Source is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
* of NON-INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with Resin Open Source; if not, write to the
* Free SoftwareFoundation, Inc.
* 59 Temple Place, Suite 330
* Boston, MA 02111-1307 USA
*
* @author Scott Ferguson
*/
package com.caucho.xml2;
import com.caucho.server.util.CauchoSystem;
import com.caucho.util.L10N;
import com.caucho.vfs.Path;
import com.caucho.vfs.ReadStream;
import com.caucho.vfs.Vfs;
import com.caucho.vfs.VfsStream;
import org.w3c.dom.Document;
import org.xml.sax.*;
import org.xml.sax.ext.LexicalHandler;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Hashtable;
import java.util.Locale;
import java.util.logging.Logger;
abstract public class AbstractParser implements XMLReader, Parser
{
static final Logger log = Logger.getLogger(AbstractParser.class.getName());
static final L10N L = new L10N(AbstractParser.class);

// attribute type names; populated by the static initializer of this class
static Hashtable<String,String> _attrTypes = new Hashtable<String,String>();
// shared entity table installed as the default by the constructor
static Entities _xmlEntities = new XmlEntities();

// parser option flags; clear() reassigns most of these
boolean _isCoalescing = true;
boolean _optionalTags = true;
boolean _skipWhitespace = false;
boolean _skipComments = false;
boolean _strictComments = false;
boolean _strictAttributes = false;
boolean _entitiesAsText = false;
boolean _expandEntities = true;
boolean _strictCharacters = false;
boolean _strictXml = false;
boolean _singleTopElement = false;

// namespace options
boolean _isNamespaceAware = true;
boolean _isNamespacePrefixes = true;
boolean _isSAXNamespaces = false;
// the next two are derived from the namespace options by init()
boolean _isXmlnsPrefix = false;
boolean _isXmlnsAttribute = false;

boolean _isValidating = false;

// encoding options
boolean _isStaticEncoding = false;
String _defaultEncoding = "UTF-8";

// SAX callback objects
ContentHandler _contentHandler;
EntityResolver _entityResolver;
DTDHandler _dtdHandler;
LexicalHandler _lexicalHandler;
ErrorHandler _errorHandler;
Locale _locale;

// document state
Entities _entities;
QDocument _owner;
QDocumentType _dtd;
DOMBuilder _builder;

// location of the current source
Path _searchPath;
String _publicId;
String _systemId;
String _filename;
int _line = 1;
/**
 * Creates a new parser with a newly created QDocumentType as its dtd.
 */
AbstractParser()
{
  this((QDocumentType) null);
}
/**
 * Creates a new parser with the given dtd and the shared XML entity table.
 *
 * @param dtd the parser's dtd; when null a new QDocumentType is created.
 */
AbstractParser(QDocumentType dtd)
{
  _dtd = (dtd != null) ? dtd : new QDocumentType(null);
  _entities = _xmlEntities;
}
/**
 * Reassigns the parser option flags and the default encoding to the
 * fixed values listed below.
 */
void clear()
{
  // namespace options
  _isNamespaceAware = true;
  _isNamespacePrefixes = false;
  _isSAXNamespaces = false;

  // content handling options
  _isCoalescing = true;
  _optionalTags = true;
  _expandEntities = true;
  _entitiesAsText = false;
  _skipWhitespace = false;
  _skipComments = false;
  _singleTopElement = false;

  // strictness options
  _strictXml = false;
  _strictComments = false;
  _strictAttributes = false;
  _strictCharacters = false;

  // encoding options
  _isStaticEncoding = false;
  _defaultEncoding = "UTF-8";
}
/**
 * Derives the xmlns flags from the namespace options. Called at the
 * start of the parse entry points.
 */
void init()
{
  boolean isAware = _isNamespaceAware;
  boolean isPrefixes = _isNamespacePrefixes;

  // an earlier version of this formula also or-ed in _isSAXNamespaces
  _isXmlnsPrefix = isAware || isPrefixes;
  _isXmlnsAttribute = isPrefixes || ! isAware;
}
/**
 * Sets the owner document.
 *
 * @param doc the document stored as the owner.
 */
public void setOwner(QDocument doc)
{
  this._owner = doc;
}
/**
 * Copies the configuration of a document builder factory into this parser,
 * creating the DOM builder if it does not exist yet.
 *
 * @param factory the factory whose settings are copied.
 */
public void setConfig(DocumentBuilderFactory factory)
{
  if (_builder == null) {
    _builder = new DOMBuilder();
  }

  // coalescing is written to the field directly, not through its setter
  _isCoalescing = factory.isCoalescing();

  setExpandEntities(factory.isExpandEntityReferences());
  setSkipComments(factory.isIgnoringComments());
  setSkipWhitespace(factory.isIgnoringElementContentWhitespace());
  setNamespaceAware(factory.isNamespaceAware());

  // namespace prefixes are always switched off for a document builder
  setNamespacePrefixes(false);

  setValidating(factory.isValidating());
}
/**
 * Sets the entities-as-text option.
 */
public void setEntitiesAsText(boolean entitiesAsText)
{
  this._entitiesAsText = entitiesAsText;
}
/**
 * Returns the entities-as-text option.
 */
public boolean getEntitiesAsText()
{
  return this._entitiesAsText;
}
/**
 * Sets the expand-entities option.
 */
public void setExpandEntities(boolean expandEntities)
{
  this._expandEntities = expandEntities;
}
/**
 * Set to true if comments should be skipped. If false, events will be
 * generated for the comments.
 *
 * @param skipComments true to skip comments.
 */
public void setSkipComments(boolean skipComments)
{
  this._skipComments = skipComments;
}
/**
 * Set to true if ignorable whitespace should be skipped.
 *
 * @param skipWhitespace true to skip ignorable whitespace.
 */
public void setSkipWhitespace(boolean skipWhitespace)
{
  this._skipWhitespace = skipWhitespace;
}
/**
 * Returns true if text and cdata nodes will be combined.
 */
public boolean isCoalescing()
{
  return this._isCoalescing;
}
/**
 * Set true if text and cdata nodes should be combined.
 *
 * @param isCoalescing true to combine text and cdata nodes.
 */
public void setCoalescing(boolean isCoalescing)
{
  this._isCoalescing = isCoalescing;
}
/**
 * Returns true if the XML should be validated.
 */
public boolean isValidating()
{
  return this._isValidating;
}
/**
 * Set true if the XML should be validated.
 *
 * @param isValidating true to validate.
 */
public void setValidating(boolean isValidating)
{
  this._isValidating = isValidating;
}
/**
 * Returns true if the parsing is namespace aware.
 */
public boolean isNamespaceAware()
{
  return this._isNamespaceAware;
}
/**
 * Set true if the parsing is namespace aware.
 *
 * @param isNamespaceAware true for namespace-aware parsing.
 */
public void setNamespaceAware(boolean isNamespaceAware)
{
  this._isNamespaceAware = isNamespaceAware;
}
/**
 * Returns true if the parsing uses SAX namespaces.
 */
public boolean isSAXNamespaces()
{
  return this._isSAXNamespaces;
}
/**
 * Set true if the parsing uses SAX namespaces.
 *
 * @param isNamespaces true to use SAX namespaces.
 */
public void setSAXNamespaces(boolean isNamespaces)
{
  this._isSAXNamespaces = isNamespaces;
}
/**
 * Returns true if the parsing uses namespace prefixes.
 */
public boolean isNamespacePrefixes()
{
  return this._isNamespacePrefixes;
}
/**
 * Set true if the parsing uses namespace prefixes.
 *
 * @param isNamespaces true to use namespace prefixes.
 */
public void setNamespacePrefixes(boolean isNamespaces)
{
  this._isNamespacePrefixes = isNamespaces;
}
/**
 * Returns true if comments are skipped.
 */
public boolean getSkipComments()
{
  return this._skipComments;
}
/**
 * Sets the encoding to use when none is specified.
 *
 * @param encoding the default encoding
 */
public void setDefaultEncoding(String encoding)
{
  this._defaultEncoding = encoding;
}
/**
 * Returns the encoding used when none is specified.
 */
public String getDefaultEncoding()
{
  return this._defaultEncoding;
}
/**
 * Returns the value of a SAX property.
 *
 * <p>The lexical-handler property returns the current lexical handler;
 * the dom-node and xml-string properties are recognized but always null.
 *
 * @param name the property URI
 *
 * @return the property value, possibly null.
 *
 * @throws SAXNotRecognizedException for any other property name.
 */
public Object getProperty(String name)
  throws SAXNotRecognizedException
{
  if (name.equals("http://xml.org/sax/properties/lexical-handler")) {
    return _lexicalHandler;
  }

  boolean isAlwaysNull
    = (name.equals("http://xml.org/sax/properties/dom-node")
       || name.equals("http://xml.org/sax/properties/xml-string"));

  if (isAlwaysNull) {
    return null;
  }

  throw new SAXNotRecognizedException(name);
}
/**
 * Sets a SAX property. Only the lexical handler is supported, under either
 * of the two property names tested below.
 *
 * @param name the property URI
 * @param obj the new value; must be a LexicalHandler or null.
 *
 * @throws SAXNotSupportedException if the name is not one of the supported
 * properties, or if the value is not a LexicalHandler.
 */
public void setProperty(String name, Object obj)
  throws SAXNotSupportedException
{
  boolean isLexicalHandler
    = (name.equals("http://xml.org/sax/properties/lexical-handler")
       || name.equals("http://xml.org/sax/handlers/LexicalHandler"));

  if (! isLexicalHandler)
    throw new SAXNotSupportedException(name);

  // report a wrongly typed value through the declared SAX exception
  // instead of letting the cast fail with a ClassCastException
  if (obj != null && ! (obj instanceof LexicalHandler)) {
    throw new SAXNotSupportedException(L.l("property `{0}' requires a LexicalHandler value",
                                           name));
  }

  _lexicalHandler = (LexicalHandler) obj;
}
/**
 * Returns the state of a SAX feature.
 *
 * <p>string-interning and external-general-entities always report true;
 * external-parameter-entities always reports false.
 *
 * @param name the feature URI
 *
 * @return the current value of the feature.
 *
 * @throws SAXNotRecognizedException for any other feature name.
 */
public boolean getFeature(String name)
  throws SAXNotRecognizedException
{
  // NOTE(review): this reads _isSAXNamespaces, while setFeature() stores
  // the same feature in _isNamespaceAware -- confirm that is intended
  if (name.equals("http://xml.org/sax/features/namespaces")) {
    return _isSAXNamespaces;
  }

  if (name.equals("http://xml.org/sax/features/namespace-prefixes")) {
    return _isNamespacePrefixes;
  }

  if (name.equals("http://xml.org/sax/features/string-interning")) {
    return true;
  }

  if (name.equals("http://xml.org/sax/features/validation")) {
    return _isValidating;
  }

  if (name.equals("http://xml.org/sax/features/external-general-entities")) {
    return true;
  }

  if (name.equals("http://xml.org/sax/features/external-parameter-entities")) {
    return false;
  }

  if (name.equals("http://caucho.com/xml/features/skip-comments")) {
    return _skipComments;
  }

  throw new SAXNotRecognizedException(name);
}
/**
 * Sets a SAX feature.
 *
 * @param name the feature URI
 * @param value the new value of the feature
 *
 * @throws SAXNotSupportedException if the feature is not one of the four
 * handled below.
 */
public void setFeature(String name, boolean value)
  throws SAXNotSupportedException
{
  if (name.equals("http://xml.org/sax/features/namespaces")) {
    _isNamespaceAware = value;
    return;
  }

  if (name.equals("http://xml.org/sax/features/namespace-prefixes")) {
    // setting namespace-prefixes, even if false, sets namespace-aware
    // see xml/032b
    _isNamespacePrefixes = value;
    _isNamespaceAware = true;
    return;
  }

  if (name.equals("http://caucho.com/xml/features/skip-comments")) {
    _skipComments = value;
    return;
  }

  if (name.equals("http://xml.org/sax/features/validation")) {
    _isValidating = value;
    return;
  }

  throw new SAXNotSupportedException(name);
}
/**
 * Sets the lexical handler callback.
 */
public void setLexicalHandler(LexicalHandler handler)
{
  this._lexicalHandler = handler;
}
/**
 * Sets the callback object used to find files.
 *
 * @param resolver the object to find files.
 */
public void setEntityResolver(EntityResolver resolver)
{
  this._entityResolver = resolver;
}
/**
 * Returns the callback object used to find files from system ids.
 *
 * @return the resolver to find files.
 */
public EntityResolver getEntityResolver()
{
  return this._entityResolver;
}
/**
 * Sets the DTD handler callback.
 */
public void setDTDHandler(DTDHandler handler)
{
  this._dtdHandler = handler;
}
/**
 * Returns the DTD handler callback.
 */
public DTDHandler getDTDHandler()
{
  return this._dtdHandler;
}
/**
 * Sets the content handler callback.
 */
public void setContentHandler(ContentHandler handler)
{
  this._contentHandler = handler;
}
/**
 * Returns the content handler callback.
 */
public ContentHandler getContentHandler()
{
  return this._contentHandler;
}
/**
 * Configures the document handler callback. A non-null handler is wrapped
 * in a ContentHandlerAdapter and installed as the content handler; a null
 * handler clears the content handler.
 *
 * @param handler the new document handler.
 */
public void setDocumentHandler(DocumentHandler handler)
{
  _contentHandler = (handler == null
                     ? null
                     : new ContentHandlerAdapter(handler));
}
/**
 * Sets the error handler callback.
 */
public void setErrorHandler(ErrorHandler handler)
{
  this._errorHandler = handler;
}
/**
 * Returns the error handler callback.
 */
public ErrorHandler getErrorHandler()
{
  return this._errorHandler;
}
/**
 * Stores the locale.
 */
public void setLocale(Locale locale)
{
  this._locale = locale;
}
/**
 * SAX parsing from a SAX InputSource.
 *
 * <p>The input is taken from the source's byte stream, else its character
 * stream, else by opening its system id. A character stream is read with
 * a fixed "UTF-8" encoding and marks the encoding as static; otherwise the
 * source's own encoding, if any, is applied to the stream.
 *
 * @param source source containing the XML
 *
 * @throws FileNotFoundException if the source has neither a stream nor a
 * system id.
 */
public void parse(InputSource source)
  throws IOException, SAXException
{
  init();

  String systemId = source.getSystemId();

  if (_searchPath == null && systemId != null)
    _searchPath = Vfs.lookup(systemId).getParent();

  _systemId = systemId;
  _publicId = source.getPublicId();

  ReadStream stream;
  String encoding = null;

  if (source.getByteStream() != null) {
    stream = Vfs.openRead(source.getByteStream());
    encoding = source.getEncoding();
  }
  else if (source.getCharacterStream() != null) {
    encoding = "UTF-8";
    _isStaticEncoding = true;
    stream = Vfs.openRead(source.getCharacterStream());
  }
  else if (systemId != null) {
    // opened as a top-level stream, without an entity resolver
    InputStream is = openStream(systemId,
                                source.getPublicId(),
                                null,
                                true);
    stream = Vfs.openRead(is);
    encoding = source.getEncoding();
  }
  else
    throw new FileNotFoundException(L.l("invalid InputSource"));

  try {
    // the encoding is applied inside the try so the stream is still
    // closed when setting it fails
    if (encoding != null)
      stream.setEncoding(encoding);

    parseInt(stream);
  } finally {
    stream.close();
  }
}
/**
 * SAX parsing from an InputStream.
 *
 * <p>When the stream is a ReadStream with a path, the system id and
 * filename are taken from that path, and so is the search path unless one
 * is already set. Otherwise the system id is "stream".
 *
 * @param is stream containing the XML
 */
public void parse(InputStream is)
  throws IOException, SAXException
{
  init();

  _systemId = "stream";

  if (is instanceof ReadStream) {
    ReadStream readStream = (ReadStream) is;
    Path path = readStream.getPath();

    // the path is only dereferenced once it is known to be non-null;
    // without a path the "stream" system id assigned above is kept
    if (path != null) {
      _systemId = path.getURL();
      _filename = path.getUserPath();

      if (_searchPath == null)
        _searchPath = path.getParent();
    }

    // the caller's ReadStream is not closed here
    parseInt(readStream);
  }
  else {
    // rs is always a distinct object here because is is not a ReadStream,
    // so it is closed unconditionally
    ReadStream rs = VfsStream.openRead(is);

    try {
      parseInt(rs);
    } finally {
      rs.close();
    }
  }
}
/**
 * SAX parsing from an InputStream with an explicit system id.
 *
 * @param is stream containing the XML
 * @param systemId the system id to use for the stream, may be null
 */
public void parse(InputStream is, String systemId)
  throws IOException, SAXException
{
  this.init();

  this.parseImpl(is, systemId);
}
/**
 * SAX parsing from an InputStream, without calling init() first.
 *
 * <p>Works out the search path, system id and filename from the stream's
 * path and the given system id, then parses the stream. A ReadStream
 * passed in is parsed directly and left open; any other stream is wrapped
 * in a ReadStream that is closed afterwards.
 *
 * @param is stream containing the XML
 * @param systemId the system id of the stream, may be null
 */
public void parseImpl(InputStream is, String systemId)
  throws IOException, SAXException
{
  if (is instanceof ReadStream) {
    ReadStream readStream = (ReadStream) is;
    Path path = readStream.getPath();

    if (_searchPath != null) {
      // an existing search path is never replaced in this branch
    }
    else if (path != null) {
      _searchPath = path.getParent();

      if (systemId != null)
        _searchPath = _searchPath.lookup(systemId).getParent();
    }
    else if (systemId != null)
      _searchPath = Vfs.lookup(systemId).getParent();

    if (systemId == null) {
      // the path is only dereferenced when it exists; without one, fall
      // back to the same placeholder used for plain input streams below
      systemId = (path != null) ? path.getURL() : "anonymous.xml";
      _filename = readStream.getUserPath();
    }
    else
      _filename = systemId;

    _systemId = systemId;

    parseInt(readStream);
  }
  else {
    if (systemId == null) {
      _systemId = "anonymous.xml";
    }
    else {
      // unlike the ReadStream branch, this replaces any search path
      _searchPath = Vfs.lookup(systemId).getParent();
      _systemId = systemId;
    }

    // rs is always a distinct object here because is is not a ReadStream,
    // so it is closed unconditionally
    ReadStream rs = VfsStream.openRead(is);

    try {
      parseInt(rs);
    } finally {
      rs.close();
    }
  }
}
/**
 * SAX parsing from a file path. The stream is opened as a top-level
 * stream and closed when parsing finishes or fails.
 *
 * @param systemId path to the file containing the XML
 */
public void parse(String systemId)
  throws IOException, SAXException
{
  String publicId = null;
  InputStream in = openTopStream(systemId, publicId);

  try {
    parse(in);
  }
  finally {
    in.close();
  }
}
/**
 * SAX parsing from a VFS path. The path's parent becomes the search path
 * unless one is already set.
 *
 * @param path the VFS path containing the XML
 */
public void parse(Path path)
  throws IOException, SAXException
{
  init();

  if (_searchPath == null) {
    _searchPath = path.getParent();
  }

  ReadStream in = path.openRead();

  try {
    parseInt(in);
  }
  finally {
    in.close();
  }
}
/**
 * SAX parsing from a string.
 *
 * @param string string containing the XML
 */
public void parseString(String string)
  throws IOException, SAXException
{
  init();

  ReadStream in = Vfs.openString(string);

  try {
    parseInt(in);
  }
  finally {
    in.close();
  }
}
/**
 * Parses a document from a SAX InputSource.
 *
 * <p>A new QDocument becomes the owner, the DOM builder is configured
 * from the current options and installed as the content handler, and the
 * source is then parsed with {@link #parse(InputSource)}.
 *
 * @param source SAX InputSource containing the XML data.
 *
 * @return the new document.
 */
public Document parseDocument(InputSource source)
  throws IOException, SAXException
{
  init();

  QDocument document = new QDocument();

  if (_builder == null) {
    _builder = new DOMBuilder();
  }

  _builder.init(document);
  setOwner(document);

  document.setSystemId(source.getSystemId());

  _builder.setSystemId(source.getSystemId());
  _builder.setStrictXML(_strictXml);
  _builder.setCoalescing(_isCoalescing);
  _builder.setSkipWhitespace(_skipWhitespace);

  // the builder receives the SAX events of the parse
  _contentHandler = _builder;

  parse(source);

  return document;
}
/**
 * Parses a document from a system path. The stream is opened as a
 * top-level stream and closed when parsing finishes or fails.
 *
 * @param systemId path to the XML data.
 *
 * @return the parsed document.
 */
public Document parseDocument(String systemId)
  throws IOException, SAXException
{
  String publicId = null;
  InputStream in = openTopStream(systemId, publicId);

  try {
    Document document = parseDocument(in);

    return document;
  }
  finally {
    in.close();
  }
}
/**
 * Parses a document from a VFS path. The path's parent becomes the search
 * path unless one is already set.
 *
 * @param path the VFS path containing the XML document.
 *
 * @return the parsed document.
 */
public Document parseDocument(Path path)
  throws IOException, SAXException
{
  if (_searchPath == null) {
    _searchPath = path.getParent();
  }

  ReadStream in = path.openRead();

  try {
    Document document = parseDocument(in);

    return document;
  }
  finally {
    in.close();
  }
}
/**
 * Parses an input stream into a DOM document, with no system id.
 *
 * @param is the input stream containing the XML
 *
 * @return the parsed document.
 */
public Document parseDocument(InputStream is)
  throws IOException, SAXException
{
  String systemId = null;

  return parseDocument(is, systemId);
}
/**
 * Parses an input stream into a new DOM document.
 *
 * @param is the input stream containing the XML
 * @param systemId the URL of the stream.
 *
 * @return the parsed document.
 */
public Document parseDocument(InputStream is, String systemId)
  throws IOException, SAXException
{
  init();

  QDocument document = new QDocument();

  parseDocument(document, is, systemId);

  return document;
}
/**
 * Parses an input stream into an existing document.
 *
 * <p>The document becomes the owner, the DOM builder is configured and
 * installed as the content handler, and the stream is parsed with
 * parseImpl(). init() is not called here.
 *
 * @param doc the document to fill
 * @param is the input stream containing the XML
 * @param systemId the URL of the stream, may be null
 */
public void parseDocument(QDocument doc, InputStream is, String systemId)
  throws IOException, SAXException
{
  _owner = doc;

  if (_builder == null) {
    _builder = new DOMBuilder();
  }

  _builder.init(doc);
  _builder.setSystemId(systemId);
  _builder.setCoalescing(_isCoalescing);
  _builder.setSkipWhitespace(_skipWhitespace);

  // the builder receives the SAX events of the parse
  _contentHandler = _builder;

  parseImpl(is, systemId);
}
/**
 * Parses a string into a DOM document. The encoding is marked as static
 * before parsing.
 *
 * @param string the string containing the XML
 *
 * @return the parsed document.
 */
public Document parseDocumentString(String string)
  throws IOException, SAXException
{
  ReadStream in = Vfs.openString(string);

  _isStaticEncoding = true;

  try {
    Document document = parseDocument(in);

    return document;
  }
  finally {
    in.close();
  }
}
/**
 * Looks up an input stream from the system id, using the parser's entity
 * resolver and treating the stream as non-top-level.
 */
public InputStream openStream(String systemId, String publicId)
  throws IOException, SAXException
{
  boolean isTop = false;

  return openStream(systemId, publicId, _entityResolver, isTop);
}
/**
 * Looks up an input stream from the system id, using the parser's entity
 * resolver and treating the stream as top-level.
 */
public InputStream openTopStream(String systemId, String publicId)
  throws IOException, SAXException
{
  boolean isTop = true;

  return openStream(systemId, publicId, _entityResolver, isTop);
}
/**
 * Looks up an input stream from the system id, using the given entity
 * resolver and treating the stream as non-top-level.
 */
public InputStream openStream(String systemId, String publicId,
                              EntityResolver entityResolver)
  throws IOException, SAXException
{
  boolean isTop = false;

  return openStream(systemId, publicId, entityResolver, isTop);
}
/**
 * Looks up an input stream from the system id.
 *
 * <p>A system id that starts with '/' or has no scheme is first resolved
 * against the search path (or its own parent). The entity resolver, if
 * any, is then asked for the normalized id. Otherwise the id is opened
 * through the search path or Vfs, except that a non-top-level absolute
 * URL other than file: or jar: is rejected.
 *
 * @param systemId the system id to open
 * @param publicId the public id passed to the entity resolver
 * @param entityResolver optional resolver consulted first, may be null
 * @param isTop true for a top-level stream, which skips the remote URL check
 *
 * @return the opened stream.
 *
 * @throws RemoteURLException if a non-top-level stream names a remote URL.
 */
protected InputStream openStream(String systemId, String publicId,
                                 EntityResolver entityResolver,
                                 boolean isTop)
  throws IOException, SAXException
{
  int colon = systemId.indexOf(':');
  int slash = systemId.indexOf('/');

  // absolute means a scheme prefix appears before the first slash
  boolean isAbsolute = colon > 0 && (colon < slash || slash < 0);

  if (slash == 0 || ! isAbsolute) {
    Path pwd;

    if (_searchPath != null)
      pwd = _searchPath;
    else
      pwd = Vfs.lookup(systemId).getParent();

    String newId = pwd.lookup(systemId).getURL();

    if (! newId.startsWith("error:"))
      systemId = newId;
    else if (_systemId != null) {
      // fall back to the directory part of the current system id; the
      // null check covers a parser that has not parsed anything yet
      int tail = _systemId.lastIndexOf('/');

      if (tail >= 0)
        systemId = _systemId.substring(0, tail + 1) + systemId;
    }
  }

  // xml/03c5 -- must be after the normalization
  if (entityResolver != null) {
    InputSource source = entityResolver.resolveEntity(publicId, systemId);

    if (source != null) {
      _filename = systemId;
      _systemId = systemId;

      return openSource(source);
    }
  }

  int ch;
  // on Windows, rewrite file:X:... (X a drive letter) to /X:...
  if (CauchoSystem.isWindows() && systemId.startsWith("file:") &&
      systemId.length() > 7 && systemId.charAt(6) == ':' &&
      (((ch = systemId.charAt(5)) >= 'a' && ch <= 'z') ||
       ch >= 'A' && ch <= 'Z')) {
    colon = 1;
    isAbsolute = false;
    systemId = "/" + systemId.substring(5);
  }

  if (! isTop &&
      isAbsolute && ! systemId.startsWith("file:") &&
      ! systemId.startsWith("jar:") &&
      ! (colon == 1 && CauchoSystem.isWindows())) {
    throw new RemoteURLException(L.l("URL `{0}' was not opened because it is a remote URL. Any URL scheme other than file: must be handled by a custom entity resolver.",
                                     systemId));
  }
  else if (_searchPath != null) {
    return _searchPath.lookup(systemId).openRead();
  }
  else
    return Vfs.lookup(systemId).openRead();
}
/**
 * Opens the stream of an InputSource, preferring its byte stream, then
 * its character stream, then its system id.
 *
 * @param source the source to open
 *
 * @return the stream for the source.
 *
 * @throws FileNotFoundException if the source has none of the three.
 */
protected InputStream openSource(InputSource source)
  throws IOException, SAXException
{
  InputStream byteStream = source.getByteStream();

  if (byteStream != null) {
    return byteStream;
  }

  if (source.getCharacterStream() != null) {
    return Vfs.openRead(source.getCharacterStream());
  }

  if (source.getSystemId() != null) {
    return Vfs.openRead(source.getSystemId());
  }

  throw new FileNotFoundException(L.l("invalid InputSource {0}", source));
}
/**
 * Parse the document from a read stream. Implemented by the concrete
 * parser subclasses; every parse entry point in this class ends up here.
 *
 * <p>The callers in this class ignore the returned document and do not
 * rely on this method to close the stream.
 *
 * @param is read stream to parse from.
 *
 * @return The parsed document.
 */
abstract Document parseInt(ReadStream is)
throws IOException, SAXException;
// registers each attribute type name in _attrTypes, mapped to itself
static {
  String []typeNames = {
    "CDATA",
    "ID", "IDREF", "IDREFS",
    "ENTITY", "ENTITIES",
    "NMTOKEN", "NMTOKENS",
  };

  for (String typeName : typeNames) {
    _attrTypes.put(typeName, typeName);
  }
}
}