/*
* Copyright (c) 1998-2011 Caucho Technology -- all rights reserved
*
* This file is part of Resin(R) Open Source
*
* Each copy or derived work must preserve the copyright notice and this
* notice unmodified.
*
* Resin Open Source is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Resin Open Source is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
* of NON-INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with Resin Open Source; if not, write to the
* Free SoftwareFoundation, Inc.
* 59 Temple Place, Suite 330
* Boston, MA 02111-1307 USA
*
* @author Scott Ferguson
*/
package com.caucho.xml;
import com.caucho.util.CharBuffer;
import com.caucho.vfs.Path;
import com.caucho.vfs.ReadStream;
import com.caucho.vfs.ReaderWriterStream;
import com.caucho.vfs.Vfs;
import com.caucho.vfs.WriteStream;
import com.caucho.xml.readers.MacroReader;
import com.caucho.xml.readers.Utf16Reader;
import com.caucho.xml.readers.Utf8Reader;
import com.caucho.xml.readers.XmlReader;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.logging.Level;
/**
* A configurable XML parser. Loose versions of XML and HTML are supported
* by changing the Policy object.
*
* <p>Normally, applications will use Xml, LooseXml, Html, or LooseHtml.
*/
public class XmlParser extends AbstractParser {
// Xerces uses the following
// Namespace URIs exposed as constants for the "xmlns" and "xml" prefixes.
public static final String XMLNS = "http://www.w3.org/2000/xmlns/";
public static final String XML = "http://www.w3.org/XML/1998/namespace";
// Synthetic names. DOC_NAME is the value of _activeNode while no element is open.
static final QName DOC_NAME = new QName(null, "#document", null);
static final QName TEXT_NAME = new QName(null, "#text", null);
static final QName JSP_NAME = new QName(null, "#jsp", null);
static final QName WHITESPACE_NAME = new QName(null, "#whitespace", null);
// Name under which parseJspAttribute() records a <%= ... %> found among attributes.
static final QName JSP_ATTRIBUTE_NAME = new QName("xtp", "jsp-attribute", null);
// Attributes of the tag or declaration currently being parsed (filled by parseAttributes()).
QAttributes _attributes;
QAttributes _nullAttributes;
// True while parseDoctype() is running.
boolean _inDtd;
// Scratch buffers for names and values.
CharBuffer _text;
CharBuffer _eltName;
CharBuffer _cb;
CharBuffer _buf = new CharBuffer();
// Filename and line recorded by parseNode() when a new run of text starts.
String _textFilename;
int _textLine;
// Pending character data; parseNode() inspects _textBuffer[_textLength - 1].
char []_textBuffer = new char[1024];
int _textLength;
int _textCapacity = _textBuffer.length;
boolean _isIgnorableWhitespace;
// Set by parseNode() while a <% ... %> block that is not <%= is being collected.
boolean _isJspText;
CharBuffer _name = new CharBuffer();
CharBuffer _nameBuffer = new CharBuffer();
MacroReader _macro = new MacroReader();
int _macroIndex = 0;
int _macroLength = 0;
char []_macroBuffer;
// Parallel arrays indexed by _elementTop; popNode() restores _activeNode and the
// namespace scope from them.
QName []_elementNames = new QName[64];
NamespaceMap []_namespaces = new NamespaceMap[64];
int []_elementLines = new int[64];
int _elementTop;
// Head of the linked list (via NamespaceMap.next) of current prefix bindings.
NamespaceMap _namespaceMap;
// Parallel lists of non-xmlns attribute names and values collected by parseAttributes().
ArrayList<String> _attrNames = new ArrayList<String>();
ArrayList<String> _attrValues = new ArrayList<String>();
// Underlying stream and the character reader built on it by parseXMLDeclaration().
ReadStream _is;
XmlReader _reader;
// External identifiers that parseDoctype() copies into the DTD.
String _extPublicId;
String _extSystemId;
// Name of the innermost open element, or DOC_NAME at document level.
QName _activeNode;
QName _topNamespaceNode;
boolean _isTagStart;
// While true, parseNode() does not pop to the enclosing reader at end of input.
boolean _stopOnIncludeEnd;
// Checked by parseInt() when _strictXml is set.
boolean _hasTopElement;
// Set by parseDoctypeDecl().
boolean _hasDoctype;
// Set by init() to whether _policy is an HtmlPolicy.
boolean _isHtml;
Locator _locator = new LocatorImpl(this);
/**
* Creates a parser through the superclass's no-argument constructor and
* then calls clear().
*/
public XmlParser()
{
  super();

  clear();
}
/**
* Creates a new parser with a given parsing policy and dtd.
*
* <p>Both arguments are handed to the superclass constructor, after which
* clear() is called.
*
* @param policy the parsing policy, handling optional tags.
* @param dtd the parser's dtd.
*/
XmlParser(Policy policy, QDocumentType dtd)
{
super(policy, dtd);
clear();
}
/**
* Resets the per-parse state: runs the superclass initialization, allocates
* fresh attribute and text buffers, clears counters, flags and identifiers,
* and finally initializes the policy.
*/
void init()
{
  super.init();

  // fresh working objects
  _attributes = new QAttributes();
  _nullAttributes = new QAttributes();
  _eltName = new CharBuffer();
  _text = new CharBuffer();

  _isHtml = (_policy instanceof HtmlPolicy);

  // jsp/193b: the reset of _namespaceMap stays disabled

  // pending text
  _textLength = 0;
  _isIgnorableWhitespace = true;

  // element stack and line counter
  _elementTop = 0;
  _elementLines[0] = 1;
  _line = 1;

  // DTD and include state
  _dtd = null;
  _inDtd = _isTagStart = _stopOnIncludeEnd = false;
  _extPublicId = _extSystemId = null;

  // the reset of _filename stays disabled
  _publicId = null;
  _systemId = null;

  _hasTopElement = _hasDoctype = false;
  _macroIndex = _macroLength = 0;
  _reader = null;

  // the reset of _owner stays disabled
  _policy.init();
}
/**
* Parse the document from a read stream.
*
* <p>Resolves the filename and system id, prepares the policy, content
* handler and owner document, parses the XML declaration and the document
* body, then closes the stream.
*
* @param is read stream to parse from; it is closed before returning normally.
*
* @return The parsed document (the owner document, with the stream's path
* added as a dependency).
*
* @throws SAXException if _strictXml is set and no top-level element was seen,
* or if parsing fails.
*/
@Override
Document parseInt(ReadStream is)
throws IOException, SAXException
{
_is = is;
// Derive a missing filename from the system id, or else from the stream.
if (_filename == null && _systemId != null)
_filename = _systemId;
else if (_filename == null)
_filename = _is.getUserPath();
if (_systemId == null) {
_systemId = _is.getPath().getURL();
// "null:" and "string:" URLs are replaced by the generic id "stream"
if ("null:".equals(_systemId) || "string:".equals(_systemId))
_systemId = "stream";
}
/* xsl/0401
if (_isNamespaceAware)
_namespaceMap = new NamespaceMap(null, "", "");
*/
_policy.setNamespaceAware(_isNamespaceAware);
// Last-resort fallbacks for the filename.
if (_filename == null)
_filename = _systemId;
if (_filename == null)
_filename = "stream";
if (_dtd != null)
_dtd.setSystemId(_systemId);
if (_builder != null) {
// The placeholder ids are not passed on to the builder.
if (! "string:".equals(_systemId) && ! "stream".equals(_systemId))
_builder.setSystemId(_systemId);
_builder.setFilename(_is.getPath().getURL());
}
// Guarantee a non-null handler so the parse code can call it unconditionally.
if (_contentHandler == null)
_contentHandler = new org.xml.sax.helpers.DefaultHandler();
_contentHandler.setDocumentLocator(_locator);
if (_owner == null)
_owner = new QDocument();
if (_defaultEncoding != null)
_owner.setAttribute("encoding", _defaultEncoding);
// Parsing starts at document level.
_activeNode = DOC_NAME;
_policy.setStream(is);
_policy.setNamespace(_namespaceMap);
_contentHandler.startDocument();
// parseXMLDeclaration(null) also creates _reader; there is no enclosing reader.
int ch = parseXMLDeclaration(null);
ch = skipWhitespace(ch);
parseNode(ch, false);
/*
if (dbg.canWrite()) {
printDebugNode(dbg, doc, 0);
dbg.flush();
}
*/
if (_strictXml && ! _hasTopElement)
throw error(L.l("XML file has no top-element. All well-formed XML files have a single top-level element."));
if (_contentHandler != null)
_contentHandler.endDocument();
// Detach the owner so it is not carried into a later parse.
QDocument owner = _owner;
_owner = null;
// ioc/23l0
// The path is fetched before the stream is closed.
Path path = is.getPath();
is.close();
owner.addDepend(path);
return owner;
}
/**
* The main dispatch loop. Reads characters until end of input (or until
* the short-form tag is closed), accumulating character data with addText()
* and dispatching on markup: end tags, elements, CDATA sections, comments,
* the DOCTYPE declaration, processing instructions, entity references and,
* when JSP parsing is enabled, <% ... %> blocks.
*
* @param ch the next character
* @param special true for the short form, <foo/bar/>
*/
private void parseNode(int ch, boolean special)
throws IOException, SAXException
{
//boolean isTop = node instanceof QDocument;
_text.clear();
loop:
while (true) {
// Remember where a new run of text starts.
if (_textLength == 0) {
_textFilename = getFilename();
_textLine = getLine();
}
switch (ch) {
case -1:
// End of the current reader: flush pending text first.
if (_textLength != 0)
appendText();
// If this reader has an enclosing reader, resume parsing there
// unless the caller asked to stop at the end of the include.
if (! _stopOnIncludeEnd && _reader.getNext() != null) {
popInclude();
if (_reader != null)
parseNode(_reader.read(), special);
return;
}
// Real end of input: let the policy close whatever is still open.
closeTag("");
return;
case ' ': case '\t': case '\n': case '\r':
if (! _normalizeWhitespace)
addText((char) ch);
// When normalizing, leading whitespace becomes one space unless
// _isTagStart is set ...
else if (_textLength == 0) {
if (! _isTagStart)
addText(' ');
}
// ... and runs of whitespace collapse into a single space.
else if (_textBuffer[_textLength - 1] != ' ') {
addText(' ');
}
ch = _reader.read();
break;
case 0xffff:
// marker for end of text for serialization
return;
default:
// Ordinary character data. (The default label sits mid-switch;
// label order does not affect dispatch.)
addText((char) ch);
ch = _reader.read();
break;
case '/':
// Outside the short form a slash is plain text.
if (! special) {
addText((char) ch);
ch = _reader.read();
continue;
}
// In the short form, "/>" (or end of input) closes the element.
ch = _reader.read();
if (ch == '>' || ch == -1) {
appendText();
popNode();
return;
}
addText('/');
break;
case '&':
ch = parseEntityReference();
break;
case '<':
boolean endTag = false;
ch = _reader.read();
// end tag: </name>
if (ch == '/' && ! special) {
// When normalizing, drop the single trailing space before the end tag.
if (_normalizeWhitespace &&
_textLength > 0 && _textBuffer[_textLength - 1] == ' ') {
_textLength--;
}
appendText();
ch = _reader.parseName(_name, _reader.read());
if (ch != '>') {
// XXX: Hack for Java PetStore
while (XmlChar.isWhitespace(ch))
ch = _reader.read();
if (ch != '>')
throw error(L.l("`</{0}>' expected `>' at {1}. Closing tags must close immediately after the tag name.", _name, badChar(ch)));
}
closeTag(_policy.getName(_name).getName());
ch = _reader.read();
}
// element: <tag attr=value ... attr=value> ...
else if (XmlChar.isNameStart(ch)) {
appendText();
parseElement(ch);
ch = _reader.read();
}
// <! ...
else if (ch == '!') {
// <![CDATA[ ... ]]>
if ((ch = _reader.read()) == '[') {
parseCdata();
ch = _reader.read();
}
// <!-- ... -->
else if (ch == '-') {
parseComment();
ch = _reader.read();
}
// <!NAME ...: only DOCTYPE is accepted here
else if (XmlChar.isNameStart(ch)) {
appendText();
ch = _reader.parseName(_name, ch);
String declName = _name.toString();
if (declName.equals("DOCTYPE")) {
parseDoctype(ch);
if (_contentHandler instanceof DOMBuilder)
((DOMBuilder) _contentHandler).dtd(_dtd);
ch = _reader.read();
// forgiving mode accepts any capitalization of "doctype"
} else if (_forgiving && declName.equalsIgnoreCase("doctype")) {
parseDoctype(ch);
if (_contentHandler instanceof DOMBuilder)
((DOMBuilder) _contentHandler).dtd(_dtd);
ch = _reader.read();
} else
throw error(L.l("expected `<!DOCTYPE' declaration at {0}", declName));
if (isDtdValidating()) {
generateDtdValidator(_dtd);
}
// forgiving mode treats an unrecognized "<!" as text
} else if (_forgiving) {
addText("<!");
} else
throw error(L.l("expected `<!DOCTYPE' declaration at {0}", badChar(ch)));
}
// PI: <?tag attr=value ... attr=value?>
else if (ch == '?') {
ch = parsePI();
}
else if (_strictXml) {
throw error(L.l("expected tag name after `<' at {0}. Open tag names must immediately follow the open brace like `<foo ...>'", badChar(ch)));
}
// implicit <![CDATA[ for <% ... %>
else if (_isJsp && ch == '%') {
ch = _reader.read();
appendText();
// "<%=" is not flagged as JSP text; any other "<%" block is.
_isJspText = ch != '=';
addText("<%");
// Copy through to the closing "%>"; a lone '%' is kept as text.
while (ch >= 0) {
if (ch == '%') {
ch = _reader.read();
if (ch == '>') {
addText("%>");
ch = _reader.read();
break;
}
else
addText('%');
}
else {
addText((char) ch);
ch = _reader.read();
}
}
appendText();
_isJspText = false;
}
// non-strict: a stray '<' is kept as text and ch is re-dispatched
else {
addText('<');
}
}
}
}
/**
* Parses the <!DOCTYPE> declaration: the document type name, an optional
* external identifier (whose external subset is read through a pushed
* include when it can be opened), an optional internal subset in
* brackets, and the closing '>'.
*
* @param ch the character following the DOCTYPE keyword
*
* @throws SAXException if the declaration appears while an element is open
* or is not terminated by '>'.
*/
private void parseDoctype(int ch)
throws IOException, SAXException
{
// Only legal at document level.
if (_activeNode != DOC_NAME)
throw error(L.l("<!DOCTYPE immediately follow the <?xml ...?> declaration."));
_inDtd = true;
ch = skipWhitespace(ch);
ch = _reader.parseName(_nameBuffer, ch);
String name = _nameBuffer.toString();
ch = skipWhitespace(ch);
// Reuse a DTD object if one is already present, otherwise create it.
if (_dtd == null)
_dtd = new QDocumentType(name);
_dtd.setName(name);
// A name at this point starts the external id; its results are read from
// _extPublicId/_extSystemId.
if (XmlChar.isNameStart(ch)) {
ch = parseExternalID(ch);
ch = skipWhitespace(ch);
_dtd._publicId = _extPublicId;
_dtd._systemId = _extSystemId;
}
if (_dtd._systemId != null && ! _dtd._systemId.equals("")) {
// NOTE(review): this local is never used.
InputStream is = null;
// Push the current character back so it is read again after the include.
unread(ch);
XmlReader oldReader = _reader;
boolean hasInclude = false;
// Failure to open the external subset is logged, not fatal.
try {
pushInclude(_extPublicId, _extSystemId);
hasInclude = true;
} catch (Exception e) {
if (log.isLoggable(Level.FINEST))
log.log(Level.FINER, e.toString(), e);
else
log.finer(e.toString());
}
if (hasInclude) {
// Keep parseNode() from popping past the external subset.
_stopOnIncludeEnd = true;
try {
ch = parseDoctypeDecl(_dtd);
} catch (XmlParseException e) {
// Parse errors in a DTD whose system id starts with "http" are only logged.
if (_extSystemId != null &&
_extSystemId.startsWith("http")) {
log.log(Level.FINE, e.toString(), e);
}
else
throw e;
}
_stopOnIncludeEnd = false;
// Unwind any readers still pushed on top of the original one.
while (_reader != null && _reader != oldReader)
popInclude();
}
if (_reader != null)
ch = skipWhitespace(read());
}
// internal subset
if (ch == '[')
ch = parseDoctypeDecl(_dtd);
ch = skipWhitespace(ch);
_inDtd = false;
if (ch != '>')
throw error(L.l("expected `>' in <!DOCTYPE at {0}",
badChar(ch)));
}
/**
* Parses the DTD.
*
* <pre>
* dtd-item ::= <!ELEMENT ... |
* <!ATTLIST ... |
* <!NOTATION ... |
* <!ENTITY ... |
* <!-- comment |
* <? pi |
* %pe-ref;
* </pre>
*
* <p>Items are read until a ']' or the end of input. Conditional sections
* (<![IGNORE[ and <![INCLUDE[) are both passed to parseIgnore().
*
* @param doctype the document type receiving the declarations
*
* @return the next character.
*/
private int parseDoctypeDecl(QDocumentType doctype)
throws IOException, SAXException
{
_hasDoctype = true;
int ch = 0;
for (ch = skipWhitespace(read());
ch >= 0 && ch != ']';
ch = skipWhitespace(read())) {
if (ch == '<') {
if ((ch = read()) == '!') {
// <!NAME ... declaration
if (XmlChar.isNameStart(ch = read())) {
ch = _reader.parseName(_text, ch);
String name = _text.toString();
if (name.equals("ELEMENT"))
parseElementDecl(doctype);
else if (name.equals("ATTLIST"))
parseAttlistDecl(doctype);
else if (name.equals("NOTATION"))
parseNotationDecl(doctype);
else if (name.equals("ENTITY"))
parseEntityDecl(doctype);
else
throw error("unknown declaration `" + name + "'");
}
// <!-- comment -->
else if (ch == '-')
parseComment();
// <![IGNORE[ ... ]]> or <![INCLUDE[ ... ]]>
else if (ch == '[') {
ch = _reader.parseName(_text, read());
String name = _text.toString();
if (name.equals("IGNORE")) {
parseIgnore();
}
// NOTE(review): INCLUDE takes the same path as IGNORE — confirm intended.
else if (name.equals("INCLUDE")) {
parseIgnore();
}
else
throw error("unknown declaration `" + name + "'");
}
// NOTE(review): any other character after "<!" falls through with no error.
}
else if (ch == '?') {
parsePI();
}
else
throw error(L.l("expected markup at {0}", badChar(ch)));
}
// parameter-entity reference: %name;
else if (ch == '%') {
ch = _reader.parseName(_buf, read());
if (ch != ';')
throw error(L.l("`%{0};' expects `;' at {1}. Parameter entities have a `%name;' syntax.", _buf, badChar(ch)));
addPEReference(_text, _buf.toString());
}
else {
throw error(L.l("expected '<' at {0}", badChar(ch)));
}
// _text is scratch space for each item
_text.clear();
}
_text.clear();
return read();
}
/**
* Parses an element start tag: name, attributes, optional DTD attribute
* defaults, and the tag terminator. Handles the regular form
* {@code <foo ...>}, the empty form {@code <foo/>} and the short form
* {@code <foo/some text here/>}.
*
* @param ch the current character, the first character of the tag name
*/
private void parseElement(int ch)
throws IOException, SAXException
{
  int next = _reader.parseName(_eltName, ch);

  // namespace scope in effect before this element's xmlns declarations
  NamespaceMap savedNamespace = _namespaceMap;

  if (next == '>' || next == '/')
    _attributes.clear();
  else
    next = parseAttributes(next, true);

  QName qname = _policy.getName(_eltName);

  if (_isValidating && _dtd != null) {
    QElementDef def = _dtd.getElement(qname.getName());

    if (def != null)
      def.fillDefaults(_attributes);
  }

  switch (next) {
  case '>':
    // regular open tag
    addElement(qname, false, _attributes, savedNamespace);
    return;

  case '/':
    break;

  default:
    throw error(L.l("unexpected character {0} while parsing `{1}' attributes. Expected an attribute name or `>' or `/>'. XML element syntax is:\n <name attr-1=\"value-1\" ... attr-n=\"value-n\">",
                    badChar(next), qname.getName()));
  }

  // after '/': "/>" is the empty tag, anything else starts short-form content
  next = _reader.read();
  boolean isEmpty = next == '>';

  addElement(qname, isEmpty, _attributes, savedNamespace);

  if (! isEmpty)
    parseNode(next, true);
}
/**
* Parses the attributes in an element (or in the XML declaration).
*
* <p>Namespace declarations (names starting with "xmlns") are processed
* immediately. All other attributes are first collected into
* _attrNames/_attrValues and resolved to qualified names only after the
* loop, once every namespace declaration of the tag has been seen.
*
* @param ch the next character to reader.read.
* @param isElement true when parsing an element's attributes, false for
* the XML declaration
*
* @return the next character to read.
*/
private int parseAttributes(int ch, boolean isElement)
throws IOException, SAXException
{
ch = skipWhitespace(ch);
_attributes.clear();
_attrNames.clear();
_attrValues.clear();
// true when whitespace (or the tag name) precedes the next attribute
boolean hasWhitespace = true;
while (ch != -1) {
if (! XmlChar.isNameStart(ch)) {
// In JSP mode a '<' here may start a <%= ... %> attribute.
if (! _isJsp || ch != '<')
break;
ch = parseJspAttribute(isElement);
continue;
}
if (! hasWhitespace)
throw error(L.l("attributes must be separated by whitespace"));
hasWhitespace = false;
ch = _reader.parseName(_text, ch);
// Ordinary attributes skip the else branch below.
if (! _text.startsWith("xmlns")) {
}
else {
// Namespace declaration: xmlns="uri" or xmlns:prefix="uri".
QName name;
if (_isNamespaceAware && _contentHandler instanceof DOMBuilder)
name = _policy.getNamespaceName(_text);
else
name = new QName(_text.toString(), null);
String prefix;
// NOTE(review): any name starting with "xmlns" lands here; substring(6)
// assumes the sixth character is ':' — confirm.
if (_text.length() > 5) {
prefix = _text.substring(6);
if (prefix.equals(""))
throw error(L.l("'{0}' is an illegal namespace declaration.",
_text));
}
else {
prefix = "";
}
_text.clear();
ch = skipWhitespace(ch);
if (ch != '=')
throw error(L.l("xmlns: needs value at {0}", badChar(ch)));
ch = skipWhitespace(_reader.read());
ch = parseValue(_text, ch, true);
hasWhitespace = isWhitespace(ch);
ch = skipWhitespace(ch);
// topNamespaceNode = element;
String uri = _text.toString();
// Push the binding and notify the handler.
if (_isXmlnsPrefix) {
_namespaceMap = new NamespaceMap(_namespaceMap, prefix, uri);
_policy.setNamespace(_namespaceMap);
_contentHandler.startPrefixMapping(prefix, uri);
}
// needed for xml/032e vs xml/00ke
if (isElement && _isXmlnsAttribute
&& _contentHandler instanceof DOMBuilder) {
_attributes.add(name, uri);
}
continue;
}
String attrName = _text.toString();
_attrNames.add(attrName);
_text.clear();
ch = skipWhitespace(ch);
String value = null;
if (ch == '=') {
ch = skipWhitespace(_reader.read());
ch = parseValue(_text, ch, true);
hasWhitespace = isWhitespace(ch);
ch = skipWhitespace(ch);
value = _text.toString();
}
else if (_strictAttributes) {
throw error(L.l("attribute `{0}' expects value at {1}. XML requires attributes to have explicit values.",
attrName, badChar(ch)));
}
else {
// A valueless attribute takes its own name as value.
value = attrName; // xxx: conflict xsl/0432
hasWhitespace = true;
}
_attrValues.add(value);
}
// Resolve the collected names now that all namespace declarations are known.
int len = _attrNames.size();
for (int i = 0; i < len; i++) {
String attrName = _attrNames.get(i);
String value = _attrValues.get(i);
_text.clear();
_text.append(attrName);
QName name;
if (_contentHandler instanceof DOMBuilder)
name = _policy.getAttributeName(_eltName, _text, true);
else
name = _policy.getAttributeName(_eltName, _text);
_attributes.add(name, value);
}
return ch;
}
/**
* Special parser to handle the use of <%= as an attribute in JSP
* files. Covers cases like the following:
*
* <pre>
* <options>
* <option name="foo" <%= test.isSelected("foo") %>/>
* </options>
* </pre>
*
* <p>The caller has already consumed the opening '<'. The expression text
* between "<%=" and "%>" is collected and, for elements, stored under
* JSP_ATTRIBUTE_NAME.
*
* @param isElement true when the attributes belong to an element; only
* then is the expression recorded as an attribute
*
* @return the next character to read.
*/
private int parseJspAttribute(boolean isElement)
throws IOException, XmlParseException
{
  int ch = _reader.read();

  if (ch != '%')
    throw error(L.l("unexpected char `{0}' in element", "%"));

  ch = _reader.read();

  if (ch != '=')
    throw error(L.l("unexpected char `{0}' in element", "="));

  _text.clear();

  ch = _reader.read();

  while (ch >= 0) {
    if (ch == '%') {
      ch = _reader.read();

      if (ch == '>') {
        ch = _reader.read();
        break;
      }

      // A '%' that does not start "%>" is part of the expression. Emit the
      // '%' itself and let the next iteration handle the character just
      // read, as parseNode() and parseValue() do. Appending that character
      // here would drop the '%' and duplicate the character (or append
      // (char) -1 at end of input).
      _text.append('%');
    }
    else {
      _text.append((char) ch);
      ch = _reader.read();
    }
  }

  String value = _text.toString();

  if (isElement)
    _attributes.add(JSP_ATTRIBUTE_NAME, value);

  return ch;
}
/**
* Handles an end tag. The policy is asked, for each open element from the
* innermost outward, whether to pop it and stop, pop it and keep going, or
* ignore the end tag. Strict XML normally just pops the current element;
* HTML policies may close several.
*
* @param endTagName name from the end tag, or "" at end of input
*/
private void closeTag(String endTagName)
throws IOException, SAXException
{
  for (;;) {
    if (_activeNode == null || _activeNode == DOC_NAME)
      break;

    int action = _policy.elementCloseAction(this, _activeNode, endTagName);

    if (action == Policy.POP) {
      popNode();
      return;
    }
    else if (action == Policy.POP_AND_LOOP) {
      // close this element and ask again for its parent
      popNode();
    }
    else if (action == Policy.IGNORE) {
      return;
    }
    else {
      throw new RuntimeException();
    }
  }

  // Reached document level without matching the end tag.
  boolean isNamed = endTagName != null && ! endTagName.equals("");

  if (isNamed && ! _extraForgiving)
    throw error(L.l("Unexpected end tag `</{0}>' at top-level. All open tags have already been closed.",
                    endTagName));
}
/**
* Handles processing of the resin:include tag. The file to include is
* taken from the current attributes: `path', or `href' when `path' is
* missing or empty.
*
* @throws SAXException if neither attribute supplies a non-empty value
*/
private void handleResinInclude()
throws IOException, SAXException
{
  String filename = _attributes.getValue("path");

  if (filename == null || filename.equals(""))
    filename = _attributes.getValue("href");

  // getValue() may return null for `href' too; report that as the same
  // parse error instead of failing with a NullPointerException.
  if (filename == null || filename.equals(""))
    throw error(L.l("<resin:include> expects a `path' attribute."));

  pushInclude(filename);
}
/**
* Handles processing of the resin:include-directory tag. The directory is
* taken from the `path' attribute (or `href' when `path' is missing or
* empty) and resolved against the search path, or against the parent of
* the current system id. Every entry that does not start with "." and
* matches the optional `extension' attribute is pushed as an include.
*
* @throws SAXException if no directory is given or the path is not a directory
*/
private void handleResinIncludeDirectory()
throws IOException, SAXException
{
  String filename = _attributes.getValue("path");

  if (filename == null || filename.equals(""))
    filename = _attributes.getValue("href");

  String extension = _attributes.getValue("extension");

  // getValue() may return null for `href' too; report that as the same
  // parse error instead of failing with a NullPointerException.
  if (filename == null || filename.equals(""))
    throw error(L.l("<resin:include> expects a `path' attribute."));

  Path pwd;

  if (_searchPath != null)
    pwd = _searchPath;
  else
    pwd = Vfs.lookup(_systemId).getParent();

  Path dir = pwd.lookup(filename);

  if (! dir.isDirectory())
    throw error(L.l("`{0}' is not a directory for resin:include-directory. The href for resin:include-directory must refer to a directory.",
                    dir.getNativePath()));

  String []list = dir.list();

  Arrays.sort(list);

  // Entries are pushed in reverse sorted order.
  // NOTE(review): presumably includes are consumed last-pushed-first, so
  // this yields ascending order — confirm against pushInclude().
  for (int i = list.length - 1; i >= 0; i--) {
    if (list[i].startsWith(".") ||
        extension != null && ! list[i].endsWith(extension))
      continue;

    pushInclude(dir.lookup(list[i]).getPath());
  }
}
/**
* Reads a name token, a non-empty run of name characters, into the buffer.
*
* @param name buffer that receives the token; cleared first
* @param ch the first character of the token
*
* @return the first character after the token
*
* @throws SAXException if ch is not a name character
*/
private int parseNameToken(CharBuffer name, int ch)
throws IOException, SAXException
{
  name.clear();

  if (! XmlChar.isNameChar(ch))
    throw error(L.l("expected name at {0}", badChar(ch)));

  // ch is known to be a name character here, so at least one is consumed
  int next = ch;

  do {
    name.append((char) next);
    next = _reader.read();
  } while (XmlChar.isNameChar(next));

  return next;
}
/**
* Pops the innermost open element: reports endElement to the content
* handler (unless already at document level), then restores the enclosing
* element and its namespace scope from the element stack.
*/
private void popNode()
throws SAXException
{
  QName closing = _activeNode;

  if (closing != DOC_NAME) {
    String uri = closing.getNamespaceURI();
    String localName = closing.getLocalName();

    if (uri == null) {
      // No namespace: report an empty uri; the local name is the full name
      // when namespace-aware and empty otherwise.
      uri = "";
      localName = _isNamespaceAware ? closing.getName() : "";
    }

    _contentHandler.endElement(uri, localName, closing.getName());
  }

  if (_elementTop > 0) {
    int top = --_elementTop;

    popNamespaces(_namespaces[top]);

    _activeNode = _elementNames[top];
  }

  // An empty stack means we are back at document level.
  if (_elementTop == 0)
    _activeNode = DOC_NAME;
}
/**
* Adds a prefix binding on top of the current namespace scope and makes
* the policy use the new scope.
*
* @param prefix the namespace prefix
* @param uri the namespace uri bound to the prefix
*/
public void pushNamespace(String prefix, String uri)
{
  NamespaceMap scope = new NamespaceMap(_namespaceMap, prefix, uri);

  _namespaceMap = scope;
  _policy.setNamespace(scope);
}
/**
* Unwinds the namespace scope back to oldMap, reporting endPrefixMapping
* for every binding removed, and makes the policy use the restored scope.
*
* @param oldMap the scope to restore
*/
private void popNamespaces(NamespaceMap oldMap)
throws SAXException
{
  while (_namespaceMap != null && _namespaceMap != oldMap) {
    _contentHandler.endPrefixMapping(_namespaceMap.prefix);

    _namespaceMap = _namespaceMap.next;
  }

  _namespaceMap = oldMap;
  _policy.setNamespace(oldMap);
}
/**
* Appends a string to the text buffer, recording the current filename and
* line first when the buffer is empty.
*
* @param s the text to append
*/
private void appendText(String s)
{
  boolean isFirstChunk = _text.length() == 0;

  if (isFirstChunk) {
    _textFilename = getFilename();
    _textLine = getLine();
  }

  _text.append(s);
}
/**
* Parses an entity reference:
*
* <pre>
* er ::= &#d+;
* ::= &name;
* </pre>
*
* <p>The leading '&' has already been consumed. In non-strict mode a
* malformed reference is kept as literal text.
*
* @return the next character to process
*
* @throws SAXException in strict mode, if the reference is malformed
*/
private int parseEntityReference()
throws IOException, SAXException
{
  int ch;

  ch = _reader.read();

  // character reference
  if (ch == '#') {
    addText((char) parseCharacterReference());

    return _reader.read();
  }
  // entity reference
  else if (XmlChar.isNameStart(ch)) {
    ch = _reader.parseName(_buf, ch);

    // {1} is the offending character. The message previously repeated {0},
    // so the bad character was never shown.
    if (ch != ';' && _strictXml)
      throw error(L.l("`&{0};' expected `;' at {1}. Entity references have a `&name;' syntax.", _buf, badChar(ch)));
    else if (ch != ';') {
      // forgiving: "&name" without ';' is plain text
      addText('&');
      addText(_buf.toString());
      return ch;
    }

    addEntityReference(_buf.toString());

    ch = _reader.read();

    return ch;
  } else if (_strictXml) {
    throw error(L.l("expected name at {0}", badChar(ch)));
  } else {
    // forgiving: a bare '&' is plain text
    addText('&');
    return ch;
  }
}
/**
* Parses the body of a character reference after "&#" has been consumed:
* decimal digits, or 'x' followed by hexadecimal digits, terminated by ';'.
*
* @return the referenced character code, in the range 0 to 0xffff
*
* @throws SAXException if a non-digit is found, the value exceeds 0xffff,
* or strict character checking rejects the value
*/
private int parseCharacterReference()
throws IOException, SAXException
{
  int ch = _reader.read();

  int radix = 10;
  if (ch == 'x') {
    radix = 16;
    ch = _reader.read();
  }

  int value = 0;
  for (; ch != ';'; ch = _reader.read()) {
    int digit;

    if (ch >= '0' && ch <= '9')
      digit = ch - '0';
    else if (radix == 16 && ch >= 'a' && ch <= 'f')
      digit = ch - 'a' + 10;
    else if (radix == 16 && ch >= 'A' && ch <= 'F')
      digit = ch - 'A' + 10;
    else
      throw error(L.l("malformed entity ref at {0}", badChar(ch)));

    value = radix * value + digit;

    // Reject as soon as the limit is passed. Checking only after the loop
    // let a long digit string overflow the int and wrap to a small or
    // negative value that slipped through.
    if (value > 0xffff)
      throw error(L.l("malformed entity ref at {0}", "" + value));
  }

  // xml/0072
  if (_strictCharacters && ! isChar(value))
    throw error(L.l("illegal character ref at {0}", badChar(value)));

  return value;
}
/**
* Looks up a named entity reference, filling the text.
*/
private void addEntityReference(String name)
throws IOException, SAXException
{
boolean expand = ! _entitiesAsText || _hasDoctype || ! _switchToXml;
// XXX: not quite the right logic. There should be a soft expandEntities
if (! expand) {
addText("&" + name + ";");
return;
}
int ch = _entities.getEntity(name);
if (ch >= 0 && ch <= 0xffff) {
addText((char) ch);
return;
}
QEntity entity = _dtd == null ? null : _dtd.getEntity(name);
if (! _expandEntities) {
addText("&" + name + ";");
return;
}
if (entity == null && (_dtd == null || _dtd.getName() == null ||
! _dtd.isExternal())) {
if (_strictXml)
throw error(L.l("`&{0};' is an unknown entity. XML predefines only `<', `&', `>', `'' and `"'. All other entities must be defined in an <!ENTITY> definition in the DTD.", name));
else {
if (expand && _contentHandler instanceof DOMBuilder) {
appendText();
((DOMBuilder) _contentHandler).entityReference(name);
}
else
addText("&" + name + ";");
}
}
else if (entity != null) {
if (expand && entity._isSpecial && entity._value != null)
addText(entity._value);
else if (entity.getSystemId() != null) {
if (pushSystemEntity(entity)) {
}
/* XXX:??
else if (strictXml) {
throw error(L.l("can't open external entity at `&{0};'", name));
}
*/
else if (_contentHandler instanceof DOMBuilder) {
appendText();
((DOMBuilder) _contentHandler).entityReference(name);
}
else
addText("&" + name + ";");
}
else if (expand && entity._value != null)
setMacro(entity._value);
else
addText("&" + name + ";");
}
else {
if (expand && _contentHandler instanceof DOMBuilder) {
appendText();
((DOMBuilder) _contentHandler).entityReference(name);
}
else // XXX: error?
addText("&" + name + ";");
}
}
/**
* Opens an external entity and makes it the current input.
*
* <p>The entity resolver, if any, is consulted first; its byte stream,
* character stream or system id is used in that order. Otherwise the
* entity's own system id is looked up on the search path. On success a new
* reader chained to the current one is installed by parseXMLDeclaration().
*
* @param entity the external entity to open
*
* @return true if the entity was opened, false if no stream could be found
*/
private boolean pushSystemEntity(QEntity entity)
throws IOException, SAXException
{
  String publicId = entity.getPublicId();
  String systemId = entity.getSystemId();
  InputSource source = null;
  ReadStream is = null;

  if (_entityResolver != null)
    source = _entityResolver.resolveEntity(publicId, systemId);

  if (source != null && source.getByteStream() != null)
    is = Vfs.openRead(source.getByteStream());
  else if (source != null && source.getCharacterStream() != null)
    is = Vfs.openRead(source.getCharacterStream());
  // _searchPath may be null (the branch below checks it too), so guard it
  // here rather than fail with a NullPointerException.
  else if (source != null && source.getSystemId() != null &&
           _searchPath != null &&
           _searchPath.lookup(source.getSystemId()).isFile()) {
    _owner.addDepend(_searchPath.lookup(source.getSystemId()));
    is = _searchPath.lookup(source.getSystemId()).openRead();
  }
  else if (systemId != null && ! systemId.equals("")) {
    String path = systemId;
    // strip the "file:" scheme before the lookup
    if (path.startsWith("file:"))
      path = path.substring(5);
    if (_searchPath != null && _searchPath.lookup(path).isFile()) {
      _owner.addDepend(_searchPath.lookup(path));
      is = _searchPath.lookup(path).openRead();
    }
  }

  if (is == null)
    return false;

  _filename = systemId;
  _systemId = systemId;

  Path oldSearchPath = _searchPath;
  Path path = is.getPath();
  if (path != null) {
    _owner.addDepend(path);

    // The search path becomes the entity's directory; the previous one is
    // stored on the current reader.
    if (_searchPath != null) {
      _searchPath = path.getParent();
      _reader.setSearchPath(oldSearchPath);
    }
  }

  _is = is;
  _line = 1;

  // parseXMLDeclaration() creates the reader for the new stream and chains
  // it to the old one.
  XmlReader oldReader = _reader;
  _reader = null;

  int ch = parseXMLDeclaration(oldReader);
  unread(ch);

  return true;
}
/**
* Parses an attribute value.
*
* <pre>
* value ::= '[^']*'
* ::= "[^"]*"
* ::= [^ />]*
* </pre>
*
* <p>Quoted values run to the matching quote. Unquoted values, rejected
* when _strictAttributes is set, run until a character for which
* isAttributeChar() is false, or until a '/' followed by whitespace or '>'.
*
* @param value the CharBuffer which will contain the value.
* @param ch the next character from the input stream.
* @param isGeneral true if general entities are allowed.
*
* @return the following character from the input stream
*/
private int parseValue(CharBuffer value, int ch, boolean isGeneral)
throws IOException, SAXException
{
// end is the closing quote character, or 0 for an unquoted value
int end = ch;
value.clear();
if (end == '\'' || end == '"')
ch = _reader.read();
else if (_strictAttributes) {
// collect the offending token only for the error message
value.append((char) end);
for (ch = _reader.read();
ch >= 0 && XmlChar.isNameChar(ch);
ch = _reader.read())
value.append((char) ch);
throw error(L.l("XML attribute value must be quoted at `{0}'. XML attribute syntax is either attr=\"value\" or attr='value'.",
value));
}
else
end = 0;
while (ch != -1 && (end != 0 && ch != end ||
end == 0 && isAttributeChar(ch))) {
// Unquoted value: a '/' followed by whitespace or '>' ends the value.
if (end == 0 && ch == '/') {
ch = _reader.read();
if (! isWhitespace(ch) && ch != '>') {
value.append('/');
value.append((char) ch);
}
else {
unread(ch);
return '/';
}
}
// entity and character references
else if (ch == '&' && ! _entitiesAsText) {
if ((ch = _reader.read()) == '#')
value.append((char) parseCharacterReference());
else if (! isGeneral) {
value.append('&');
value.append((char) ch);
}
else if (XmlChar.isNameStart(ch)) {
ch = _reader.parseName(_buf, ch);
String name = _buf.toString();
if (ch != ';' && _strictXml)
throw error(L.l("expected `{0}' at {1}", ";", badChar(ch)));
else if (ch != ';') {
// forgiving: keep "&name" as text and re-examine ch
value.append('&');
value.append(name);
continue;
} else {
int lookup = _entities.getEntity(name);
if (lookup >= 0 && lookup <= 0xffff) {
ch = _reader.read();
value.append((char) lookup);
continue;
}
QEntity entity = _dtd == null ? null : _dtd.getEntity(name);
if (entity != null && entity._value != null)
setMacroAttr(entity._value);
else if (_strictXml)
throw error(L.l("expected local reference at `&{0};'", name));
else {
value.append('&');
value.append(name);
value.append(';');
}
}
}
// NOTE(review): with isGeneral set, an '&' followed by neither '#' nor
// a name start adds nothing, and the read at the loop end skips that
// following character — confirm.
}
// parameter-entity reference, only when general entities are not allowed
else if (ch == '%' && ! isGeneral) {
ch = _reader.read();
if (! XmlChar.isNameStart(ch)) {
value.append('%');
continue;
}
else {
ch = _reader.parseName(_buf, ch);
if (ch != ';')
throw error(L.l("expected `{0}' at {1}", ";", badChar(ch)));
else
addPEReference(value, _buf.toString());
}
}
// JSP: copy a <% ... %> block through verbatim
else if (ch == '<' && _isJsp) {
value.append('<');
ch = _reader.read();
if (ch != '%')
continue;
value.append('%');
ch = _reader.read();
while (ch >= 0) {
if (ch == '%') {
ch = _reader.read();
if (ch == '>') {
value.append("%>");
break;
}
else
value.append('%');
}
else {
value.append((char) ch);
ch = _reader.read();
}
}
}
// general values: "\r\n" and a lone "\r" both become "\n"
else if (isGeneral) {
if (ch == '\r') {
ch = _reader.read();
if (ch != '\n') {
value.append('\n');
continue;
}
}
value.append((char) ch);
}
// non-general values: line breaks become a single space
else if (ch == '\r') {
value.append(' ');
if ((ch = _reader.read()) != '\n')
continue;
}
else if (ch == '\n')
value.append(' ');
else
value.append((char) ch);
ch = _reader.read();
}
// step past the closing quote
if (end != 0)
ch = _reader.read();
return ch;
}
/**
* Returns true if the character may appear in an unquoted attribute value,
* i.e. it is neither whitespace nor one of {@code < > ' " =}.
*
* @param ch the character to test
*/
private boolean isAttributeChar(int ch)
{
  boolean isSpace = ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
  boolean isDelimiter = (ch == '<' || ch == '>' || ch == '\''
                         || ch == '"' || ch == '=');

  return ! (isSpace || isDelimiter);
}
/**
* Reads raw character data up to the end tag of the given node. One
* leading newline is skipped, and one newline directly before the end tag
* is dropped.
*
* @param node the node whose end tag terminates the text
*
* @throws SAXException if the input ends before the end tag
*/
private void parsePcdata(QNode node) throws IOException, SAXException
{
  String tail = "</" + node.getNodeName() + ">";

  _text.clear();

  int ch = _reader.read();
  if (ch == '\n')
    ch = _reader.read();

  while (ch != -1) {
    addText((char) ch);

    if (_text.endsWith(tail)) {
      // strip the end tag itself
      _text.setLength(_text.length() - tail.length());

      int len = _text.length();
      if (len > 1 && _text.charAt(len - 1) == '\n')
        _text.setLength(len - 1);

      appendText();
      return;
    }

    ch = _reader.read();
  }

  throw error("bad pcdata");
}
/**
* Sniffs the encoding from the first bytes of the stream, installs the
* matching reader chained to oldReader, and parses the XML declaration if
* the input starts with one.
*
* @param oldReader the enclosing reader to return to, or null for the
* top-level document
*
* @return the next character to process
*/
private int parseXMLDeclaration(XmlReader oldReader)
throws IOException, SAXException
{
// remembered so the EBCDIC case can re-read from the start
int startOffset = _is.getOffset();
boolean isEBCDIC = false;
int ch = _is.read();
XmlReader reader = null;
// utf-16 starts with \xfe \xff
if (ch == 0xfe) {
ch = _is.read();
if (ch == 0xff) {
_owner.setAttribute("encoding", "UTF-16");
_is.setEncoding("utf-16");
reader = new Utf16Reader(this, _is);
ch = reader.read();
}
// NOTE(review): if the second byte is not 0xff, the 0xfe byte has been
// consumed and is not restored — confirm.
}
// utf-16 rev starts with \xff \xfe
else if (ch == 0xff) {
ch = _is.read();
if (ch == 0xfe) {
_owner.setAttribute("encoding", "UTF-16");
_is.setEncoding("utf-16");
reader = new Utf16Reader(this, _is);
((Utf16Reader) reader).setReverse(true);
ch = reader.read();
}
}
// utf-16 can also start with \x00 <
else if (ch == 0x00) {
ch = _is.read();
_owner.setAttribute("encoding", "UTF-16");
_is.setEncoding("utf-16");
reader = new Utf16Reader(this, _is);
}
// utf-8 BOM is \xef \xbb \xbf
else if (ch == 0xef) {
ch = _is.read();
if (ch == 0xbb) {
ch = _is.read();
if (ch == 0xbf) {
ch = _is.read();
_owner.setAttribute("encoding", "UTF-8");
_is.setEncoding("utf-8");
reader = new Utf8Reader(this, _is);
}
}
}
// 0x4c is treated as the first byte of an EBCDIC document
else if (ch == 0x4c) {
// ebcdic
// xml/00l1
_is.unread();
// _is.setEncoding("cp037");
_is.setEncoding("cp500");
isEBCDIC = true;
reader = new XmlReader(this, _is);
ch = reader.read();
}
else {
// A zero second byte is taken as UTF-16LE without a byte-order mark.
int ch2 = _is.read();
if (ch2 == 0x00) {
_owner.setAttribute("encoding", "UTF-16LE");
_is.setEncoding("utf-16le");
reader = new Utf16Reader(this, _is);
((Utf16Reader) reader).setReverse(true);
}
else if (ch2 > 0)
_is.unread();
}
// Keep a reader chosen above; otherwise pick a default one.
if (reader != null && reader != oldReader) {
}
else if (_policy instanceof HtmlPolicy ||
_is.getSource() instanceof ReaderWriterStream) {
reader = new XmlReader(this, _is);
}
else {
reader = new Utf8Reader(this, _is);
}
// The first character was read outside the reader's line counting.
if (ch == '\n')
reader.setLine(2);
reader.setSystemId(_systemId);
if (_systemId == null)
reader.setSystemId(_filename);
reader.setFilename(_filename);
reader.setPublicId(_publicId);
// chain to the enclosing reader
reader.setNext(oldReader);
_reader = reader;
/* XXX: this might be too strict. */
/*
if (! strictXml) {
for (; XmlChar.isWhitespace(ch); ch = reader.read()) {
}
}
*/
if (ch != '<')
return ch;
if (parseXMLDecl(_reader) && isEBCDIC) {
// EBCDIC requires a re-read
_is.setOffset(startOffset);
ch = _reader.read();
if (ch != '<')
throw new IllegalStateException();
parseXMLDecl(_reader);
}
return _reader.read();
}
/**
* Parses an XML declaration ({@code <?xml ... ?>}) whose leading '<' has
* already been read. Its pseudo-attributes are copied to the owner
* document, and an `encoding' other than UTF-8/UTF-16 switches the stream
* encoding and installs a new reader.
*
* <p>If the markup is not a processing instruction, the characters are
* pushed back. If it is a processing instruction other than `xml', it is
* handled by parsePITail().
*
* @param reader the reader positioned just after the '<'
*
* @return true if an XML declaration was parsed, false otherwise
*/
private boolean parseXMLDecl(XmlReader reader)
throws IOException, SAXException
{
  int ch = reader.read();
  if (ch != '?') {
    // not a PI: restore the input, including the '<' the caller consumed
    unread((char) ch);
    unread('<');
    return false;
  }

  ch = _reader.read();
  if (! XmlChar.isNameStart(ch))
    throw error(L.l("expected name after '<?' at {0}. Processing instructions expect a name like <?foo ... ?>", badChar(ch)));

  ch = _reader.parseName(_text, ch);
  String piName = _text.toString();

  if (! piName.equals("xml")) {
    ch = parsePITail(piName, ch);
    unread(ch);
    return false;
  }

  // An XML declaration at document level switches to the XML policy.
  if (_switchToXml && _activeNode == DOC_NAME && ! _inDtd) {
    _policy = new XmlPolicy();
  }

  ch = parseAttributes(ch, false);

  if (ch != '?')
    throw error(L.l("expected `?' at {0}. Processing instructions end with `?>' like <?foo ... ?>", badChar(ch)));

  // {0} is the offending character. An extra ">" argument used to take its
  // place, so the message always printed ">".
  if ((ch = _reader.read()) != '>')
    throw error(L.l("expected `>' at {0}. Processing instructions end with `?>' like <?foo ... ?>", badChar(ch)));

  for (int i = 0; i < _attributes.getLength(); i++) {
    QName name = _attributes.getName(i);
    String value = _attributes.getValue(i);

    if (_owner != null)
      _owner.setAttribute(name.getName(), value);

    if (name.getName().equals("encoding")) { // xml/00hb // && ! _inDtd) {
      String encoding = value;

      // UTF-8/UTF-16 readers and character-based sources are left alone.
      if (! _isStaticEncoding &&
          ! encoding.equalsIgnoreCase("UTF-8") &&
          ! encoding.equalsIgnoreCase("UTF-16") &&
          ! (_is.getSource() instanceof ReaderWriterStream)) {
        _is.setEncoding(encoding);

        // Replace the reader, carrying over the line number. The link to
        // the enclosing reader is not carried over.
        XmlReader oldReader = _reader;
        _reader = new XmlReader(this, _is);
        // _reader.setNext(oldReader);
        _reader.setLine(oldReader.getLine());
        _reader.setSystemId(_filename);
        _reader.setPublicId(null);
      }
    }
  }

  return true;
}
/**
 * Parses a processing instruction found in content; the "<?" has already
 * been read.  Pending text is flushed first.
 *
 * <p>An instruction named "xml" is only accepted while the parser may
 * still switch to XML (at the document node, outside the DTD); in that
 * case the policy becomes an XmlPolicy.  Otherwise it is an error.
 *
 * @return the character returned by parsePITail
 */
private int parsePI()
  throws IOException, SAXException
{
  appendText();

  int first = _reader.read();
  if (! XmlChar.isNameStart(first))
    throw error(L.l("expected name after '<?' at {0}. Processing instructions expect a name like <?foo ... ?>", badChar(first)));

  int next = _reader.parseName(_text, first);
  String target = _text.toString();

  if (target.equals("xml")) {
    boolean maySwitch = _switchToXml && _activeNode == DOC_NAME && ! _inDtd;

    if (! maySwitch)
      throw error(L.l("<?xml ... ?> occurs after content. The <?xml ... ?> prolog must be at the document start."));

    _policy = new XmlPolicy();
  }

  return parsePITail(target, next);
}
/**
 * Reads the data part of a processing instruction up to the closing
 * "?>" (or end of input) and reports it.
 *
 * <p>Inside the DTD the instruction is appended to the DTD as a
 * QProcessingInstruction; otherwise it is sent to the content handler.
 *
 * @param piName the instruction's target name
 * @param ch the character following the target name
 * @return the character read after the instruction
 */
private int parsePITail(String piName, int ch)
  throws IOException, SAXException
{
  ch = skipWhitespace(ch);
  _text.clear();

  while (ch != -1) {
    if (ch != '?') {
      _text.append((char) ch);
      ch = _reader.read();
      continue;
    }

    ch = _reader.read();
    if (ch == '>')
      break;

    // a lone '?' is ordinary data; the character after it is re-examined
    _text.append('?');
  }

  String data = _text.toString();

  if (! _inDtd) {
    _contentHandler.processingInstruction(piName, data);
  }
  else {
    QProcessingInstruction pi = new QProcessingInstruction(piName, data);
    pi._owner = _dtd._owner;
    _dtd.appendChild(pi);
  }

  return _reader.read();
}
/**
 * Parses a comment.  The first character read here must be the final
 * '-' of the "<!--" opener; the comment text follows it.
 *
 * <p>The text is collected in _buf up to the closing "-->" (or end of
 * input).  A "--" inside the comment is an error when _strictComments
 * is set; otherwise the dashes are kept verbatim.
 *
 * <p>Inside the DTD the comment becomes a QComment child of the DTD.
 * Otherwise, unless comments are skipped, it is reported to an XMLWriter
 * content handler or to the lexical handler.
 */
private void parseComment()
  throws IOException, SAXException
{
  if (! _skipComments)
    appendText();

  int ch = _reader.read();

  if (ch != '-')
    throw error(L.l("expected comment at {0}", badChar(ch)));

  ch = _reader.read();

  // Always start from an empty buffer.  Clearing only when comments are
  // reported let the buffer grow across skipped comments and leaked old
  // text into DTD comments.
  _buf.clear();

  comment:
  while (ch != -1) {
    if (ch == '-') {
      // one dash is pending
      ch = _reader.read();

      while (ch == '-') {
        // two dashes are pending
        if ((ch = _reader.read()) == '>')
          break comment;
        else if (_strictComments)
          throw error(L.l("XML forbids `--' in comments"));
        else if (ch == '-') {
          // three in a row: emit the oldest, two remain pending
          _buf.append('-');
        }
        else {
          // "--x": emit one dash here; the append after the loop emits
          // the other, so exactly two dashes are stored
          _buf.append('-');
          break;
        }
      }

      _buf.append('-');
    } else if (! XmlChar.isChar(ch)) {
      throw error(L.l("bad character {0}", hex(ch)));
    } else {
      _buf.append((char) ch);
      ch = _reader.read();
    }
  }

  if (_inDtd) {
    QComment comment = new QComment(_buf.toString());
    comment._owner = _dtd._owner;
    _dtd.appendChild(comment);
  }
  else if (_skipComments) {
    // comments are dropped outside the DTD
  }
  else if (_contentHandler instanceof XMLWriter) {
    ((XMLWriter) _contentHandler).comment(_buf.toString());
    _isIgnorableWhitespace = true;
  }
  else if (_lexicalHandler != null) {
    _lexicalHandler.comment(_buf.getBuffer(), 0, _buf.getLength());
    _isIgnorableWhitespace = true;
  }
}
/**
 * Parses a CDATA section; the "<![" has already been read.
 *
 * <pre>
 * cdata ::= &lt;![CDATA[ ... ]]&gt;
 * </pre>
 *
 * <p>If the "CDATA[" keyword does not match, forgiving mode passes the
 * characters read so far to appendText and returns; otherwise an error
 * is thrown.  The section body is added to the text buffer up to "]]>"
 * or end of input.  startCDATA/endCDATA are reported when a lexical
 * handler is present.
 */
private void parseCdata()
  throws IOException, SAXException
{
  final String keyword = "CDATA[";
  int ch;

  for (int matched = 0; matched < keyword.length(); matched++) {
    ch = _reader.read();

    if (ch == keyword.charAt(matched))
      continue;

    if (! _forgiving)
      throw error(L.l("expected `<![CDATA[' at {0}", badChar(ch)));

    // not a CDATA section after all: pass through what was consumed
    appendText("<![" + keyword.substring(0, matched) + (char) ch);
    return;
  }

  ch = _reader.read();

  if (_lexicalHandler != null) {
    _lexicalHandler.startCDATA();
    appendText();
  }
  else if (! _isCoalescing)
    appendText();

  cdata:
  while (ch != -1) {
    if (ch != ']') {
      if (_strictCharacters && ! isChar(ch))
        throw error(L.l("expected character in cdata at {0}", badChar(ch)));

      addText((char) ch);
      ch = _reader.read();
      continue;
    }

    // one ']' is pending
    ch = _reader.read();

    while (ch == ']') {
      // two ']' are pending
      ch = _reader.read();

      if (ch == '>')
        break cdata;

      // the oldest ']' is plain text
      addText(']');

      if (ch != ']')
        break;
    }

    addText(']');
  }

  if (_lexicalHandler != null) {
    appendText();
    _lexicalHandler.endCDATA();
  }
  else if (! _isCoalescing)
    appendText();
}
/**
 * Skips input up to and including the next "]]>", or to end of input.
 *
 * <p>Runs of more than two ']' are handled: in "]]]>" the last two
 * brackets still form the terminator.
 */
private void parseIgnore()
  throws IOException, SAXException
{
  int ch = read();

  while (ch >= 0) {
    if (ch != ']') {
      ch = read();
      continue;
    }

    ch = read();
    if (ch != ']')
      continue;

    // "]]" seen: any further ']' still leaves "]]" pending
    do {
      ch = read();
    } while (ch == ']');

    if (ch == '>')
      return;
  }
}
/**
 * Parses the content specification of an element declaration and stores
 * it in the definition: the string "EMPTY" or "ANY", or a
 * QContentParticle for a parenthesized grammar.
 *
 * @param def the element definition receiving the content model
 * @param ch the first character of the specification
 * @return the character following the specification
 */
private int parseContentSpec(QElementDef def, int ch)
  throws IOException, SAXException
{
  ch = expandPE(ch);

  if (! XmlChar.isNameStart(ch)) {
    if (ch != '(')
      throw error(L.l("expected grammar definition starting with '(' at {0}. <!ELEMENT> definitions have the syntax <!ELEMENT name - - (grammar)>", badChar(ch)));

    QContentParticle particle = new QContentParticle();
    def._content = particle;

    return parseContentParticle(particle, true);
  }

  ch = _reader.parseName(_text, ch);
  String keyword = _text.toString();

  if (keyword.equals("EMPTY"))
    def._content = "EMPTY";
  else if (keyword.equals("ANY"))
    def._content = "ANY";
  else
    throw error(L.l("expected EMPTY or ANY at `{0}'", keyword));

  return ch;
}
/**
* Parses a content particle, i.e. a grammar particle in the DTD
* regexp.  The opening '(' has already been consumed.
*
* <p>Children are nested particles, element names, or "#PCDATA".  A
* trailing '?', '*' or '+' after a child is stored as that child's
* repeat; a plain name is wrapped in its own particle to hold it.  The
* first separator seen ('|', ',' or, when not strict, '&') is
* recorded and every later separator must match it.
*
* @param cp the particle to fill; its separator and repeat are reset
* @param isTop true for the outermost particle of a declaration
* @return the character following the particle and its optional repeat
*/
private int parseContentParticle(QContentParticle cp, boolean isTop)
throws IOException, SAXException
{
boolean hasCdata = false;
cp._separator = 0;
cp._repeat = 0;
int ch;
ch = expandPE(_reader.read());
// one iteration per child of this particle
for (; ch != -1; ch = expandPE(ch)) {
if (ch == '(') {
// nested group: parsed recursively
QContentParticle child = new QContentParticle();
cp.addChild(child);
ch = parseContentParticle(child, false);
}
else if (XmlChar.isNameStart(ch)) {
ch = _reader.parseName(_text, ch);
cp.addChild(_text.toString());
}
else if (ch == '#') {
// only #PCDATA is accepted after '#'
ch = _reader.parseName(_text, _reader.read());
String name = _text.toString();
if (_strictXml && cp._children.size() != 0)
throw error(L.l("`#{0}' must occur first", name));
if (_strictXml && ! isTop)
throw error(L.l("`#{0}' may only occur at top level", name));
if (name.equals("PCDATA"))
cp.addChild("#PCDATA");
else
throw error(L.l("illegal content particle at `#{0}'", name));
hasCdata = true;
}
else
throw error(L.l("expected content particle at {0}", badChar(ch)));
ch = expandPE(ch);
// optional repeat operator applying to the child just added
if (ch == '?' || ch == '*' || ch == '+') {
Object child = cp.getChild(cp.getChildSize() - 1);
if (child instanceof QContentParticle) {
QContentParticle cpChild = (QContentParticle) child;
cpChild._repeat = ch;
}
else {
// a bare name cannot carry a repeat: wrap it in a particle
QContentParticle cpChild = new QContentParticle();
cpChild.addChild(child);
cpChild._repeat = ch;
cp.setChild(cp.getChildSize() - 1, cpChild);
}
ch = expandPE(_reader.read());
}
if (ch == ')')
break;
else if (cp._separator == 0) {
// first separator decides the kind of this group
if (ch == '|')
cp._separator = ch;
else if (hasCdata && _strictXml)
throw error(L.l("#PCDATA must be separated by `|' at {0}",
badChar(ch)));
else if (ch == ',')
cp._separator = ch;
else if (! _strictXml && ch =='&')
cp._separator = ch;
else
throw error(L.l("expected separator at {0}", badChar(ch)));
ch = _reader.read();
} else if (ch != cp._separator)
throw error(L.l("expected `{0}' at {1}",
"" + (char) cp._separator, badChar(ch)));
else
ch = _reader.read();
}
// character after the closing ')': optional repeat for the whole group
ch = expandPE(_reader.read());
if (_strictXml && hasCdata && (ch == '+' || ch == '?'))
throw error(L.l("pcdata clause can not have {0}", badChar(ch)));
else if (ch == '*' || ch == '+' || ch == '?') {
cp._repeat = ch;
return _reader.read();
}
else
return ch;
}
/**
 * Skips whitespace and expands any parameter-entity references ("%name;")
 * found at the current position.
 *
 * @param ch the current character
 * @return the first character that is neither whitespace nor '%'
 */
private int expandPE(int ch)
  throws IOException, SAXException
{
  for (ch = skipWhitespace(ch);
       ch == '%';
       ch = skipWhitespace(_reader.read())) {
    parsePEReference();
  }

  return ch;
}
/**
 * Parses a parameter-entity reference; the '%' has already been read.
 * The name must be terminated by ';'.  The reference is then handed to
 * addPEReference for expansion.
 */
private void parsePEReference()
  throws IOException, SAXException
{
  int first = _reader.read();
  int terminator = _reader.parseName(_buf, first);

  if (terminator == ';') {
    addPEReference(_text, _buf.toString());
    return;
  }

  throw error(L.l("`%{0};' expects `;' at {1}. Parameter entities have a `%name;' syntax.", _buf, badChar(terminator)));
}
/**
 * Expands a parameter-entity reference.
 *
 * <p>An entity with a literal value is pushed as macro text; an entity
 * with a system id is pushed as an include.  Anything else is written
 * back to {@code value} as the literal "%name;".  An unknown entity is
 * an error unless the DTD is external.
 *
 * @param value buffer receiving the unexpanded reference text
 * @param name the parameter entity's name
 */
private void addPEReference(CharBuffer value, String name)
  throws IOException, SAXException
{
  QEntity entity = _dtd.getParameterEntity(name);

  if (entity == null && ! _dtd.isExternal())
    throw error(L.l("`%{0};' is an unknown parameter entity. Parameter entities must be defined in an <!ENTITY> declaration before use.", name));

  if (entity != null) {
    if (entity._value != null) {
      setMacro(entity._value);
      return;
    }

    if (entity.getSystemId() != null) {
      pushInclude(entity.getPublicId(), entity.getSystemId());
      return;
    }
  }

  // nothing to expand: keep the reference as written
  value.append("%");
  value.append(name);
  value.append(";");
}
/**
 * Parses an element declaration; the "&lt;!ELEMENT" has already been read.
 *
 * <pre>
 * &lt;!ELEMENT name contentspec&gt;
 * &lt;!ELEMENT name - O contentspec&gt;   (only when optional tags are enabled)
 * </pre>
 *
 * <p>The element is registered with the DTD and its content model is
 * parsed by parseContentSpec.  With optional tags enabled, the two tag
 * minimization flags ('-' or 'O' each) are validated and skipped.  They
 * are not recorded: the code this replaces only assigned them to locals
 * that were never read.
 *
 * @param doctype the document type being parsed (not used directly here)
 */
private void parseElementDecl(QDocumentType doctype)
  throws IOException, SAXException
{
  int ch = skipWhitespace(_reader.read());

  ch = _reader.parseName(_text, ch);
  String name = _text.toString();

  ch = skipWhitespace(ch);

  QElementDef def = _dtd.addElement(name);
  def.setLocation(getSystemId(), getFilename(), getLine(), getColumn());

  if (_optionalTags && (ch == 'O' || ch == '-')) {
    // ch is the start-tag flag; move on to the end-tag flag
    ch = skipWhitespace(_reader.read());

    // letter 'O' (omissible) or '-' (required)
    if (ch != 'O' && ch != '-')
      throw error(L.l("unknown short tag"));

    // consume the end-tag flag so the content spec starts at ch
    ch = skipWhitespace(_reader.read());
  }

  ch = parseContentSpec(def, ch);

  ch = skipWhitespace(ch);

  if (ch != '>')
    throw error(L.l("`<!ELEMENT' must close with `>' at {0}", badChar(ch)));
}
private static String toAttrDefault(CharBuffer text)
{
for (int i = 0; i < text.length(); i++) {
int ch = text.charAt(i);
if (ch == '"') {
text.delete(i, i + 1);
text.insert(i, """);
i--;
} else if (ch == '\'') {
text.delete(i, i + 1);
text.insert(i, "'");
i--;
}
}
return text.toString();
}
/**
* Parses an attribute-list declaration; the "&lt;!ATTLIST" has already
* been read.
*
* <pre>
* &lt;!ATTLIST name (attr type def)*&gt;
* </pre>
*
* <p>Each attribute has a name, a type (an enumeration in parentheses,
* NOTATION followed by an enumeration, or a name found in _attrTypes)
* and an optional default: #IMPLIED, #REQUIRED, #FIXED with a value, or
* a bare value.  Attributes of type ID are also registered on the
* document type.
*
* @param doctype the document type that receives element-id mappings
*/
private void parseAttlistDecl(QDocumentType doctype)
throws IOException, SAXException
{
int ch = skipWhitespace(_reader.read());
ch = _reader.parseName(_text, ch);
String name = _text.toString();
ch = skipWhitespace(ch);
QElementDef def = _dtd.addElement(name);
// one iteration per attribute definition
while (XmlChar.isNameStart((ch = expandPE(ch)))) {
ch = _reader.parseName(_text, ch);
String attrName = _text.toString();
String attrType = null;
ArrayList<String> enumeration = null;
ch = expandPE(ch);
if (ch == '(') {
// enumerated type: (token | token | ...)
attrType = "#ENUM";
enumeration = new ArrayList<String>();
do {
ch = expandPE(_reader.read());
ch = parseNameToken(_text, ch);
enumeration.add(_text.toString());
ch = expandPE(ch);
} while (ch == '|');
if (ch != ')')
throw error(L.l("expected `{0}' at {1}. <!ATTRLIST> enumerations definitions are enclosed in '(' ... ')'.", ")", badChar(ch)));
ch = _reader.read();
}
else {
ch = _reader.parseName(_text, ch);
attrType = _text.toString();
if (attrType.equals("NOTATION")) {
// NOTATION (name | name | ...)
enumeration = new ArrayList<String>();
ch = expandPE(ch);
if (ch != '(')
throw error(L.l("expected `{0}' at {1}", "(", badChar(ch)));
do {
ch = expandPE(_reader.read());
ch = _reader.parseName(_text, ch);
enumeration.add(_text.toString());
ch = expandPE(ch);
} while (ch == '|');
if (ch != ')')
throw error(L.l("expected `{0}' at {1}", ")", badChar(ch)));
ch = _reader.read();
}
else if (_attrTypes.get(attrType) != null) {
// a known attribute type: nothing more to parse
}
else
throw error(L.l("expected attribute type at `{0}'", attrType));
}
ch = skipWhitespace(ch);
String qualifier = null;
String attrDefault = null;
if (ch == '#') {
ch = _reader.parseName(_text, _reader.read());
qualifier = "#" + _text.toString();
if (qualifier.equals("#IMPLIED")) {
}
else if (qualifier.equals("#REQUIRED")) {
}
else if (qualifier.equals("#FIXED")) {
// #FIXED is followed by the fixed value
ch = skipWhitespace(ch);
ch = parseValue(_text, ch, false);
attrDefault = _text.toString();
} else
throw error(L.l("expected attribute default at `{0}'",
qualifier));
}
else if (ch != '>') {
// a bare default value with no qualifier
ch = parseValue(_text, ch, false);
attrDefault = _text.toString();
}
def.addAttribute(attrName, attrType, enumeration,
qualifier, attrDefault);
if (attrType != null && attrType.equals("ID"))
doctype.setElementId(name, attrName);
ch = skipWhitespace(ch);
}
if (ch != '>')
throw error(L.l("expected `{0}' at {1}", ">", badChar(ch)));
}
/**
 * Parses a notation declaration; the "&lt;!NOTATION" has already been read.
 *
 * <pre>
 * &lt;!NOTATION name PUBLIC publicId [systemId]&gt;
 * &lt;!NOTATION name SYSTEM systemId&gt;
 * </pre>
 *
 * <p>The notation is registered with the document type and appended to
 * it as a child.
 *
 * @param doctype the document type receiving the notation
 */
private void parseNotationDecl(QDocumentType doctype)
  throws IOException, SAXException
{
  int ch = skipWhitespace(_reader.read());

  ch = _reader.parseName(_text, ch);
  final String name = _text.toString();

  ch = skipWhitespace(ch);
  ch = _reader.parseName(_text, ch);
  final String key = _text.toString();

  ch = skipWhitespace(ch);
  ch = parseValue(_text, ch, false);
  final String id = _text.toString();

  ch = skipWhitespace(ch);

  final String publicId;
  final String systemId;

  if (key.equals("PUBLIC")) {
    String optionalSystemId = null;

    // the system id after a public id is optional
    if (ch == '"' || ch == '\'') {
      ch = parseValue(_text, ch, false);
      ch = skipWhitespace(ch);
      optionalSystemId = _text.toString();
    }

    publicId = id;
    systemId = optionalSystemId;
  }
  else if (key.equals("SYSTEM")) {
    publicId = null;
    systemId = id;
  }
  else
    throw error(L.l("expected PUBLIC or SYSTEM at `{0}'", key));

  QNotation notation = new QNotation(name, publicId, systemId);
  notation._owner = doctype._owner;
  notation.setLocation(getSystemId(), getFilename(), getLine(), getColumn());

  doctype.addNotation(notation);
  doctype.appendChild(notation);

  if (ch != '>')
    throw error(L.l("expected `{0}' at {1}", ">", badChar(ch)));
}
/**
 * Parses an external identifier and stores the result in _extPublicId
 * and _extSystemId (both are reset to null first).
 *
 * <pre>
 * externalID ::= PUBLIC publicId systemId
 *            ::= SYSTEM systemId
 * </pre>
 *
 * <p>In forgiving mode the keywords are matched case-insensitively.
 * A public identifier containing '&' anywhere is rejected.
 *
 * @param ch the first character of the PUBLIC/SYSTEM keyword
 * @return the character following the identifier
 */
private int parseExternalID(int ch)
  throws IOException, SAXException
{
  ch = _reader.parseName(_text, ch);
  String key = _text.toString();
  ch = skipWhitespace(ch);

  _extSystemId = null;
  _extPublicId = null;

  if (key.equals("PUBLIC") || _forgiving && key.equalsIgnoreCase("public")) {
    ch = parseValue(_text, ch, false);
    _extPublicId = _text.toString();
    ch = skipWhitespace(ch);

    // >= 0 so that an ampersand in the first position is caught too
    if (_extPublicId.indexOf('&') >= 0)
      throw error(L.l("Illegal character '&' in PUBLIC identifier '{0}'",
                      _extPublicId));

    ch = parseValue(_text, ch, false);
    ch = skipWhitespace(ch);
    _extSystemId = _text.toString();
  }
  else if (key.equals("SYSTEM") ||
           _forgiving && key.equalsIgnoreCase("system")) {
    ch = parseValue(_text, ch, false);
    _extSystemId = _text.toString();
  }
  else
    throw error(L.l("expected PUBLIC or SYSTEM at `{0}'", key));

  return ch;
}
/**
 * Parses an entity declaration; the "&lt;!ENTITY" has already been read.
 *
 * <pre>
 * &lt;!ENTITY [%] name "value"&gt;
 * &lt;!ENTITY [%] name externalID [NDATA notation]&gt;
 * </pre>
 *
 * <p>A leading '%' marks a parameter entity; NDATA is only parsed for
 * general entities.  The entity is registered with the document type
 * and appended to it as a child.
 *
 * @param doctype the document type receiving the entity
 */
private void parseEntityDecl(QDocumentType doctype)
  throws IOException, SAXException
{
  int ch = skipWhitespace(_reader.read());

  final boolean isParameterEntity = (ch == '%');
  if (isParameterEntity)
    ch = skipWhitespace(_reader.read());

  ch = _reader.parseName(_text, ch);
  final String name = _text.toString();

  ch = skipWhitespace(ch);

  final boolean hasLiteralValue = (ch == '"' || ch == '\'');
  QEntity entity;

  if (! hasLiteralValue) {
    ch = parseExternalID(ch);

    entity = new QEntity(name, null, _extPublicId, _extSystemId);
    entity._owner = doctype._owner;
    entity.setLocation(getSystemId(), getFilename(), getLine(), getColumn());

    ch = skipWhitespace(ch);

    if (! isParameterEntity && XmlChar.isNameStart(ch)) {
      ch = _reader.parseName(_text, ch);
      String keyword = _text.toString();

      if (! keyword.equals("NDATA"))
        throw error(L.l("expected `NDATA' at `{0}'", keyword));

      ch = skipWhitespace(ch);
      ch = _reader.parseName(_text, ch);
      entity._ndata = _text.toString();
    }
  }
  else {
    ch = parseValue(_text, ch, false);

    entity = new QEntity(name, _text.toString(), null, null);
    entity._owner = doctype._owner;
    entity.setLocation(getSystemId(), getFilename(), getLine(), getColumn());
  }

  entity._isPe = isParameterEntity;

  if (isParameterEntity)
    doctype.addParameterEntity(entity);
  else
    doctype.addEntity(entity);

  doctype.appendChild(entity);

  ch = skipWhitespace(ch);

  if (ch != '>')
    throw error(L.l("expected `>' at {0}", badChar(ch)));
}
/**
 * Returns true for space, tab, line feed and carriage return.
 */
private boolean isWhitespace(int ch)
{
  switch (ch) {
  case 0x20:
  case 0x9:
  case 0xa:
  case 0xd:
    return true;

  default:
    return false;
  }
}
/**
 * Returns true if the code is tab, line feed, carriage return, or lies
 * in 0x20-0xd7ff or 0xe000-0xfffd.
 */
private boolean isChar(int ch)
{
  if (ch < 0x20)
    return ch == 0x9 || ch == 0xa || ch == 0xd;

  if (ch <= 0xd7ff)
    return true;

  return ch >= 0xe000 && ch <= 0xfffd;
}
/**
 * Returns the low 16 bits of the value as four lowercase hex digits.
 */
private static String hex(int value)
{
  final String digits = "0123456789abcdef";

  CharBuffer cb = CharBuffer.allocate();

  for (int shift = 12; shift >= 0; shift -= 4)
    cb.append(digits.charAt((value >> shift) & 0xf));

  return cb.close();
}
/**
 * Returns the filename recorded by the parser for the input currently
 * being read.
 */
public String getFilename() {
  return _filename;
}
/**
 * Returns the parser's current line counter.
 */
public int getLine() {
  return _line;
}
/**
 * Returns the current column.  This implementation always reports 0.
 */
private int getColumn() {
  return 0;
}
/**
 * Returns the line recorded when the innermost open element was pushed,
 * or 1 when no element is open.
 */
int getNodeLine()
{
  return _elementTop > 0 ? _elementLines[_elementTop - 1] : 1;
}
/**
 * Returns the public id of the active reader, or the parser's own
 * public id when no reader is active.
 */
public String getPublicId()
{
  if (_reader == null)
    return _publicId;

  return _reader.getPublicId();
}
/**
 * Returns the system id of the active reader.  Without a reader, the
 * parser's own system id is returned, falling back to the filename.
 */
public String getSystemId()
{
  if (_reader != null)
    return _reader.getSystemId();

  if (_systemId != null)
    return _systemId;

  return _filename;
}
/**
 * Sets the parser's current line counter.
 *
 * @param line the new line number
 */
public void setLine(int line) {
  _line = line;
}
/**
 * Returns the current line; same value as getLine().
 */
public int getLineNumber()
{
  return getLine();
}
/**
 * Returns the current column; same value as getColumn().
 */
public int getColumnNumber()
{
  return getColumn();
}
/**
 * Adds every character of a string to the current text buffer, in order.
 */
private void addText(String s)
  throws IOException, SAXException
{
  for (char ch : s.toCharArray())
    addText(ch);
}
/**
 * Adds one character to the current text buffer, flushing the buffer
 * first when it is full.
 *
 * <p>A buffered '\r' directly before this character is rewritten to
 * '\n'; if this character is itself '\n' it is dropped, so "\r\n"
 * collapses to a single '\n'.  A non-whitespace character clears the
 * ignorable-whitespace flag.
 */
private void addText(char ch)
  throws IOException, SAXException
{
  if (_textCapacity <= _textLength)
    appendText();

  int last = _textLength - 1;

  if (last >= 0 && _textBuffer[last] == '\r') {
    _textBuffer[last] = '\n';

    if (ch == '\n')
      return;
  }

  if (_isIgnorableWhitespace)
    _isIgnorableWhitespace = XmlChar.isWhitespace(ch);

  _textBuffer[_textLength] = ch;
  _textLength++;
}
/**
 * Flushes the text buffer to the SAX callback and resets it.  Does
 * nothing when the buffer is empty.
 *
 * <ul>
 * <li>JSP text is always reported through characters().
 * <li>Whitespace-only text is dropped at the document level; inside an
 *     element it is reported through characters() for HTML and through
 *     ignorableWhitespace() otherwise.
 * <li>Other text at the document level is an error in strict XML.
 * <li>All remaining text is added as a text child and reported through
 *     characters().
 * </ul>
 *
 * <p>Behaviour matches the previous implementation; branches that could
 * never be reached have been removed.
 */
private void appendText()
  throws IOException, SAXException
{
  if (_textLength <= 0)
    return;

  if (_isJspText) {
    _contentHandler.characters(_textBuffer, 0, _textLength);
  }
  else if (_isIgnorableWhitespace) {
    if (_activeNode == DOC_NAME) {
      // whitespace outside the top element is dropped
    }
    else if (_isHtml)
      _contentHandler.characters(_textBuffer, 0, _textLength);
    else
      _contentHandler.ignorableWhitespace(_textBuffer, 0, _textLength);
  }
  else if (_activeNode == DOC_NAME && _strictXml) {
    throw error(L.l("expected top element at `{0}'",
                    new String(_textBuffer, 0, _textLength)));
  }
  else {
    addChild(TEXT_NAME);
    _contentHandler.characters(_textBuffer, 0, _textLength);
  }

  _textLength = 0;
  _isIgnorableWhitespace = true;
}
/**
 * Adds an element given by its tag name.  The name is converted to a
 * QName through the current policy and passed to the QName overload.
 *
 * @param child the element's tag name
 * @param isEmpty true if the tag is already closed
 * @param attributes the element's attributes
 * @param oldNamespace the namespace map to restore when the element ends
 */
private void addElement(String child, boolean isEmpty,
                        QAttributes attributes,
                        NamespaceMap oldNamespace)
  throws IOException, SAXException
{
  _text.clear();
  _text.append(child);

  QName qName = _policy.getName(_text);

  addElement(qName, isEmpty, attributes, oldNamespace);
}
/**
* Adds an element as a child of the current tree. Some
* DTDs, like HTML, will push additional nodes to make
* the tree work, e.g. the body tag.
*
* <p>When resin includes are enabled, resin:include and
* resin:include-directory are handled here and never reach the content
* handler.  Otherwise the policy is asked repeatedly what to do with
* the child relative to the active node until it answers with an
* action that places or ignores the element.
*
* @param child the new child to be added.
* @param isEmpty true if the tag is already closed.
* @param attributes the element's attributes
* @param oldNamespace the namespace map saved for this element and
*   restored when it ends
*/
private void addElement(QName child, boolean isEmpty,
QAttributes attributes, NamespaceMap oldNamespace)
throws IOException, SAXException
{
// NOTE(review): the name/URI checks below use ==, which presumably
// relies on QName strings being interned -- confirm.
if (! _doResinInclude) {
}
else if (child.getName() == "include" &&
child.getNamespaceURI() == "http://caucho.com/ns/resin/core" ||
child.getName() == "resin:include") {
if (! isEmpty)
throw error(L.l("resin:include must be an empty tag"));
handleResinInclude();
return;
}
else if (child.getName() == "include-directory" &&
child.getNamespaceURI() == "http://caucho.com/ns/resin/core" ||
child.getName() == "resin:include-directory") {
if (! isEmpty)
throw error(L.l("resin:include-directory must be an empty tag"));
handleResinIncludeDirectory();
return;
}
// strict XML allows only one element directly under the document
if (_activeNode == DOC_NAME && _hasTopElement && _strictXml)
throw error(L.l("expected a single top-level element at `{0}'",
child.getName()));
_hasTopElement = true;
String childURI = child.getNamespaceURI();
String childLocal = child.getLocalName();
// without a namespace URI, report "" for the URI; the local name is the
// full name when namespace aware and "" otherwise
if (childURI == null) {
childURI = "";
if (_isNamespaceAware)
childLocal = child.getName();
else
childLocal = "";
}
// PUSH_OPT and POP change the active node and ask the policy again
while (true) {
int action = _policy.openAction(this, _activeNode, child);
switch (action) {
case Policy.IGNORE:
return;
case Policy.PUSH:
//if (dbg.canWrite())
// dbg.println("<" + child.getNodeName() + ">");
if (_contentHandler instanceof DOMBuilder)
((DOMBuilder) _contentHandler).startElement(child, attributes);
else {
_contentHandler.startElement(childURI,
childLocal,
child.getName(),
attributes);
}
if (isEmpty) {
_contentHandler.endElement(childURI,
childLocal,
child.getName());
popNamespaces(oldNamespace);
}
else {
// grow the three parallel element stacks together when full
if (_elementTop == _elementNames.length) {
int len = _elementNames.length;
QName []names = new QName[2 * len];
NamespaceMap []newNamespaces = new NamespaceMap[2 * len];
int []lines = new int[2 * len];
System.arraycopy(_elementNames, 0, names, 0, len);
System.arraycopy(_elementLines, 0, lines, 0, len);
System.arraycopy(_namespaces, 0, newNamespaces, 0, len);
_elementNames = names;
_elementLines = lines;
_namespaces = newNamespaces;
}
// save the parent node, its namespaces and the current line, then
// make the child the active node
_namespaces[_elementTop] = oldNamespace;
_elementLines[_elementTop] = getLine();
_elementNames[_elementTop] = _activeNode;
_elementTop++;
_activeNode = child;
_isTagStart = true;
}
return;
case Policy.PUSH_EMPTY:
//if (dbg.canWrite())
// dbg.println("<" + child.getNodeName() + "/>");
// element is opened and closed at once, whatever isEmpty says
if (_contentHandler instanceof DOMBuilder)
((DOMBuilder) _contentHandler).startElement(child, attributes);
else {
_contentHandler.startElement(childURI,
childLocal,
child.getName(),
attributes);
}
_contentHandler.endElement(childURI,
childLocal,
child.getName());
popNamespaces(oldNamespace);
return;
case Policy.PUSH_OPT:
// open the element the policy wants in between, then retry
addElement(_policy.getOpt(), false, _nullAttributes, oldNamespace);
break;
case Policy.PUSH_VERBATIM:
// content is read as raw text up to the matching end tag
if (_contentHandler instanceof DOMBuilder)
((DOMBuilder) _contentHandler).startElement(child, attributes);
else
_contentHandler.startElement(childURI,
childLocal,
child.getName(),
attributes);
scanVerbatim(child.getName());
appendText();
_contentHandler.endElement(childURI,
childLocal,
child.getName());
return;
case Policy.POP:
//if (dbg.canWrite())
// dbg.println("</" + activeNode.getNodeName() + ">");
// close the active node, then retry against its parent
popNode();
if (_activeNode == null)
return;
break;
default:
throw error(L.l("can't add `{0}' to `{1}'",
child.getName(), _activeNode.getName()));
}
}
}
/**
 * Adds a non-element child (such as text) to the current node.
 *
 * <p>The policy is asked what to do with the child relative to the
 * active node.  PUSH_OPT opens the element the policy suggests and POP
 * closes the active node; both ask again afterwards.  The loop ends
 * when the child is accepted or ignored, or when no active node is left.
 */
private void addChild(QName child)
  throws IOException, SAXException
{
  while (_activeNode != null) {
    final int action = _policy.openAction(this, _activeNode, child);

    if (action == Policy.IGNORE) {
      return;
    }
    else if (action == Policy.PUSH) {
      _isTagStart = true;
      return;
    }
    else if (action == Policy.PUSH_EMPTY) {
      return;
    }
    else if (action == Policy.PUSH_OPT) {
      addElement(_policy.getOpt(), false, _nullAttributes, _namespaceMap);
    }
    else if (action == Policy.PUSH_VERBATIM) {
      scanVerbatim(child.getName());
      return;
    }
    else if (action == Policy.POP) {
      popNode();
    }
    else {
      throw error(L.l("cannot add `{0}' to `{1}'",
                      child.getName(), _activeNode.getName()));
    }
  }
}
/**
 * Reads raw text into the text buffer up to the end tag of the named
 * element, matched case-insensitively.  Any other markup, including end
 * tags of other elements, is kept as text.
 *
 * @param name the element whose end tag stops the scan
 * @throws XmlParseException if input ends before the end tag
 */
private void scanVerbatim(String name)
  throws IOException, SAXException
{
  int ch = _reader.read();

  while (ch >= 0) {
    if (ch != '<') {
      addText((char) ch);
      ch = _reader.read();
      continue;
    }

    ch = _reader.read();
    if (ch != '/') {
      // not an end tag: keep the '<' and re-examine the next character
      addText('<');
      continue;
    }

    ch = _reader.parseName(_eltName, _reader.read());

    if (_eltName.matchesIgnoreCase(name) && ch == '>')
      return;

    // some other end tag, or a malformed one: keep it as text
    addText("</");
    addText(_eltName.toString());
  }

  throw error(L.l("expected </{0}> at {1}", name,
                  badChar(ch)));
}
/**
 * Skips space, tab, line feed and carriage return.
 *
 * @param ch the current character
 * @return the first character that is not whitespace
 */
private int skipWhitespace(int ch)
  throws IOException, SAXException
{
  for (;;) {
    switch (ch) {
    case 0x20:
    case 0x9:
    case 0xa:
    case 0xd:
      ch = read();
      break;

    default:
      return ch;
    }
  }
}
/**
 * Replaces the reader the parser takes its input from.
 *
 * @param reader the new current reader
 */
public void setReader(XmlReader reader) {
  _reader = reader;
}
/**
* Adds text to the macro, escaping attribute values.
*/
private void setMacroAttr(String text)
throws IOException, SAXException
{
if (_reader != _macro) {
_macro.init(this, _reader);
_reader = _macro;
}
int j = _macroIndex;
for (int i = 0; i < text.length(); i++) {
int ch = text.charAt(i);
if (ch == '\'')
_macro.add("'");
else if (ch == '"')
_macro.add(""");
else
_macro.add((char) ch);
}
}
/**
 * Pushes the named file as a lexical include, without a public id.
 *
 * @param systemId the name of the file to include
 */
private void pushInclude(String systemId)
  throws IOException, SAXException
{
  final String noPublicId = null;

  pushInclude(noPublicId, systemId);
}
/**
* Pushes the named file as a lexical include.
*
* <p>The stream is opened through openStream and becomes the parser's
* input stream.  The optional XML declaration of the included file is
* parsed, and the new reader is linked to the previous one through
* setNext.
*
* @param publicId the public id of the included entity, may be null
* @param systemId the name of the file to include.
* @throws FileNotFoundException if no stream can be opened
*/
private void pushInclude(String publicId, String systemId)
throws IOException, SAXException
{
InputStream stream = openStream(systemId, publicId);
if (stream == null)
throw new FileNotFoundException(systemId);
_is = Vfs.openRead(stream);
Path oldSearchPath = _searchPath;
Path path = _is.getPath();
if (path != null) {
// record the included file as a dependency of the owner document
_owner.addDepend(path);
if (_searchPath != null) {
// the include's directory becomes the search path; the old one is
// stored on the reader that is current at this point
_searchPath = path.getParent();
_reader.setSearchPath(oldSearchPath);
}
}
_filename = systemId;
/*
XmlReader nextReader;
if (_reader instanceof Utf8Reader)
nextReader = new Utf8Reader(this, _is);
else {
_is.setEncoding(_reader.getReadStream().getEncoding());
nextReader = new XmlReader(this, _is);
}
_reader = nextReader;
*/
XmlReader oldReader = _reader;
_reader = null;
_line = 1;
// selects a reader for the new stream and parses any <?xml ... ?>
int ch = parseXMLDeclaration(oldReader);
XmlReader reader = _reader;
// skip a macro reader to reach the reader below it
if (reader instanceof MacroReader)
reader = reader.getNext();
reader.setSystemId(systemId);
reader.setFilename(systemId);
reader.setPublicId(publicId);
reader.setNext(oldReader);
// push back the character read past the declaration
unread(ch);
}
/**
 * Leaves the current include: the next reader in the chain becomes
 * current again, and the parser's filename, line, input stream and
 * (when the reader has one) search path are restored from it.
 */
private void popInclude()
  throws IOException, SAXException
{
  XmlReader finished = _reader;
  XmlReader resumed = finished.getNext();

  _reader = resumed;
  finished.setNext(null);

  _filename = resumed.getFilename();
  _line = resumed.getLine();
  _is = resumed.getReadStream();

  if (resumed.getSearchPath() != null)
    _searchPath = resumed.getSearchPath();
}
/**
 * Adds text to the macro reader so that it is read next.
 *
 * <p>If the macro reader is not the current reader it is placed in
 * front of it.  A macro reader that already has a next reader is not
 * reused; a fresh one is created instead.
 *
 * @param text the replacement text to read
 */
private void setMacro(String text)
  throws IOException, SAXException
{
  if (_reader != _macro) {
    if (_macro.getNext() != null)
      _macro = new MacroReader();

    _macro.init(this, _reader);
    _reader = _macro;
  }

  _macro.add(text);
}
/**
 * Reads the next character.  When the current reader is exhausted and
 * has a next reader, the include is popped and reading continues
 * there, unless _stopOnIncludeEnd is set.
 *
 * @return the next character, or a negative value at the end of input
 */
private int read()
  throws IOException, SAXException
{
  for (int ch = _reader.read(); ; ch = _reader.read()) {
    if (ch >= 0 || _reader.getNext() == null)
      return ch;

    if (_stopOnIncludeEnd)
      return -1;

    popInclude();
  }
}
/**
 * Pushes a character back so that it is the next one read.  Negative
 * values are ignored.
 *
 * <p>The character is prepended to the macro reader, which is placed in
 * front of the current reader if needed.  A macro reader that already
 * has a next reader is not reused; a fresh one is created instead.
 *
 * @param ch the character to push back
 */
public void unread(int ch)
{
  if (ch < 0)
    return;

  if (_reader != _macro) {
    if (_macro.getNext() != null)
      _macro = new MacroReader();

    _macro.init(this, _reader);
    _reader = _macro;
  }

  _macro.prepend((char) ch);
}
/**
 * Returns an error including the current line.
 *
 * <p>If the source named by the system id can be read, the lines around
 * the current line (two before to two after) are appended to the
 * message.  If an error handler is registered, it receives the message
 * as a fatal error first; a SAXException it throws is ignored.
 *
 * @param text the error message text.
 * @return the exception for the caller to throw
 */
XmlParseException error(String text)
{
  StringBuilder context = new StringBuilder();

  try {
    Path source = Vfs.lookup(_systemId);

    if (source.canRead()) {
      ReadStream in = source.openRead();
      context.append("\n");

      try {
        int lineNumber = 1;

        while (lineNumber < _line + 3) {
          String line = in.readLine();

          if (line == null)
            break;

          if (lineNumber > _line - 3 && lineNumber < _line + 3) {
            context.append(lineNumber).append(": ").append(line).append("\n");
          }

          lineNumber++;
        }
      } finally {
        in.close();
      }
    }
  } catch (IOException ignored) {
    // the source excerpt is optional; report the error without it
  }

  text = text + context;

  if (_errorHandler != null) {
    SAXParseException saxError = new SAXParseException(text, _locator);

    try {
      _errorHandler.fatalError(saxError);
    } catch (SAXException ignored) {
      // the XmlParseException returned below is what the caller throws
    }
  }

  return new XmlParseException(_filename, _line, text);
}
/**
 * Generates a validating handler from the DTD.  If one is produced, it
 * is given the locator and a startDocument event, and the content
 * handler is replaced by a tee of the validator and the previous handler.
 *
 * @param dtd the document type to generate a validator from
 */
private void generateDtdValidator(QDocumentType dtd)
  throws SAXException
{
  ContentHandler validator = new DtdRelaxGenerator(dtd).generate();

  if (validator == null)
    return;

  validator.setDocumentLocator(_locator);
  validator.startDocument();

  _contentHandler = new TeeContentHandler(validator, _contentHandler);
}
/**
 * Releases per-parse state; currently this clears the filename.
 */
public void free() {
  _filename = null;
}
/**
 * Returns a user-readable string for an error character: "end of file"
 * for negative values and 0xffff, "end of line" for line terminators,
 * and the quoted character otherwise.  Characters outside 0x20-0x7f are
 * followed by their hex code.
 */
static String badChar(int ch)
{
  if (ch < 0 || ch == 0xffff)
    return L.l("end of file");

  if (ch == '\n' || ch == '\r')
    return L.l("end of line");

  String quoted = "`" + (char) ch + "'";

  if (ch >= 0x20 && ch <= 0x7f)
    return quoted;

  return quoted + " (\\u" + hex(ch) + ")";
}
/**
 * Prints a node and its descendants as an indented outline of tag
 * names.  A node without children is printed as an empty tag.
 *
 * @param s the stream to print to
 * @param node the node to print; null prints nothing
 * @param depth the number of spaces to indent this node
 */
private void printDebugNode(WriteStream s, Node node, int depth)
  throws IOException
{
  if (node == null)
    return;

  for (int pad = depth; pad > 0; pad--)
    s.print(' ');

  if (node.getFirstChild() == null) {
    s.println("<" + node.getNodeName() + "/>");
    return;
  }

  s.println("<" + node.getNodeName() + ">");

  Node child = node.getFirstChild();
  while (child != null) {
    printDebugNode(s, child, depth + 2);
    child = child.getNextSibling();
  }

  for (int pad = depth; pad > 0; pad--)
    s.print(' ');

  s.println("</" + node.getNodeName() + ">");
}
/**
 * Locator that reports the position of an XmlParser.  Values are taken
 * from the parser's active reader where one exists and from the parser
 * itself otherwise.
 */
public static class LocatorImpl implements ExtendedLocator {
  XmlParser _parser;

  LocatorImpl(XmlParser parser)
  {
    _parser = parser;
  }

  /**
   * Returns the first non-null of: reader system id, parser system id,
   * reader filename, parser filename.
   */
  public String getSystemId()
  {
    if (_parser._reader != null && _parser._reader.getSystemId() != null)
      return _parser._reader.getSystemId();

    if (_parser.getSystemId() != null)
      return _parser.getSystemId();

    if (_parser._reader != null && _parser._reader.getFilename() != null)
      return _parser._reader.getFilename();

    if (_parser.getFilename() != null)
      return _parser.getFilename();

    return null;
  }

  /**
   * Returns the first non-null of: reader filename, parser filename,
   * reader system id, parser system id.
   */
  public String getFilename()
  {
    if (_parser._reader != null && _parser._reader.getFilename() != null)
      return _parser._reader.getFilename();

    if (_parser.getFilename() != null)
      return _parser.getFilename();

    if (_parser._reader != null && _parser._reader.getSystemId() != null)
      return _parser._reader.getSystemId();

    if (_parser.getSystemId() != null)
      return _parser.getSystemId();

    return null;
  }

  /**
   * Returns the reader's public id, or the parser's without a reader.
   */
  public String getPublicId()
  {
    return (_parser._reader != null
            ? _parser._reader.getPublicId()
            : _parser.getPublicId());
  }

  /**
   * Returns the reader's line, or the parser's without a reader.
   */
  public int getLineNumber()
  {
    return (_parser._reader != null
            ? _parser._reader.getLine()
            : _parser.getLineNumber());
  }

  /**
   * Returns the parser's column number.
   */
  public int getColumnNumber()
  {
    return _parser.getColumnNumber();
  }
}
}