import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Iterator;

import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;

import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;

import org.codehaus.stax2.XMLStreamLocation2;
import org.codehaus.stax2.typed.Base64Variant;
import org.codehaus.stax2.typed.TypedArrayDecoder;
import org.codehaus.stax2.typed.TypedValueDecoder;
import org.codehaus.stax2.typed.TypedXMLStreamException;
import org.codehaus.stax2.ri.typed.CharArrayBase64Decoder;

import com.fasterxml.aalto.WFCException;
import com.fasterxml.aalto.impl.*;
import com.fasterxml.aalto.util.*;
/**
 * This is the abstract base class for all scanner implementations,
 * defining the operations the actual parser requires from the low-level
 * scanners.
 * Scanners are specific to encoding and input type (byte, char / stream,
 * block), so there are many implementations.
 */
public abstract class XmlScanner
implements XmlConsts, XMLStreamConstants, NamespaceContext
// // // Constants:
* String that identifies CDATA section (after "<![" prefix)
final protected String CDATA_STR = "CDATA[";
* This token type signifies end-of-input, in cases where it can be
* returned. In other cases, an exception may be thrown.
public final static int TOKEN_EOI = -1;
* This constant defines the highest Unicode character allowed
* in XML content.
protected final static int MAX_UNICODE_CHAR = 0x10FFFF;
protected final static int INT_NULL = 0;
protected final static int INT_CR = (int) '\r';
protected final static int INT_LF = (int) '\n';
protected final static int INT_TAB = (int) '\t';
protected final static int INT_SPACE = 0x0020;
protected final static int INT_HYPHEN = (int) '-';
protected final static int INT_QMARK = (int) '?';
protected final static int INT_AMP = (int) '&';
protected final static int INT_LT = (int) '<';
protected final static int INT_GT = (int) '>';
protected final static int INT_QUOTE = (int) '"';
protected final static int INT_APOS = (int) '\'';
protected final static int INT_EXCL = (int) '!';
protected final static int INT_COLON = (int) ':';
protected final static int INT_LBRACKET = (int) '[';
protected final static int INT_RBRACKET = (int) ']';
protected final static int INT_SLASH = (int) '/';
protected final static int INT_EQ = (int) '=';
protected final static int INT_A = (int) 'A';
protected final static int INT_F = (int) 'F';
protected final static int INT_a = (int) 'a';
protected final static int INT_f = (int) 'f';
protected final static int INT_z = (int) 'z';
protected final static int INT_0 = (int) '0';
protected final static int INT_9 = (int) '9';
// // // Config for bound PName cache:
* Let's activate cache quite soon, no need to wait for hundreds
* of misses; just try to avoid cache construction if all we get
* is soap envelope element or such.
private final static int BIND_MISSES_TO_ACTIVATE_CACHE = 10;
* Size of the bind cache can be reasonably small, and should
* still get high enough hit rate
private final static int BIND_CACHE_SIZE = 0x40;
private final static int BIND_CACHE_MASK = 0x3F;
/* Configuration
protected final ReaderConfig _config;
/**
 * Whether validity checks (wrt. name and text characters)
 * and normalization (linefeeds) are to be
 * done using xml 1.1 rules, or basic xml 1.0 rules. Default
 * is 1.0.
 */
protected final boolean _xml11;
protected final boolean _cfgCoalescing;
/* Note: non-final since it may need to be disabled after
* construction.
protected boolean _cfgLazyParsing;
/* Tokenization state
protected int _currToken = START_DOCUMENT;
protected boolean _tokenIncomplete = false;
* Number of START_ELEMENT events returned for which no END_ELEMENT
* has been returned; including current event.
protected int _depth = 0;
* Textual content of the current event
protected final TextBuilder _textBuilder;
* Flag set to indicate that an entity is pending
protected boolean _entityPending = false;
/* Name/String handling
* Similarly, need a char buffer for actual String construction
* (in future, could perhaps use StringBuilder?). It is used
* for holding things like names (element, attribute), and
* attribute values.
protected char[] _nameBuffer = null;
* Current name associated with the token, if any. Name of the
* current element, target of processing instruction, or name
* of an unexpanded entity.
protected PName _tokenName = null;
/* Element information
* Flag that is used if the current state is <code>START_ELEMENT</code>
* or <code>END_ELEMENT</code>, to indicate if the underlying physical
* tag is a so-called empty tag (one ending with "/>")
protected boolean _isEmptyTag = false;
* Information about the current element on the stack
protected ElementScope _currElem;
* Public id of the current event (DTD), if any.
protected String _publicId;
* System id of the current event (DTD), if any.
protected String _systemId;
/* Namespace binding
* Pointer to the last namespace declaration encountered. Because of backwards
* linking, it also serves as the head of the linked list of all active
* namespace declarations starting from the most recent one.
protected NsDeclaration _lastNsDecl = null;
* This is a temporary state variable, valid during START_ELEMENT
* event. For those events, contains number of namespace declarations
* available. For END_ELEMENT, this count is computed on the fly.
protected int _currNsCount = 0;
* Default namespace binding is a per-document singleton, like
* explicit bindings, and used for elements (never for attributes).
protected NsBinding _defaultNs = NsBinding.createDefaultNs();
* Array containing all prefix bindings needed within the current
* document, so far (if any). These bindings are not in a particular
* order, and they specifically do NOT represent actual namespace
* declarations parsed from xml content.
protected NsBinding[] _nsBindings;
protected int _nsBindingCount = 0;
/**
 * Although unbound pname instances can be easily and safely reused,
 * bound ones are per-document. However, it makes sense to try to
 * reuse them too; at least using a minimal static cache, activated
 * only after a certain number of cache misses (to avoid overhead for
 * tiny documents, or documents with few or no namespace prefixes).
 */
protected PName[] _nsBindingCache = null;
protected int _nsBindMisses = 0;
/* Support for non-transient NamespaceContext
* Last returned {@link NamespaceContext}, created for a call
* to {@link #getNonTransientNamespaceContext}, iff this would
* still be a valid context.
protected FixedNsContext _lastNsContext = FixedNsContext.EMPTY_CONTEXT;
/* Attribute info
protected final AttributeCollector _attrCollector;
protected int _attrCount = 0;
/* Minimal location info for all impls
/**
 * Number of bytes or characters (depending on the input type) that were
 * read and processed before the contents of the current buffer; used
 * for calculating absolute offsets.
 */
protected long _pastBytesOrChars;
* The row on which the character to read next is on. Note that
* it is 0-based, so API will generally add one to it before
* returning the value
protected int _currRow;
* Offset used to calculate the column value given current input
* buffer pointer. May be negative, if the first character of the
* row was contained within an earlier buffer.
protected int _rowStartOffset;
* Offset (in chars or bytes) at start of current token
protected long _startRawOffset;
* Current row at start of current (last returned) token
protected long _startRow = -1L;
* Current column at start of current (last returned) token
protected long _startColumn = -1L;
/* Life-cycle
/**
 * Initializes the state shared by all scanner implementations from the
 * given configuration.
 *
 * @param cfg Reader configuration; source of the settings and of the
 *   buffers allocated here
 */
protected XmlScanner(ReaderConfig cfg)
{
    this._config = cfg;

    // Settings consulted during tokenization are cached as fields
    this._cfgCoalescing = cfg.willCoalesceText();
    this._cfgLazyParsing = cfg.willParseLazily();
    this._xml11 = cfg.isXml11();

    // Working storage: text content, attributes and names
    this._textBuilder = TextBuilder.createRecyclableBuffer(cfg);
    this._attrCollector = new AttributeCollector(cfg);
    this._nameBuffer = cfg.allocSmallCBuffer(ReaderConfig.DEFAULT_SMALL_BUFFER_LEN);

    // Rows are counted from zero internally
    this._currRow = 0;
}
* Method called at point when the parsing process has ended (either
* by encountering end of the input, or via explicit close), and
* buffers can and should be released.
* @param forceCloseSource True if the underlying input source is
* to be closed, independent of whether auto-close has been set
* to true via configuration (or if the scanner manages the input
* source)
public final void close(boolean forceCloseSource)
throws XMLStreamException
if (forceCloseSource || _config.willAutoCloseInput()) {
try {
} catch (IOException ioe) {
throw new IoStreamException(ioe);
protected void _releaseBuffers()
if (_nameBuffer != null) {
char[] buf = _nameBuffer;
_nameBuffer = null;
protected abstract void _closeSource() throws IOException;
/* Package access methods, needed by SAX impl
/**
 * @return Configuration object this scanner was constructed with
 */
public ReaderConfig getConfig()
{
    return _config;
}

/**
 * @return Collector that holds attributes of the current element
 */
public AttributeCollector getAttrCollector()
{
    return _attrCollector;
}
/* Public scanner interface, iterating
// // // First, main iteration methods
/**
 * Main iteration method for content outside of the element tree;
 * implemented by concrete scanners.
 * NOTE(review): presumably returns the type of the token found, or
 * {@link #TOKEN_EOI} at end of input -- confirm against implementations.
 *
 * @param isProlog Selects between prolog and epilog handling (the error
 *   reporting helpers of this class use the same flag to choose between
 *   the prolog and epilog message suffixes)
 */
public abstract int nextFromProlog(boolean isProlog) throws XMLStreamException;
/**
 * Main iteration method for content within the element tree;
 * implemented by concrete scanners.
 * NOTE(review): presumably returns the type of the token found -- confirm.
 */
public abstract int nextFromTree() throws XMLStreamException;
* This token is called to ensure that the current token/event has been
* completely parsed, such that we have all the data needed to return
* it (textual content, PI data, comment text etc)
protected final void finishToken() throws XMLStreamException
_tokenIncomplete = false;
switch (_currToken) {
case SPACE:
case DTD:
finishDTD(true); // true -> get text
case CDATA:
throw new Error("Internal error, unexpected incomplete token type "+ErrorConsts.tokenTypeDesc(_currToken));
* This method is called to essentially skip remaining of the
* current token (data of PI etc)
* @return True If by skipping we also figured out following event
* type (and assigned its type to _currToken); false if that remains
* to be done
protected final boolean skipToken()
throws XMLStreamException
_tokenIncomplete = false;
switch (_currToken) {
if (skipCharacters()) { // encountered an entity
// _tokenName already set, just need to set curr token
return true;
if (_cfgCoalescing) {
if (skipCoalescedText()) { // encountered an entity
return true;
case SPACE:
case CDATA:
if (_cfgCoalescing) {
if (_entityPending) { // encountered an entity
return true;
case DTD:
finishDTD(false); // false -> skip subset text
throw new Error("Internal error, unexpected incomplete token type "+ErrorConsts.tokenTypeDesc(_currToken));
return false;
/* Public scanner interface, location access
* @return Current input location
public abstract XMLStreamLocation2 getCurrentLocation();
public final XMLStreamLocation2 getStartLocation()
// !!! TODO: deal with impedance wrt int/long (flaw in Stax API)
int row = (int) _startRow;
int col = (int) _startColumn;
return LocationImpl.fromZeroBased(_config.getPublicId(), _config.getSystemId(),
_startRawOffset, row, col);
// Offset accessors, implemented by concrete scanners.
// NOTE(review): by their names these report the start/end offset of the
// current token in bytes or chars; presumably an implementation reports
// "unknown" for the unit it does not track -- confirm against implementations.
public abstract long getStartingByteOffset();
public abstract long getStartingCharOffset();
// The "ending" variants may throw; NOTE(review): presumably because the
// current token has to be fully parsed first -- confirm.
public abstract long getEndingByteOffset() throws XMLStreamException;
public abstract long getEndingCharOffset() throws XMLStreamException;
public XMLStreamLocation2 getEndLocation() throws XMLStreamException
// Have to complete the token to know the ending location...
if (_tokenIncomplete) {
return getCurrentLocation();
public final int getCurrentLineNr() {
return _currRow+1;
public abstract int getCurrentColumnNr();
public final String getInputSystemId() {
return _config.getSystemId();
public final String getInputPublicId() {
return _config.getPublicId();
/* Public scanner interface, other methods
public final boolean hasEmptyStack() {
return (_depth == 0);
public final int getDepth() { return _depth; }
public final boolean isEmptyTag() { return _isEmptyTag; }
/* Data accessors, names:
public final PName getName() {
return _tokenName;
public final QName getQName() {
return _tokenName.constructQName(_defaultNs);
public final String getDTDPublicId() {
return _publicId;
public final String getDTDSystemId() {
return _systemId;
/* Data accessors, (element) text:
public final String getText() throws XMLStreamException
if (_tokenIncomplete) {
return _textBuilder.contentsAsString();
public final int getTextLength()
throws XMLStreamException
if (_tokenIncomplete) {
return _textBuilder.size();
public final char[] getTextCharacters()
throws XMLStreamException
if (_tokenIncomplete) {
return _textBuilder.getTextBuffer();
public final int getTextCharacters(int srcStart, char[] target, int targetStart, int len)
throws XMLStreamException
if (_tokenIncomplete) {
return _textBuilder.contentsToArray(srcStart, target, targetStart, len);
public final int getText(Writer w, boolean preserveContents)
throws XMLStreamException
if (_tokenIncomplete) {
/* !!! Preserve or not, we'll hold the contents in memory.
* Could be improved if necessary.
try {
return _textBuilder.rawContentsTo(w);
} catch (IOException ioe) {
throw new IoStreamException(ioe);
public final boolean isTextWhitespace()
throws XMLStreamException
if (_tokenIncomplete) {
return _textBuilder.isAllWhitespace();
* Method called by the stream reader to decode space-separated tokens
* that are part of the current text event, using given decoder.
* @param reset If true, need to tell text buffer to reset its decoding
* state; if false, shouldn't
public final int decodeElements(TypedArrayDecoder tad, boolean reset)
throws XMLStreamException
if (_tokenIncomplete) {
try {
return _textBuilder.decodeElements(tad, reset);
} catch (TypedXMLStreamException tex) {
// Need to add location?
Location loc = getCurrentLocation();
String lexical = tex.getLexical();
IllegalArgumentException iae = (IllegalArgumentException)tex.getCause();
throw new TypedXMLStreamException(lexical, tex.getMessage(), loc, iae);
* Method called by the stream reader to reset given base64 decoder
* with data from the current text event.
public final void resetForDecoding(Base64Variant v, CharArrayBase64Decoder dec, boolean firstChunk)
throws XMLStreamException
if (_tokenIncomplete) {
_textBuilder.resetForBinaryDecode(v, dec, firstChunk);
/* Data accessors, firing SAX events
public void fireSaxStartElement(ContentHandler h, Attributes attrs)
throws SAXException
if (h != null) {
// First; any ns declarations?
NsDeclaration nsDecl = _lastNsDecl;
/* 17-Sep-2006, tatus: There is disparity between START/END_ELEMENT;
* with START_ELEMENT, _depth is one higher than that of ns
* declarations; with END_ELEMENT, the same
int level = _depth-1;
while (nsDecl != null && nsDecl.getLevel() == level) {
String prefix = nsDecl.getPrefix();
String uri = nsDecl.getCurrNsURI();
h.startPrefixMapping((prefix == null) ? "" : prefix, uri);
nsDecl = nsDecl.getPrev();
// Then start-elem event itself:
PName n = getName();
String uri = n.getNsUri();
// Sax requires "" (not null) for ns uris...
h.startElement((uri == null) ? "" : uri,
n.getLocalName(), n.getPrefixedName(),
public void fireSaxEndElement(ContentHandler h)
throws SAXException
if (h != null) {
/* Order of events is reversed (wrt. start-element): first
* the end tag event, then unbound prefixes
// End element:
PName n = getName();
String uri = n.getNsUri();
// Sax requires "" (not null) for ns uris...
h.endElement((uri == null) ? "" : uri, n.getLocalName(), n.getPrefixedName());
// Then, any expiring ns declarations?
NsDeclaration nsDecl = _lastNsDecl;
/* 17-Sep-2006, tatus: There is disparity between START/END_ELEMENT;
* with START_ELEMENT, _depth is one higher than that of ns
* declarations; with END_ELEMENT, the same
int level = _depth;
while (nsDecl != null && nsDecl.getLevel() == level) {
String prefix = nsDecl.getPrefix();
h.endPrefixMapping((prefix == null) ? "" : prefix);
nsDecl = nsDecl.getPrev();
public void fireSaxCharacterEvents(ContentHandler h)
throws XMLStreamException, SAXException
if (h != null) {
if (_tokenIncomplete) {
public void fireSaxSpaceEvents(ContentHandler h)
throws XMLStreamException, SAXException
if (h != null) {
if (_tokenIncomplete) {
public void fireSaxCommentEvent(LexicalHandler h)
throws XMLStreamException, SAXException
if (h != null) {
if (_tokenIncomplete) {
public void fireSaxPIEvent(ContentHandler h)
throws XMLStreamException, SAXException
if (h != null) {
if (_tokenIncomplete) {
h.processingInstruction(_tokenName.getLocalName(), getText());
/* Data accessors, attributes:
public final int getAttrCount() {
return _attrCount;
public final String getAttrLocalName(int index)
// Note: caller checks indices:
return _attrCollector.getName(index).getLocalName();
public final QName getAttrQName(int index)
// Note: caller checks indices:
return _attrCollector.getQName(index);
public final String getAttrPrefixedName(int index)
// Note: caller checks indices:
return _attrCollector.getName(index).getPrefixedName();
public final String getAttrNsURI(int index)
// Note: caller checks indices:
return _attrCollector.getName(index).getNsUri();
public final String getAttrPrefix(int index)
// Note: caller checks indices:
return _attrCollector.getName(index).getPrefix();
public final String getAttrValue(int index)
// Note: caller checks indices
return _attrCollector.getValue(index);
public final String getAttrValue(String nsURI, String localName)
/* Collector may not be reset if there are no attributes,
* need to check if any could be found first:
if (_attrCount < 1) {
return null;
return _attrCollector.getValue(nsURI, localName);
public final void decodeAttrValue(int index, TypedValueDecoder tvd)
throws XMLStreamException
_attrCollector.decodeValue(index, tvd);
* Method called to decode the attribute value that consists of
* zero or more space-separated tokens.
* Decoding is done using the decoder provided.
* @return Number of tokens decoded
public final int decodeAttrValues(int index, TypedArrayDecoder tad)
throws XMLStreamException
return _attrCollector.decodeValues(index, tad, this);
public final byte[] decodeAttrBinaryValue(int index, Base64Variant v, CharArrayBase64Decoder dec)
throws XMLStreamException
return _attrCollector.decodeBinaryValue(index, v, dec, this);
public final int findAttrIndex(String nsURI, String localName)
/* Collector may not be reset if there are no attributes,
* need to check if any could be found first:
if (_attrCount < 1) {
return -1;
return _attrCollector.findIndex(nsURI, localName);
public final String getAttrType(int index)
// Note: caller checks indices:
// !!! TBI
return "CDATA";
public final boolean isAttrSpecified(int index)
// !!! TBI
// (for now works ok as we don't handle DTD info, no attr value defaults)
return true;
/* Data accessors, namespace declarations:
public final int getNsCount()
if (_currToken == START_ELEMENT) {
return _currNsCount;
return (_lastNsDecl == null) ? 0 : _lastNsDecl.countDeclsOnLevel(_depth);
public final String getNamespacePrefix(int index)
return findCurrNsDecl(index).getBinding().mPrefix;
public final String getNamespaceURI(int index)
return findCurrNsDecl(index).getBinding().mURI;
private NsDeclaration findCurrNsDecl(int index)
NsDeclaration nsDecl = _lastNsDecl;
/* 17-Sep-2006, tatu: There is disparity between START/END_ELEMENT;
* with START_ELEMENT, _depth is one higher than that of ns
* declarations; with END_ELEMENT, the same
int level = _depth;
int count;
// 20-Jan-2011, tatu: Hmmh... since declarations are in reverse order should we reorder?
if (_currToken == START_ELEMENT) {
count = _currNsCount - 1 - index;
} else {
count = index;
while (nsDecl != null && nsDecl.getLevel() == level) {
if (count == 0) {
return nsDecl;
nsDecl = nsDecl.getPrev();
return null; // never gets here
// Part of NamespaceContext impl below
//public final String getNsUri(String prefix)
public final String getNamespaceURI()
String uri = _tokenName.getNsUri();
// Null means it uses the default ns:
return (uri == null) ? _defaultNs.mURI : uri;
public final NamespaceContext getNonTransientNamespaceContext()
_lastNsContext = _lastNsContext.reuseOrCreate(_lastNsDecl);
return _lastNsContext;
/* NamespaceContext implementation
public String getNamespaceURI(String prefix)
if (prefix == null) {
throw new IllegalArgumentException(ErrorConsts.ERR_NULL_ARG);
if (prefix.length() == 0) { // default namespace?
// Need to check if it's null, too, to convert
String uri = _defaultNs.mURI;
return (uri == null) ? "" : uri;
// xml, xmlns?
if (prefix.equals(XMLConstants.XML_NS_PREFIX)) {
return XMLConstants.XML_NS_URI;
if (prefix.equals(XMLConstants.XMLNS_ATTRIBUTE)) {
// Nope, a specific other prefix
NsDeclaration nsDecl = _lastNsDecl;
while (nsDecl != null) {
if (nsDecl.hasPrefix(prefix)) {
return nsDecl.getCurrNsURI();
nsDecl = nsDecl.getPrev();
return null;
public String getPrefix(String nsURI)
/* As per JDK 1.5 JavaDocs, null is illegal; but no mention
* about empty String (""). But that should
if (nsURI == null) {
throw new IllegalArgumentException(ErrorConsts.ERR_NULL_ARG);
if (nsURI.equals(XMLConstants.XML_NS_URI)) {
return XMLConstants.XML_NS_PREFIX;
if (nsURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
return XMLConstants.XMLNS_ATTRIBUTE;
// First: does the default namespace bind to the URI?
if (nsURI.equals(_defaultNs.mURI)) {
return "";
/* Need to loop twice; first find a prefix, then ensure it's
* not masked by a later declaration
for (NsDeclaration nsDecl = _lastNsDecl; nsDecl != null;
nsDecl = nsDecl.getPrev()) {
if (nsDecl.hasNsURI(nsURI)) {
// Ok: but is prefix masked?
String prefix = nsDecl.getPrefix();
// Plus, default ns wouldn't do (since current one was already checked)
if (prefix != null) {
for (NsDeclaration decl2 = _lastNsDecl; decl2 != nsDecl;
decl2 = decl2.getPrev()) {
if (decl2.hasPrefix(prefix)) {
continue main_loop;
return prefix;
return null;
public Iterator<String> getPrefixes(String nsURI)
if (nsURI == null) {
throw new IllegalArgumentException(ErrorConsts.ERR_NULL_ARG);
if (nsURI.equals(XMLConstants.XML_NS_URI)) {
return new SingletonIterator(XMLConstants.XML_NS_PREFIX);
if (nsURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
return new SingletonIterator(XMLConstants.XMLNS_ATTRIBUTE);
ArrayList<String> l = null;
// First, the default ns?
if (nsURI.equals(_defaultNs.mURI)) {
l = new ArrayList<String>();
for (NsDeclaration nsDecl = _lastNsDecl; nsDecl != null;
nsDecl = nsDecl.getPrev()) {
if (nsDecl.hasNsURI(nsURI)) {
// Ok: but is prefix masked?
String prefix = nsDecl.getPrefix();
// Plus, default ns wouldn't do (since current one was already checked)
if (prefix != null) {
for (NsDeclaration decl2 = _lastNsDecl; decl2 != nsDecl;
decl2 = decl2.getPrev()) {
if (decl2.hasPrefix(prefix)) {
continue main_loop;
if (l == null) {
l = new ArrayList<String>();
if (l == null) {
return EmptyIterator.getInstance();
if (l.size() == 1) {
return new SingletonIterator(l.get(0));
return l.iterator();
/* Abstract methods for sub-classes to implement
// // token-finish methods
// Each finish method completes parsing of a token of one type;
// finishToken() declares the contract (all data needed to return
// the event must be available afterwards).
protected abstract void finishCharacters()
throws XMLStreamException;
protected abstract void finishCData()
throws XMLStreamException;
protected abstract void finishComment()
throws XMLStreamException;
/**
 * @param copyContents Callers in this class pass true when the text is
 *   needed and false when the subset text is to be skipped
 */
protected abstract void finishDTD(boolean copyContents)
throws XMLStreamException;
protected abstract void finishPI()
throws XMLStreamException;
protected abstract void finishSpace()
throws XMLStreamException;
// // token-skip methods
/**
 * @return True, if an unexpanded entity was encountered (and
 *  is now pending)
 */
protected abstract boolean skipCharacters()
throws XMLStreamException;
protected abstract void skipCData()
throws XMLStreamException;
protected abstract void skipComment()
throws XMLStreamException;
protected abstract void skipPI()
throws XMLStreamException;
protected abstract void skipSpace()
throws XMLStreamException;
/**
 * Secondary skip method called after primary text segment
 * has been skipped, and we are in coalescing mode.
 *
 * @return True, if an unexpanded entity was encountered (and
 *  is now pending)
 */
protected abstract boolean skipCoalescedText()
throws XMLStreamException;
// // Raw input access:
/**
 * Tries to load more input.
 *
 * @return False if no more input could be loaded (callers in this
 *   class treat that as unexpected end-of-input)
 */
protected abstract boolean loadMore()
throws XMLStreamException;
/* Basic namespace binding methods
* This method is called to find/create a fully qualified (bound)
* name (element / attribute), for a name with prefix. For non-prefixed
* names this method will not get called
protected final PName bindName(PName name, String prefix)
// First, do we have a cache, to perhaps find bound name from?
if (_nsBindingCache != null) {
PName cn = _nsBindingCache[name.unboundHashCode() & BIND_CACHE_MASK];
if (cn != null && cn.unboundEquals(name)) {
return cn;
// If no cache, or not found there, need to first find binding
for (int i = 0, len = _nsBindingCount; i < len; ++i) {
NsBinding b = _nsBindings[i];
if (b.mPrefix != prefix) { // prefixes are canonicalized
// Ok, match!
// Can we bubble prefix closer to the head?
if (i > 0) {
_nsBindings[i] = _nsBindings[i-1];
_nsBindings[i-1] = b;
// Plus, should we cache it?
PName bn = name.createBoundName(b);
if (_nsBindingCache == null) {
if (++_nsBindMisses < BIND_MISSES_TO_ACTIVATE_CACHE) {
return bn;
_nsBindingCache = new PName[BIND_CACHE_SIZE];
_nsBindingCache[bn.unboundHashCode() & BIND_CACHE_MASK] = bn;
return bn;
// If not even binding, need to create that first
// No match; perhaps "xml"? But is "xmlns" legal to use too?
if (prefix == "xml") {
return name.createBoundName(NsBinding.XML_BINDING);
/* Nope. Need to create a new binding. For such entries, let's
* not try caching, yet, but let's note it as a miss
NsBinding b = new NsBinding(prefix);
if (_nsBindingCount == 0) {
_nsBindings = new NsBinding[16];
} else if (_nsBindingCount >= _nsBindings.length) {
_nsBindings = (NsBinding[]) DataUtil.growAnyArrayBy(_nsBindings, _nsBindings.length);
_nsBindings[_nsBindingCount] = b;
return name.createBoundName(b);
* Method called when a namespace declaration needs to find the
* binding object (essentially a per-prefix-per-document canonical
* container object)
protected final NsBinding findOrCreateBinding(String prefix)
throws XMLStreamException
// !!! TODO: switch to hash at size N?
// TEST only (for ns-soap.xml):
//int MAX = (_nsBindingCount > 8) ? 8 : _nsBindingCount;
//for (int i = 0; i < MAX; ++i) {
for (int i = 0, len = _nsBindingCount; i < len; ++i) {
NsBinding b = _nsBindings[i];
if (b.mPrefix == prefix) { // prefixes are interned
if (i > 0) { // let's do bubble it up a notch... can speed things up
_nsBindings[i] = _nsBindings[i-1];
_nsBindings[i-1] = b;
return b;
if (prefix == "xml") {
return NsBinding.XML_BINDING;
if (prefix == "xmlns") {
return NsBinding.XMLNS_BINDING;
// Nope. Need to create a new binding
NsBinding b = new NsBinding(prefix);
if (_nsBindingCount == 0) {
_nsBindings = new NsBinding[16];
} else if (_nsBindingCount >= _nsBindings.length) {
_nsBindings = (NsBinding[]) DataUtil.growAnyArrayBy(_nsBindings, _nsBindings.length);
_nsBindings[_nsBindingCount] = b;
return b;
* Method called when we are ready to bind a declared namespace.
protected final void bindNs(PName name, String uri)
throws XMLStreamException
NsBinding ns;
String prefix = name.getPrefix();
if (prefix == null) { // default ns
ns = _defaultNs;
} else {
prefix = name.getLocalName();
ns = findOrCreateBinding(prefix);
if (ns.isImmutable()) { // xml, xmlns
checkImmutableBinding(prefix, uri);
/* 28-Oct-2006, tatus: Also need to ensure that neither
* xml nor xmlns-bound namespaces are bound to any
* other prefixes. Since we know that URIs are intern()ed,
* can just do identity comparison
if (!ns.isImmutable()) {
if (uri == XMLConstants.XML_NS_URI) {
reportIllegalNsDecl("xml", XMLConstants.XML_NS_URI);
} else if (uri == XMLConstants.XMLNS_ATTRIBUTE_NS_URI) {
reportIllegalNsDecl("xmlns", XMLConstants.XMLNS_ATTRIBUTE_NS_URI);
// Already declared in current scope?
if (_lastNsDecl != null && _lastNsDecl.alreadyDeclared(prefix, _depth)) {
_lastNsDecl = new NsDeclaration(ns, uri, _lastNsDecl, _depth);
* Method called when an immutable ns prefix (xml, xmlns) is
* encountered.
protected final void checkImmutableBinding(String prefix, String uri)
throws XMLStreamException
if (prefix != "xml" || !uri.equals(XMLConstants.XML_NS_URI)) {
/* Helper methods for sub-classes, input data
* Method that tries to load at least one more byte into buffer;
* and if that fails, throws an appropriate EOI exception.
protected final void loadMoreGuaranteed()
throws XMLStreamException
if (!loadMore()) {
reportInputProblem("Unexpected end-of-input when trying to parse "+ErrorConsts.tokenTypeDesc(_currToken));
protected final void loadMoreGuaranteed(int tt) throws XMLStreamException
if (!loadMore()) {
reportInputProblem("Unexpected end-of-input when trying to parse "+ErrorConsts.tokenTypeDesc(tt));
/* Helper methods for sub-classes, character validity checks
protected final void verifyXmlChar(int value) throws XMLStreamException
// Ok, and then need to check result is a valid XML content char:
if (value >= 0xD800) { // note: checked for overflow earlier
if (value < 0xE000) { // no surrogates via entity expansion
if (value == 0xFFFE || value == 0xFFFF) {
} else if (value < 32) {
// XML 1.1 allows most other chars; 1.0 does not:
if (value != INT_LF && value != INT_CR && value != INT_TAB) {
if (!_xml11 || value == 0) {
/* Helper methods for sub-classes, error reporting
protected void reportInputProblem(String msg)
throws XMLStreamException
/* 29-Mar-2008, tatus: Not sure if these are all Well-Formedness
* Constraint (WFC) violations? They should be... ?
throw new WFCException(msg, getCurrentLocation());
* Method called when a call to expand an entity within attribute
* value fails to expand it.
protected void reportUnexpandedEntityInAttr(PName name, boolean isNsDecl)
throws XMLStreamException
reportInputProblem("Unexpanded ENTITY_REFERENCE ("+_tokenName+") in "
+(isNsDecl ? "namespace declaration" : "attribute value"));
protected void reportPrologUnexpChar(boolean isProlog, int ch, String msg)
throws XMLStreamException
String fullMsg = isProlog ? ErrorConsts.SUFFIX_IN_PROLOG : ErrorConsts.SUFFIX_IN_EPILOG;
if (msg == null) {
if (ch == '&') {
throwUnexpectedChar(ch, fullMsg+"; no entities allowed");
} else {
fullMsg += msg;
throwUnexpectedChar(ch, fullMsg);
protected void reportPrologProblem(boolean isProlog, String msg)
throws XMLStreamException
String prefix = isProlog ? ErrorConsts.SUFFIX_IN_PROLOG : ErrorConsts.SUFFIX_IN_EPILOG;
reportInputProblem(prefix+": "+msg);
protected void reportTreeUnexpChar(int ch, String msg)
throws XMLStreamException
String fullMsg = ErrorConsts.SUFFIX_IN_TREE;
if (msg != null) {
fullMsg += msg;
throwUnexpectedChar(ch, fullMsg);
protected void reportInvalidNameChar(int ch, int index)
throws XMLStreamException
if (ch == INT_COLON) {
reportInputProblem("Invalid colon in name: at most one colon allowed in element/attribute names, and none in PI target or entity names");
if (index == 0) {
reportInputProblem("Invalid name start character (0x"
reportInputProblem("Invalid name character (0x"
protected void reportInvalidXmlChar(int ch)
throws XMLStreamException
if (ch == 0) {
reportInputProblem("Invalid null character");
if (ch < 32) {
reportInputProblem("Invalid white space character (0x"
reportInputProblem("Invalid xml content character (0x"
protected void reportEofInName(char[] cbuf, int clen)
throws XMLStreamException
reportInputProblem("Unexpected end-of-input in name (parsing "+ErrorConsts.tokenTypeDesc(_currToken)+")");
* Called when there's an unexpected char after PI target (non-ws,
* not part of '?>' end marker
protected void reportMissingPISpace(int ch)
throws XMLStreamException
throwUnexpectedChar(ch, ": expected either white space, or closing '?>'");
protected void reportDoubleHyphenInComments()
throws XMLStreamException
reportInputProblem("String '--' not allowed in comment (missing '>'?)");
protected void reportMultipleColonsInName()
throws XMLStreamException
reportInputProblem("Multiple colons not allowed in names");
protected void reportEntityOverflow()
throws XMLStreamException
reportInputProblem("Illegal character entity: value higher than max allowed (0x"+Integer.toHexString(MAX_UNICODE_CHAR)+")");
protected void reportInvalidNsIndex(int index)
/* 24-Jun-2006, tatus: Stax API doesn't specify what (if anything)
* should be thrown. Ref. Impl. throws IndexOutOfBounds, which
* makes sense; could also throw IllegalArgumentException.
throw new IndexOutOfBoundsException("Illegal namespace declaration index, "+index+", current START_ELEMENT/END_ELEMENT has "+getNsCount()+" declarations");
protected void reportUnboundPrefix(PName name, boolean isAttr)
throws XMLStreamException
reportInputProblem("Unbound namespace prefix '"+name.getPrefix()+"' (for "+(isAttr ? "attribute" : "element")+" name '"+name.getPrefixedName()+"')");
protected void reportDuplicateNsDecl(String prefix)
throws XMLStreamException
if (prefix == null) {
reportInputProblem("Duplicate namespace declaration for the default namespace");
} else {
reportInputProblem("Duplicate namespace declaration for prefix '"+prefix+"'");
protected void reportIllegalNsDecl(String prefix)
throws XMLStreamException
reportInputProblem("Illegal namespace declaration: can not re-bind prefix '"+prefix+"'");
protected void reportIllegalNsDecl(String prefix, String uri)
throws XMLStreamException
reportInputProblem("Illegal namespace declaration: can not bind URI '"+uri+"' to prefix other than '"+prefix+"'");
protected void reportUnexpectedEndTag(String expName)
throws XMLStreamException
reportInputProblem("Unexpected end tag: expected </"+expName+">");
// Thrown when ']]>' found in text content
protected void reportIllegalCDataEnd()
throws XMLStreamException
reportInputProblem("String ']]>' not allowed in textual content, except as the end marker of CDATA section");
protected void throwUnexpectedChar(int i, String msg)
throws XMLStreamException
// But first, let's check illegals
if (i < 32 && i != '\r' && i != '\n' && i != '\t') {
char c = (char) i;
String excMsg = "Unexpected character "+XmlChars.getCharDesc(c)+msg;
protected void throwNullChar()
throws XMLStreamException
reportInputProblem("Illegal character (NULL, unicode 0) encountered: not valid in any content");
protected char handleInvalidXmlChar(int i)
throws XMLStreamException
final IllegalCharHandler iHandler = _config.getIllegalCharHandler();
if (iHandler != null) {
return iHandler.convertIllegalChar(i);
char c = (char) i;
if (c == CHAR_NULL) {
String msg = "Illegal XML character ("+XmlChars.getCharDesc(c)+")";
if (_xml11) {
if (i < INT_SPACE) {
msg += " [note: in XML 1.1, it could be included via entity expansion]";
//will not reach this block
return (char) i;
protected void throwInvalidSpace(int i)
throws XMLStreamException
char c = (char) i;
if (c == CHAR_NULL) {
String msg = "Illegal character ("+XmlChars.getCharDesc(c)+")";
if (_xml11) {
if (i < INT_SPACE) {
msg += " [note: in XML 1.1, it could be included via entity expansion]";