package javolution.xml.internal.stream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.Map;
import javolution.io.UTF8StreamReader;
import javolution.lang.Realtime;
import javolution.text.CharArray;
import javolution.xml.sax.Attributes;
import javolution.xml.stream.Location;
import javolution.xml.stream.NamespaceContext;
import javolution.xml.stream.XMLInputFactory;
import javolution.xml.stream.XMLStreamConstants;
import javolution.xml.stream.XMLStreamException;
import javolution.xml.stream.XMLStreamReader;
* {@link XMLStreamReader} implementation.
* This implementation returns all contiguous character data in a single
* chunk (always coalescing). It is non-validating (DTD is returned
* unparsed). However, users may define a custom entity mapping using
* the {@link #setEntities} method (e.g. after parsing/resolving
* external entities).
public final class XMLStreamReaderImpl implements XMLStreamReader {
* Holds the textual representation for events.
static final String[] NAMES_OF_EVENTS = new String[] { "UNDEFINED",
* Holds the reader buffer capacity.
static final int READER_BUFFER_CAPACITY = 4096;
* Holds the prolog if any.
CharArray _prolog;
* Holds the current index in the character buffer.
private int _readIndex;
* Number of characters read from reader
private int _readCount;
* Holds the data buffer for CharSequence produced by this parser.
private char[] _data = new char[READER_BUFFER_CAPACITY * 2];
* Holds the current index of the data buffer (_data).
private int _index;
* Holds the current element nesting.
private int _depth;
* Holds qualified name (include prefix).
private CharArray _qName;
* Holds element prefix separator index.
private int _prefixSep;
* Holds attribute qualified name.
private CharArray _attrQName;
* Holds attribute prefix separator index.
private int _attrPrefixSep;
* Holds attribute value.
private CharArray _attrValue;
* Holds current event type
private int _eventType = START_DOCUMENT;
* Indicates if the current START_ELEMENT event comes from an empty tag, i.e. <sometag/>
private boolean _isEmpty;
* Indicates if characters are pending for potential coalescing.
boolean _charactersPending = false;
* Holds the start index for the current state within _data array.
private int _start;
* Holds the parser state.
private int _state = STATE_CHARACTERS;
* Holds the current text.
private CharArray _text;
* Holds the reader input source (<code>null</code> when unused).
private Reader _reader;
* Holds the character buffer used for reading.
private final char[] _readBuffer = new char[READER_BUFFER_CAPACITY];
* Holds the start offset in the character buffer (due to auto detection
* of encoding).
private int _startOffset; // Byte Order Mark count.
* Holds the location object.
private final LocationImpl _location = new LocationImpl();
* Holds the namespace stack.
private final NamespacesImpl _namespaces = new NamespacesImpl();
* Holds the current attributes.
private final AttributesImpl _attributes = new AttributesImpl(_namespaces);
* Holds working stack (by nesting level).
private CharArray[] _elemStack = new CharArray[16];
* Holds stream encoding if known.
private String _encoding;
* Holds the entities.
private final EntitiesImpl _entities = new EntitiesImpl();
* Holds the reader for input streams.
private final UTF8StreamReader _utf8StreamReader = new UTF8StreamReader();
* Holds the factory (if any)
private final XMLInputFactoryImpl _factory;
* Default constructor.
// Creates a reader that is not associated with any factory.
// NOTE(review): no body statement is visible here, yet the final field _factory
// has to be assigned by every constructor — confirm against the complete file.
public XMLStreamReaderImpl() {
/**
 * Factory-based constructor.
 *
 * @param factory the factory stored in {@code _factory}.
 */
XMLStreamReaderImpl(XMLInputFactoryImpl factory) {
    this._factory = factory;
}
* Sets the input stream source for this XML stream reader
* (encoding retrieved from XML prolog if any). This method
* attempts to detect the encoding automatically.
* @param in the input source with unknown encoding.
// Sets the input from a byte stream of unknown encoding: the encoding is first
// guessed by detectEncoding(in), then compared with the one declared in the prolog.
public void setInput(InputStream in) throws XMLStreamException {
setInput(in, detectEncoding(in));
CharArray prologEncoding = getCharacterEncodingScheme();
// Checks if necessary to change the reader.
// The reader is changed only when the prolog declares an encoding that differs
// from the detected one and the two are not both accepted by isUTF8.
if ((prologEncoding != null) && !prologEncoding.equals(_encoding)
&& !(isUTF8(prologEncoding) && isUTF8(_encoding))) {
// Changes reader (keep characters already read).
int startOffset = _readCount;
_startOffset = startOffset;
// NOTE(review): setInput(Reader) throws IllegalStateException while _reader is
// non-null; no statement clearing the reader state is visible before this
// second call — confirm against the complete file.
setInput(in, prologEncoding.toString());
private static boolean isUTF8(Object encoding) {
return encoding.equals("utf-8") || encoding.equals("UTF-8")
|| encoding.equals("ASCII") || encoding.equals("utf8")
|| encoding.equals("UTF8");
* Sets the input stream source and encoding for this XML stream reader.
* @param in the input source.
* @param encoding the associated encoding.
// Sets the input stream together with its encoding (recorded in _encoding);
// encodings not accepted by isUTF8 are decoded through an InputStreamReader.
public void setInput(InputStream in, String encoding)
throws XMLStreamException {
_encoding = encoding;
// NOTE(review): the UTF-8 branch shows no statement in this view — confirm
// against the complete file.
if (isUTF8(encoding)) { // Use our fast UTF-8 Reader.
} else {
try {
setInput(new InputStreamReader(in, encoding));
} catch (UnsupportedEncodingException e) {
// An unsupported encoding name is reported as an XML stream error.
throw new XMLStreamException(e);
* Sets the reader input source for this XML stream reader.
* This method reads the prolog (if any).
* @param reader the input source reader.
* @see javolution.io.UTF8StreamReader
* @see javolution.io.UTF8ByteBufferReader
* @see javolution.io.CharSequenceReader
public void setInput(Reader reader) throws XMLStreamException {
if (_reader != null)
throw new IllegalStateException("Reader not closed or reset");
_reader = reader;
try { // Reads prolog (if there)
int readCount = reader.read(_readBuffer, _startOffset,
_readBuffer.length - _startOffset);
_readCount = (readCount >= 0) ? readCount + _startOffset
: _startOffset;
if ((_readCount >= 5) && (_readBuffer[0] == '<')
&& (_readBuffer[1] == '?') && (_readBuffer[2] == 'x')
&& (_readBuffer[3] == 'm') && (_readBuffer[4] == 'l')
&& (_readBuffer[5] == ' ')) { // Prolog detected.
next(); // Processing instruction.
_prolog = this.getPIData();
_index = _prolog.offset() + _prolog.length(); // Keep prolog.
_start = _index; // Default state.
_eventType = START_DOCUMENT; // Resets to START_DOCUMENT.
} catch (IOException e) {
throw new XMLStreamException(e);
* Returns the current depth of the element. Outside the root element,
* the depth is 0. The depth is incremented by 1 when a start tag is
* reached. The depth is decremented AFTER the end tag event was observed.
* [code]
* <!-- outside --> 0
* <root> 1
* sometext 1
* <foobar> 2
* </foobar> 2
* </root> 1
* <!-- outside --> 0 [/code]
* @return the nesting depth.
public int getDepth() {
return _depth;
* Returns the qualified name of the current event.
* @return the qualified name.
* @throws IllegalStateException if this not a START_ELEMENT or END_ELEMENT.
public CharArray getQName() {
if ((_eventType != XMLStreamConstants.START_ELEMENT)
&& (_eventType != XMLStreamConstants.END_ELEMENT))
throw new IllegalStateException(
"Not a start element or an end element");
return _qName;
* Returns the qualified name of the element at the specified level.
* This method can be used to retrieve the XPath of the current element.
* @return the qualified name of the element at the specified level.
* @throws IllegalArgumentException if <code>depth > getDepth()</code>
public CharArray getQName(int depth) {
if (depth > this.getDepth())
throw new IllegalArgumentException();
return _elemStack[depth];
* Returns the current attributes (SAX2-Like).
* @return returns the number of attributes.
* @throws IllegalStateException if not a START_ELEMENT.
public Attributes getAttributes() {
if (_eventType != XMLStreamConstants.START_ELEMENT)
throw new IllegalStateException("Not a start element");
return _attributes;
* Defines a custom entities to replacement text mapping for this reader.
* For example:[code]
* FastMap<String, String> HTML_ENTITIES = new FastMap<String, String>();
* HTML_ENTITIES.put("nbsp", " ");
* HTML_ENTITIES.put("copy", "©");
* HTML_ENTITIES.put("eacute", "é");
* ...
* XMLStreamReaderImpl reader = new XMLStreamReaderImpl();
* reader.setEntities(HTML_ENTITIES);
* [/code]
* The entities mapping may be changed dynamically (e.g.
* after reading the DTD and all external entities references are resolved).
* @param entities the entities to replacement texts mapping
* (both must be <code>CharSequence</code> instances).
// Defines the custom entity-to-replacement-text mapping for this reader.
// NOTE(review): the method body is not visible in this view; presumably the map
// is handed to _entities — confirm against the complete file.
public void setEntities(Map<String, String> entities) {
* Returns the textual representation of this reader current state.
* @return the textual representation of the current state.
public String toString() {
return "XMLStreamReader - State: " + NAMES_OF_EVENTS[_eventType]
+ ", Location: " + _location.toString();
// Implements XMLStreamReader Interface.
// Advances the parser to the next event and returns its type (the same value is
// stored in _eventType). The method first clears what the previous event left
// behind, then loops: one character is taken from _readBuffer, appended to _data
// and interpreted according to _state.
// NOTE(review): several case labels, closing braces and statements are absent
// from this view (e.g. nothing follows the bare "if (_readIndex >= _readCount)"
// guards) — confirm every detail against the complete file before changing it.
public int next() throws XMLStreamException {
// Clears previous state.
if (_eventType == START_ELEMENT) {
if (_isEmpty) { // Previous empty tag, generates END_TAG automatically.
_isEmpty = false;
return _eventType = END_ELEMENT;
} else if (_eventType == END_ELEMENT) {
// Leaves the element: the data indexes are rewound to the offset of the
// qualified name that was stacked for this nesting level.
CharArray startElem = _elemStack[_depth--];
_start = _index = startElem.offset();
while (_seqs[--_seqsIndex] != startElem) { // Recycles CharArray instances.
// Reader loop.
while (true) {
// Main character reading block.
if ((_readIndex >= _readCount) && isEndOfStream())
return _eventType; // END_DOCUMENT or CHARACTERS.
char c = _readBuffer[_readIndex++];
// '&' and every character below ' ' are <= '&', so a single comparison
// filters out the common case before the finer checks.
if (c <= '&')
c = (c == '&') ? replaceEntity()
: (c < ' ') ? handleEndOfLine(c) : c;
_data[_index++] = c;
// Main processing.
switch (_state) {
while (true) { // Read characters data all at once.
if (c == '<') {
int length = _index - _start - 1;
if (length > 0) {
if (_charactersPending) {
_text.setArray(_data, _text.offset(),
_text.length() + length); // Coalescing.
} else {
_text = newSeq(_start, length);
_charactersPending = true;
_start = _index - 1; // Keeps '<' as part of markup.
_state = STATE_MARKUP;
// Local character reading block.
if ((_readIndex >= _readCount) && isEndOfStream())
return _eventType;
c = _readBuffer[_readIndex++];
if (c <= '&')
c = (c == '&') ? replaceEntity()
: (c < ' ') ? handleEndOfLine(c) : c;
_data[_index++] = c;
while (true) { // Reads CDATA all at once.
if ((c == '>') && (_index - _start >= 3)
&& (_data[_index - 2] == ']')
&& (_data[_index - 3] == ']')) {
_index -= 3;
int length = _index - _start;
if (length > 0) { // Not empty.
if (_charactersPending) {
_text.setArray(_data, _text.offset(),
_text.length() + length); // Coalescing.
} else {
_text = newSeq(_start, length);
_charactersPending = true;
_start = _index;
// Local character reading block.
if (_readIndex >= _readCount)
c = _readBuffer[_readIndex++];
if (c < ' ')
c = handleEndOfLine(c);
_data[_index++] = c;
if (c == '>') {
_text = newSeq(_start, _index - _start);
_index = _start; // Do not keep DTD.
return _eventType = DTD;
} else if (c == '[') {
if (c == ']') {
_state = STATE_DTD;
case STATE_MARKUP: // Starts with '<'
if (_index - _start == 2) {
if (c == '/') {
_start = _index = _index - 2;
_prefixSep = -1;
if (_charactersPending) { // Flush characters event.
_charactersPending = false;
return _eventType = CHARACTERS;
} else if (c == '?') {
_start = _index = _index - 2;
_state = STATE_PI;
if (_charactersPending) { // Flush characters event.
_charactersPending = false;
return _eventType = CHARACTERS;
} else if (c != '!') { // Element tag (first letter).
_data[_start] = c;
_index = _start + 1;
_prefixSep = -1;
if (_charactersPending) { // Flush character event.
_charactersPending = false;
return _eventType = CHARACTERS;
} else if ((_index - _start == 4)
&& (_data[_start + 1] == '!')
&& (_data[_start + 2] == '-')
&& (_data[_start + 3] == '-')) {
_start = _index = _index - 4; // Removes <!--
if (_charactersPending) { // Flush character event.
_charactersPending = false;
return _eventType = CHARACTERS;
} else if ((_index - _start == 9)
&& (_data[_start + 1] == '!')
&& (_data[_start + 2] == '[')
&& (_data[_start + 3] == 'C')
&& (_data[_start + 4] == 'D')
&& (_data[_start + 5] == 'A')
&& (_data[_start + 6] == 'T')
&& (_data[_start + 7] == 'A')
&& (_data[_start + 8] == '[')) {
_start = _index = _index - 9; // Do not keep <![CDATA[
_state = STATE_CDATA;
} else if ((_index - _start == 9)
&& (_data[_start + 1] == '!')
&& (_data[_start + 2] == 'D')
&& (_data[_start + 3] == 'O')
&& (_data[_start + 4] == 'C')
&& (_data[_start + 5] == 'T')
&& (_data[_start + 6] == 'Y')
&& (_data[_start + 7] == 'P')
&& (_data[_start + 8] == 'E')) {
// Keeps <!DOCTYPE as part of DTD.
_state = STATE_DTD;
} else {
// Ignores, e.g. <!ELEMENT <!ENTITY...
while (true) { // Read comment all at once.
if ((c == '>') && (_index - _start >= 3)
&& (_data[_index - 2] == '-')
&& (_data[_index - 3] == '-')) {
_index -= 3; // Removes -->
_text = newSeq(_start, _index - _start);
_index = _start; // Do not keep comments.
return _eventType = COMMENT;
// Local character reading block.
if (_readIndex >= _readCount)
c = _readBuffer[_readIndex++];
if (c < ' ')
c = handleEndOfLine(c);
_data[_index++] = c;
case STATE_PI:
if ((c == '>') && (_index - _start >= 2)
&& (_data[_index - 2] == '?')) {
_index -= 2; // Removes ?>
_text = newSeq(_start, _index - _start);
_index = _start; // Do not keep processing instructions.
while (true) { // Read element name all at once.
if (c < '@') { // Else avoid multiple checks.
if (c == '>') {
_qName = newSeq(_start, --_index - _start);
_start = _index;
_isEmpty = false;
return _eventType = START_ELEMENT;
} else if (c == '/') {
_qName = newSeq(_start, --_index - _start);
_start = _index;
} else if (c == ':') {
_prefixSep = _index - 1;
} else if (c <= ' ') {
_qName = newSeq(_start, --_index - _start);
if (_readIndex >= _readCount)
c = _data[_index++] = _readBuffer[_readIndex++];
if (c == '>') {
_start = --_index;
_isEmpty = false;
return _eventType = START_ELEMENT;
} else if (c == '/') {
} else if (c > ' ') {
_start = _index - 1; // Includes current character.
_attrPrefixSep = -1;
while (true) { // Read attribute name all at once.
if (c < '@') { // Else avoid multiple checks.
if (c <= ' ') {
_attrQName = newSeq(_start, --_index - _start);
} else if (c == '=') {
_attrQName = newSeq(_start, --_index - _start);
} else if (c == ':') {
_attrPrefixSep = _index - 1;
if (_readIndex >= _readCount)
_data[_index++] = c = _readBuffer[_readIndex++];
if (c == '=') {
} else if (c > ' ') { throw new XMLStreamException(
"'=' expected", _location); }
if (c == '\'') {
_start = --_index;
} else if (c == '\"') {
_start = --_index;
} else if (c > ' ') { throw new XMLStreamException(
"Quotes expected", _location); }
while (true) { // Read attribute value all at once.
if (c == '\'') {
_attrValue = newSeq(_start, --_index - _start);
// Local character reading block.
if (_readIndex >= _readCount)
c = _readBuffer[_readIndex++];
if (c == '&')
c = replaceEntity();
_data[_index++] = c;
while (true) { // Read attribute value all at once.
if (c == '\"') {
_attrValue = newSeq(_start, --_index - _start);
// Local character reading block.
if (_readIndex >= _readCount)
c = _readBuffer[_readIndex++];
if (c == '&')
c = replaceEntity();
_data[_index++] = c;
if (c == '>') {
_start = --_index;
_isEmpty = true;
return _eventType = START_ELEMENT;
} else {
throw new XMLStreamException("'>' expected", _location);
while (true) { // Element name can be read all at once.
if (c < '@') { // Else avoid multiple checks.
if (c == '>') {
_qName = newSeq(_start, --_index - _start);
_start = _index;
return _eventType = END_ELEMENT;
} else if (c == ':') {
_prefixSep = _index - 1;
} else if (c <= ' ') {
_qName = newSeq(_start, --_index - _start);
if (_readIndex >= _readCount)
c = _data[_index++] = _readBuffer[_readIndex++];
if (c == '>') {
_start = --_index;
return _eventType = END_ELEMENT;
} else if (c > ' ') { throw new XMLStreamException(
"'>' expected", _location); }
throw new XMLStreamException("State unknown: " + _state,
// Defines parsing states (keep values close together to avoid lookup).
// Values taken by _state; next() dispatches on them with a switch.
// STATE_CHARACTERS is the value _state is initialized with.
private static final int STATE_CHARACTERS = 1;
private static final int STATE_MARKUP = 2;
private static final int STATE_COMMENT = 3;
private static final int STATE_PI = 4;
private static final int STATE_CDATA = 5;
// Start tag parsing (element name, attribute name, '=', quoted attribute value, empty tag).
private static final int STATE_OPEN_TAGxREAD_ELEM_NAME = 6;
private static final int STATE_OPEN_TAGxELEM_NAME_READ = 7;
private static final int STATE_OPEN_TAGxREAD_ATTR_NAME = 8;
private static final int STATE_OPEN_TAGxATTR_NAME_READ = 9;
private static final int STATE_OPEN_TAGxEQUAL_READ = 10;
private static final int STATE_OPEN_TAGxREAD_ATTR_VALUE_SIMPLE_QUOTE = 11;
private static final int STATE_OPEN_TAGxREAD_ATTR_VALUE_DOUBLE_QUOTE = 12;
private static final int STATE_OPEN_TAGxEMPTY_TAG = 13;
// End tag parsing.
private static final int STATE_CLOSE_TAGxREAD_ELEM_NAME = 14;
private static final int STATE_CLOSE_TAGxELEM_NAME_READ = 15;
// Document type declaration (STATE_DTD_INTERNAL presumably covers the part between '[' and ']' — confirm).
private static final int STATE_DTD = 16;
private static final int STATE_DTD_INTERNAL = 17;
* Reloads the read buffer from the reader and resets the read index.
* @throws XMLStreamException if no input has been specified, if the
*         reader fails, or if the end of the document is reached while
*         inside an element or outside the characters state.
// Refills _readBuffer from _reader and restarts reading at index 0; the location
// counters are first advanced by the number of characters consumed so far.
private void reloadBuffer() throws XMLStreamException {
if (_reader == null)
throw new XMLStreamException("Input not specified");
_location._column += _readIndex;
_location._charactersRead += _readIndex;
_readIndex = 0;
try {
_readCount = _reader.read(_readBuffer, 0, _readBuffer.length);
// Running out of input is only accepted at depth 0 and in the characters state.
if ((_readCount <= 0)
&& ((_depth != 0) || (_state != STATE_CHARACTERS)))
throw new XMLStreamException("Unexpected end of document",
} catch (IOException e) {
throw new XMLStreamException(e);
// NOTE(review): the loop body is not visible in this view; presumably it grows
// _data (see increaseDataBuffer) until the new characters fit — confirm.
while ((_index + _readCount) >= _data.length) { // Potential overflow.
* Detects end of stream.
* @return <code>true</code> if end of stream has been reached
* and the event type (CHARACTERS or END_DOCUMENT) has been set;
* <code>false</code> otherwise.
// Tells whether the input is exhausted. When it is, _eventType is set to
// CHARACTERS if unflushed character data remains (length > 0) and to
// END_DOCUMENT otherwise.
private boolean isEndOfStream() throws XMLStreamException {
// NOTE(review): no statement follows this guard in this view (presumably a
// buffer reload) — confirm against the complete file.
if (_readIndex >= _readCount)
if (_readCount <= 0) {
// _state == STATE_CHARACTERS (otherwise reloadBuffer() exception)
if (_eventType == END_DOCUMENT)
throw new XMLStreamException(
"End document has already been reached");
int length = _index - _start;
if (length > 0) { // Flushes trailing characters.
if (_charactersPending) {
_text.setArray(_data, _text.offset(), _text.length()
+ length); // Coalescing.
} else {
_text = newSeq(_start, length);
_start = _index;
_eventType = CHARACTERS;
} else {
_eventType = END_DOCUMENT;
return true;
return false;
* Handles end of line as per XML Spec. 2.11
* @param c the potential end of line character.
* @return the replacement character for end of line.
// Normalizes end-of-line characters (0xD and 0xD 0xA both become 0xA) and
// rejects U+0000; any other character is returned unchanged.
private char handleEndOfLine(char c) throws XMLStreamException {
if (c == 0xD) { // Replaces #xD with #xA
// Unless next char is #xA, then skip,
// #xD#xA will be replaced by #xA
// NOTE(review): no statement follows this guard in this view (presumably a
// buffer reload) — confirm against the complete file.
if (_readIndex >= _readCount)
if ((_readIndex < _readCount) && (_readBuffer[_readIndex] == 0xA))
_readIndex++; // Skips the 0xA that follows 0xD
c = (char) 0xA;
if (c == 0xA) {
// The stored column is the negated read index, so that
// column + _readIndex (see LocationImpl.getColumnNumber) restarts at 0.
_location._column = -_readIndex; // column = 0
} else if (c == 0x0) { throw new XMLStreamException(
"Illegal XML character U+0000", _location); }
return c;
* Replaces an entity if the current state allows it.
* @return the next character after the text replacement or '&' if no
* replacement took place.
// Reads an entity reference (up to ';') into _data, lets _entities replace it in
// place and returns the character that follows the replacement text.
// In comment, processing-instruction and CDATA states '&' is returned untouched.
// NOTE(review): several guards below have no visible statement and both "char c"
// declarations appear at the same level in this view — confirm against the
// complete file.
private char replaceEntity() throws XMLStreamException {
if ((_state == STATE_COMMENT) || (_state == STATE_PI)
|| (_state == STATE_CDATA))
return '&'; // (&2.4)
int start = _index; // Index of first replacement character.
_data[_index++] = '&';
while (true) {
if (_readIndex >= _readCount)
char c = _data[_index++] = _readBuffer[_readIndex++];
if (c == ';')
if (c <= ' ')
throw new XMLStreamException("';' expected", _location);
// Ensures that the replacement string holds in the data buffer.
while (start + _entities.getMaxLength() >= _data.length) {
// Replaces the entity.
int length = _entities.replaceEntity(_data, start, _index - start);
// Returns the next character after entity unless ampersand.
_index = start + length;
// Local character reading block.
if (_readIndex >= _readCount)
char c = _readBuffer[_readIndex++];
// An '&' right after the replacement starts another entity, handled recursively.
return (c == '&') ? (c = replaceEntity()) : c;
* Processes the attribute just read.
// Registers the attribute held in _attrQName/_attrValue: "xmlns" and
// "xmlns:prefix" attributes update the namespace stack, every other attribute is
// added to _attributes.
// NOTE(review): both addAttribute calls are truncated in this view — confirm
// their remaining arguments against the complete file.
private void processAttribute() throws XMLStreamException {
if (_attrPrefixSep < 0) { // No prefix.
if (isXMLNS(_attrQName)) { // Sets default namespace.
_namespaces.setPrefix(_namespaces._defaultNsPrefix, _attrValue);
} else {
_attributes.addAttribute(_attrQName, null, _attrQName,
} else { // Prefix.
final int offset = _attrQName.offset();
final int length = _attrQName.length();
// _attrPrefixSep is an index into _data: the prefix ends just before it and
// the local name starts just after it.
CharArray prefix = newSeq(offset, _attrPrefixSep - offset);
CharArray localName = newSeq(_attrPrefixSep + 1, offset + length
- _attrPrefixSep - 1);
if (isXMLNS(prefix)) { // Namespace association.
_namespaces.setPrefix(localName, _attrValue);
} else {
_attributes.addAttribute(localName, prefix, _attrQName,
private static boolean isXMLNS(CharArray chars) {
return (chars.length() == 5) && (chars.charAt(0) == 'x')
&& (chars.charAt(1) == 'm') && (chars.charAt(2) == 'l')
&& (chars.charAt(3) == 'n') && (chars.charAt(4) == 's');
// Verifies that the end tag just read matches the element stacked at the current depth.
// NOTE(review): the throw statement is truncated in this view — confirm its
// remaining argument against the complete file.
private void processEndTag() throws XMLStreamException {
if (!_qName.equals(_elemStack[_depth]))
throw new XMLStreamException("Unexpected end tag for " + _qName,
// Enters a new nesting level and records the element qualified name for it.
// NOTE(review): the body of the capacity check is not visible in this view
// (presumably it enlarges _elemStack, see increaseStack) — confirm.
private void processStartTag() throws XMLStreamException {
if (++_depth >= _elemStack.length) {
_elemStack[_depth] = _qName;
// Implements Reusable.
// Restores the fields listed below to their initial values so that a new input
// can be set (setInput(Reader) requires _reader to be null).
// NOTE(review): _state, _qName, _text and _prefixSep are not reset by any visible
// line, and nothing follows the final factory check in this view — confirm
// against the complete file.
public void reset() {
// Resets all members (alphabetically ordered).
_attrPrefixSep = 0;
_attrQName = null;
_attrValue = null;
// NOTE(review): second assignment of _attrQName (redundant with the one above).
_attrQName = null;
_charactersPending = false;
_encoding = null;
_eventType = START_DOCUMENT;
_index = 0;
_isEmpty = false;
_prolog = null;
_readCount = 0;
_reader = null;
_depth = 0;
_readIndex = 0;
_seqsIndex = 0;
_start = 0;
_startOffset = 0;
// Recycles if factory produced.
if (_factory != null)
// Returns a new character sequence from the pool.
private CharArray newSeq(int offset, int length) {
CharArray seq = (_seqsIndex < _seqsCapacity) ? _seqs[_seqsIndex++]
: newSeq2();
return seq.setArray(_data, offset, length);
// Slow path of newSeq, taken when no pooled instance is left.
// NOTE(review): no statement creating the instance is visible here (presumably
// _createSeqLogic is run first) — confirm against the complete file.
private CharArray newSeq2() {
return _seqs[_seqsIndex++];
private final Runnable _createSeqLogic = new Runnable() {
public void run() {
if (_seqsCapacity >= _seqs.length) { // Resizes.
CharArray[] tmp = new CharArray[_seqs.length * 2];
System.arraycopy(_seqs, 0, tmp, 0, _seqs.length);
_seqs = tmp;
CharArray seq = new CharArray();
_seqs[_seqsCapacity++] = seq;
// Pool of CharArray instances handed out by newSeq.
private CharArray[] _seqs = new CharArray[256];
// Index of the next pooled instance to hand out.
private int _seqsIndex;
// Number of instances created in the pool so far.
private int _seqsCapacity;
// Increases internal data buffer capacity.
private void increaseDataBuffer() {
// Note: The character data at any nesting level is discarded
// only when moving to outer nesting level (due to coalescing).
// This accumulation may cause resize of the data buffer if
// numerous elements at the same nesting level are separated by
// spaces or indentation.
char[] tmp = new char[_data.length * 2];
javolution.context.LogContext.info(new CharArray(
"XMLStreamReaderImpl: Data buffer increased to " + tmp.length));
System.arraycopy(_data, 0, tmp, 0, _data.length);
_data = tmp;
// Increases statck.
private void increaseStack() {
CharArray[] tmp = new CharArray[_elemStack.length * 2];
javolution.context.LogContext.info(new CharArray(
"XMLStreamReaderImpl: CharArray stack increased to "
+ tmp.length));
System.arraycopy(_elemStack, 0, tmp, 0, _elemStack.length);
_elemStack = tmp;
* This inner class represents the parser location.
private final class LocationImpl implements Location {
int _column;
int _line;
int _charactersRead;
public int getLineNumber() {
return _line + 1;
public int getColumnNumber() {
return _column + _readIndex;
public int getCharacterOffset() {
return _charactersRead + _readIndex;
public String getPublicId() {
return null; // Not available.
public String getSystemId() {
return null; // Not available.
public String toString() {
return "Line " + getLineNumber() + ", Column " + getColumnNumber();
public void reset() {
_line = 0;
_column = 0;
_charactersRead = 0;
// Implements XMLStreamReader Interface //
// Implements XMLStreamReader Interface.
public void require(int type, CharSequence namespaceURI,
CharSequence localName) throws XMLStreamException {
if (_eventType != type)
throw new XMLStreamException("Expected event: "
+ NAMES_OF_EVENTS[type] + ", found event: "
+ NAMES_OF_EVENTS[_eventType]);
if ((namespaceURI != null) && !getNamespaceURI().equals(namespaceURI))
throw new XMLStreamException("Expected namespace URI: "
+ namespaceURI + ", found: " + getNamespaceURI());
if ((localName != null) && !getLocalName().equals(localName))
throw new XMLStreamException("Expected local name: " + localName
+ ", found: " + getLocalName());
// Implements XMLStreamReader Interface.
// Reads the text content of the current START_ELEMENT up to its END_ELEMENT and
// returns it; comments and processing instructions met on the way are skipped,
// and an empty sequence is returned when no character data was found.
// NOTE(review): two throw statements are truncated in this view — confirm their
// remaining argument against the complete file.
public CharArray getElementText() throws XMLStreamException {
// Derived from interface specification code.
if (getEventType() != XMLStreamConstants.START_ELEMENT) { throw new XMLStreamException(
"Parser must be on START_ELEMENT to read next text",
getLocation()); }
CharArray text = null;
int eventType = next();
while (eventType != XMLStreamConstants.END_ELEMENT) {
if (eventType == XMLStreamConstants.CHARACTERS) {
if (text == null) {
text = getText();
} else { // Merge (adjacent text, comments and PI are not kept).
// The first chunk is extended in place by the length of the new chunk.
text.setArray(_data, text.offset(), text.length()
+ getText().length());
} else if (eventType == XMLStreamConstants.PROCESSING_INSTRUCTION
|| eventType == XMLStreamConstants.COMMENT) {
// Skips (not kept).
} else if (eventType == XMLStreamConstants.END_DOCUMENT) {
throw new XMLStreamException(
"Unexpected end of document when reading element text content",
} else if (eventType == XMLStreamConstants.START_ELEMENT) {
throw new XMLStreamException(
"Element text content may not contain START_ELEMENT",
} else {
throw new XMLStreamException("Unexpected event type "
+ NAMES_OF_EVENTS[eventType], getLocation());
eventType = next();
return (text != null) ? text : newSeq(0, 0);
// Implements XMLStreamReader Interface.
public Object getProperty(String name) throws IllegalArgumentException {
if (name.equals(XMLInputFactory.IS_COALESCING)) {
return Boolean.TRUE;
} else if (name.equals(XMLInputFactory.ENTITIES)) {
return _entities.getEntitiesMapping();
} else {
throw new IllegalArgumentException("Property: " + name
+ " not supported");
// Implements XMLStreamReader Interface.
// Closes this reader.
// NOTE(review): the method body is not visible in this view — confirm what is
// released or reset against the complete file.
public void close() throws XMLStreamException {
public int getAttributeCount() {
if (_eventType != XMLStreamConstants.START_ELEMENT)
throw illegalState("Not a start element");
return _attributes.getLength();
public CharArray getAttributeLocalName(int index) {
if (_eventType != XMLStreamConstants.START_ELEMENT)
throw illegalState("Not a start element");
return _attributes.getLocalName(index);
public CharArray getAttributeNamespace(int index) {
if (_eventType != XMLStreamConstants.START_ELEMENT)
throw illegalState("Not a start element");
CharArray prefix = _attributes.getPrefix(index);
return _namespaces.getNamespaceURINullAllowed(prefix);
public CharArray getAttributePrefix(int index) {
if (_eventType != XMLStreamConstants.START_ELEMENT)
throw illegalState("Not a start element");
return _attributes.getPrefix(index);
public CharArray getAttributeType(int index) {
if (_eventType != XMLStreamConstants.START_ELEMENT)
throw illegalState("Not a start element");
return _attributes.getType(index);
public CharArray getAttributeValue(CharSequence uri, CharSequence localName) {
if (_eventType != XMLStreamConstants.START_ELEMENT)
throw illegalState("Not a start element");
return (uri == null) ? _attributes.getValue(localName) : _attributes
.getValue(uri, localName);
public CharArray getAttributeValue(int index) {
if (_eventType != XMLStreamConstants.START_ELEMENT)
throw illegalState("Not a start element");
return _attributes.getValue(index);
public CharArray getCharacterEncodingScheme() {
return readPrologAttribute(ENCODING);
private static final CharArray ENCODING = new CharArray("encoding");
public String getEncoding() {
return _encoding;
public int getEventType() {
return _eventType;
public CharArray getLocalName() {
if ((_eventType != XMLStreamConstants.START_ELEMENT)
&& (_eventType != XMLStreamConstants.END_ELEMENT))
throw illegalState("Not a start or end element");
if (_prefixSep < 0)
return _qName;
CharArray localName = newSeq(_prefixSep + 1,
_qName.offset() + _qName.length() - _prefixSep - 1);
return localName;
public Location getLocation() {
return _location;
public int getNamespaceCount() {
if ((_eventType != XMLStreamConstants.START_ELEMENT)
&& (_eventType != XMLStreamConstants.END_ELEMENT))
throw illegalState("Not a start or end element");
return _namespaces._namespacesCount[_depth];
public CharArray getNamespacePrefix(int index) {
if ((_eventType != XMLStreamConstants.START_ELEMENT)
&& (_eventType != XMLStreamConstants.END_ELEMENT))
throw illegalState("Not a start or end element");
return _namespaces._prefixes[index];
public CharArray getNamespaceURI(CharSequence prefix) {
if ((_eventType != XMLStreamConstants.START_ELEMENT)
&& (_eventType != XMLStreamConstants.END_ELEMENT))
throw illegalState("Not a start or end element");
return _namespaces.getNamespaceURI(prefix);
public CharArray getNamespaceURI(int index) {
if ((_eventType != XMLStreamConstants.START_ELEMENT)
&& (_eventType != XMLStreamConstants.END_ELEMENT))
throw illegalState("Not a start or end element");
return _namespaces._namespaces[index];
public NamespaceContext getNamespaceContext() {
return _namespaces;
public CharArray getNamespaceURI() {
return _namespaces.getNamespaceURINullAllowed(getPrefix());
public CharArray getPrefix() {
if ((_eventType != XMLStreamConstants.START_ELEMENT)
&& (_eventType != XMLStreamConstants.END_ELEMENT))
throw illegalState("Not a start or end element");
if (_prefixSep < 0)
return null;
int offset = _qName.offset();
CharArray prefix = newSeq(offset, _prefixSep - offset);
return prefix;
public CharArray getPIData() {
if (_eventType != XMLStreamConstants.PROCESSING_INSTRUCTION)
throw illegalState("Not a processing instruction");
int offset = _text.indexOf(' ') + _text.offset() + 1;
CharArray piData = newSeq(offset, _text.length() - offset);
return piData;
public CharArray getPITarget() {
if (_eventType != XMLStreamConstants.PROCESSING_INSTRUCTION)
throw illegalState("Not a processing instruction");
CharArray piTarget = newSeq(_text.offset(),
_text.indexOf(' ') + _text.offset());
return piTarget;
public CharArray getText() {
if ((_eventType != XMLStreamConstants.CHARACTERS)
&& (_eventType != XMLStreamConstants.COMMENT)
&& (_eventType != XMLStreamConstants.DTD))
throw illegalState("Not a text event");
return _text;
public char[] getTextCharacters() {
return getText().array();
public int getTextCharacters(int sourceStart, char[] target,
int targetStart, int length) throws XMLStreamException {
CharArray text = getText();
int copyLength = Math.min(length, text.length());
System.arraycopy(text.array(), sourceStart + text.offset(), target,
targetStart, copyLength);
return copyLength;
public int getTextLength() {
return getText().length();
public int getTextStart() {
return getText().offset();
public CharArray getVersion() {
return readPrologAttribute(VERSION);
private static final CharArray VERSION = new CharArray("version");
public boolean isStandalone() {
CharArray standalone = readPrologAttribute(STANDALONE);
return (standalone != null) ? standalone.equals("no") : true;
public boolean standaloneSet() {
return readPrologAttribute(STANDALONE) != null;
private static final CharArray STANDALONE = new CharArray("standalone");
public boolean hasName() {
return (_eventType == XMLStreamConstants.START_ELEMENT)
|| (_eventType == XMLStreamConstants.END_ELEMENT);
public boolean hasNext() throws XMLStreamException {
return _eventType != XMLStreamConstants.END_DOCUMENT;
public boolean hasText() {
return ((_eventType == XMLStreamConstants.CHARACTERS)
|| (_eventType == XMLStreamConstants.COMMENT) || (_eventType == XMLStreamConstants.DTD))
&& (_text.length() > 0);
public boolean isAttributeSpecified(int index) {
if (_eventType != XMLStreamConstants.START_ELEMENT)
throw new IllegalStateException("Not a start element");
return _attributes.getValue(index) != null;
public boolean isCharacters() {
return _eventType == XMLStreamConstants.CHARACTERS;
public boolean isEndElement() {
return _eventType == XMLStreamConstants.END_ELEMENT;
public boolean isStartElement() {
return _eventType == XMLStreamConstants.START_ELEMENT;
public boolean isWhiteSpace() {
if (isCharacters()) {
char[] chars = _text.array();
for (int i = _text.offset(), end = _text.offset() + _text.length(); i < end;) {
if (!isWhiteSpace(chars[i++]))
return false;
return true;
return false;
// Whitespaces according to XML 1.1 Specification.
private static boolean isWhiteSpace(char c) {
return (c == 0x20) || (c == 0x9) || (c == 0xD) || (c == 0xA);
public int nextTag() throws XMLStreamException {
int eventType = next();
while (eventType == XMLStreamConstants.COMMENT
|| eventType == XMLStreamConstants.PROCESSING_INSTRUCTION
|| eventType == XMLStreamConstants.DTD
|| (eventType == XMLStreamConstants.CHARACTERS && isWhiteSpace())) {
eventType = next();
if (eventType != XMLStreamConstants.START_ELEMENT
&& eventType != XMLStreamConstants.END_ELEMENT)
throw new XMLStreamException("Tag expected (but found "
+ NAMES_OF_EVENTS[_eventType] + ")");
return eventType;
private IllegalStateException illegalState(String msg) {
return new IllegalStateException(msg + " ("
+ NAMES_OF_EVENTS[_eventType] + ")");
private String detectEncoding(InputStream input) throws XMLStreamException {
// Autodetect encoding (see http://en.wikipedia.org/wiki/UTF-16)
int byte0;
try {
byte0 = input.read();
} catch (IOException e) {
throw new XMLStreamException(e);
if (byte0 == -1)
throw new XMLStreamException("Premature End-Of-File");
if (byte0 == '<') { // UTF-8 or compatible encoding.
_readBuffer[_startOffset++] = '<';
return "UTF-8";
} else {
int byte1;
try {
byte1 = input.read();
} catch (IOException e) {
throw new XMLStreamException(e);
if (byte1 == -1)
throw new XMLStreamException("Premature End-Of-File");
if ((byte0 == 0) && (byte1 == '<')) { // UTF-16 BIG ENDIAN
_readBuffer[_startOffset++] = '<';
return "UTF-16BE";
} else if ((byte0 == '<') && (byte1 == 0)) { // UTF-16 LITTLE ENDIAN
_readBuffer[_startOffset++] = '<';
return "UTF-16LE";
} else if ((byte0 == 0xFF) && (byte1 == 0xFE)) { // BOM for UTF-16 LITTLE ENDIAN
return "UTF-16";
} else if ((byte0 == 0xFE) && (byte1 == 0xFF)) { // BOM for UTF-16 BIG ENDIAN
return "UTF-16";
} else { // Encoding unknown (or no prolog) assumes UTF-8
_readBuffer[_startOffset++] = (char) byte0;
_readBuffer[_startOffset++] = (char) byte1;
return "UTF-8";
// Returns the value of the specified attribute of the prolog, or null when there
// is no prolog or the attribute name does not occur in it. The prolog is scanned
// after the name with a small state machine: '=' first, then the opening quote,
// then the value up to the matching closing quote.
// NOTE(review): the case labels and break statements of the switch are absent
// from this view — confirm against the complete file.
private final CharArray readPrologAttribute(CharSequence name) {
if (_prolog == null)
return null;
final int READ_EQUAL = 0;
final int READ_QUOTE = 1;
final int VALUE_SIMPLE_QUOTE = 2;
final int VALUE_DOUBLE_QUOTE = 3;
int i = _prolog.indexOf(name);
if (i >= 0) {
// indexOf is relative to the prolog start; convert to an index in its array.
i += _prolog.offset();
int maxIndex = _prolog.offset() + _prolog.length();
i += name.length();
int state = READ_EQUAL;
int valueOffset = 0;
while (i < maxIndex) {
char c = _prolog.array()[i++];
switch (state) {
if (c == '=') {
state = READ_QUOTE;
if (c == '"') {
valueOffset = i;
} else if (c == '\'') {
valueOffset = i;
// i already points past the closing quote, hence the -1 on the length.
if (c == '\'')
return newSeq(valueOffset, i - valueOffset - 1);
if (c == '"')
return newSeq(valueOffset, i - valueOffset - 1);
return null;