/*
* Copyright (c) 2003-2004, KNOPFLERFISH project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* - Neither the name of the KNOPFLERFISH project nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.knopflerfish.shared.cm;
// TODO
// - Start-Tag, End-Tag, EmptyElement-Tag
// - positionAtXXX naming of methods
// - readToNextTag vs readStartOfTag
import java.io.PushbackReader;
import java.util.Dictionary;
import java.util.Hashtable;
/**
 * A minimal, callback-driven XML reader.
 *
 * <p>Subclasses override {@link #startElement(String, Dictionary)} and
 * {@link #endElement(String, Dictionary, String)} to receive the parsed
 * elements. Parsing is started with {@link #read(PushbackReader)}.
 *
 * <p>Notes on usage and scope, all derived from the code below:
 * <ul>
 * <li>The supplied <code>PushbackReader</code> must have a pushback capacity
 * of at least 2, because {@link #tagShouldBeIgnored(PushbackReader)} and
 * {@link #isEndTag(PushbackReader)} read two characters and unread both.</li>
 * <li>The content of an element is read either as nested elements (when its
 * first non-whitespace character is '&lt;') or as text up to the next
 * '&lt;'; text followed by child elements is not handled.</li>
 * <li>Comments, processing instructions and other "&lt;!" declarations are
 * skipped where a tag is expected.</li>
 * <li>End of input is signalled internally by an <code>Exception</code>
 * whose message is {@link #EOF}.</li>
 * </ul>
 */
public class XmlReader {
    /** Message of the exception thrown by {@link #readNextChar} at end of input. */
    protected final static String EOF = "EOF";

    /**
     * Callback invoked once the start tag of an element has been read.
     * The default implementation does nothing.
     *
     * @param elementType the element name
     * @param attributes the attributes of the element, or <code>null</code>
     *            if the start tag had none
     * @throws Exception if the subclass rejects the element
     */
    protected void startElement(String elementType, Dictionary attributes)
            throws Exception {
    }

    /**
     * Callback invoked once an element has been read completely.
     * The default implementation does nothing.
     *
     * @param elementType the element name
     * @param attributes the attributes of the element, or <code>null</code>
     *            if the start tag had none
     * @param content <code>null</code> for an empty-element tag, the empty
     *            string when the content consisted of nested tags, otherwise
     *            the trimmed, unescaped text
     * @throws Exception if the subclass rejects the element
     */
    protected void endElement(String elementType, Dictionary attributes,
            String content) throws Exception {
    }

    /**
     * Reads consecutive tags until end of input or until an end tag is next.
     * An end tag is left unread so that the enclosing element can match it.
     *
     * @param r the input; needs a pushback capacity of at least 2
     * @throws Exception on malformed input or I/O failure
     */
    protected void read(PushbackReader r) throws Exception {
        while (readToNextTag(r)) {
            if (tagShouldBeIgnored(r)) {
                ignoreTag(r);
            } else if (isEndTag(r)) {
                return;
            } else {
                readElement(r);
            }
        }
    }

    /**
     * Skips whitespace and verifies that a tag follows.
     *
     * @return <code>true</code> if the next character is '&lt;' (left unread),
     *         <code>false</code> if end of input was reached
     * @throws Exception if something other than '&lt;' follows
     */
    boolean readToNextTag(PushbackReader r) throws Exception {
        char c;
        try {
            c = readAndPushbackNextNonWhitespaceChar(r);
        } catch (Exception e) {
            // End of input is reported as an exception carrying the EOF message.
            if (EOF.equals(e.getMessage())) {
                return false;
            }
            throw e;
        }
        throwIfNotExpectedChar(c, '<', r);
        return true;
    }

    /**
     * Peeks at the next two characters to decide whether the upcoming tag is
     * one that {@link #ignoreTag(PushbackReader)} should skip.
     *
     * @return <code>true</code> if the second character is '!' or '?'
     */
    boolean tagShouldBeIgnored(PushbackReader r) throws Exception {
        char first = readNextChar(r);
        char second = readNextChar(r);
        // Unread in reverse order so the characters are read again in order.
        r.unread(second);
        r.unread(first);
        return second == '!' || second == '?';
    }

    /**
     * Consumes a tag that carries no element data.
     *
     * <p>Comments are skipped up to "--&gt;", processing instructions up to
     * "?&gt;", and other "&lt;!" declarations up to the first '&gt;' that is
     * not enclosed in '[' ... ']'. Any other tag is skipped up to the first
     * '&gt;'.
     *
     * @throws Exception if the next character is not '&lt;' or the input ends
     *             before the tag is closed
     */
    void ignoreTag(PushbackReader r) throws Exception {
        char c = readNextChar(r);
        throwIfNotExpectedChar(c, '<');
        char kind = readNextChar(r);
        if (kind == '?') {
            skipPast(r, "?>");
        } else if (kind == '!') {
            char afterBang = readNextChar(r);
            if (afterBang != '-') {
                skipDeclaration(r, afterBang);
                return;
            }
            char afterDash = readNextChar(r);
            if (afterDash == '-') {
                // "<!--" seen: a comment may contain '>' and ends only at "-->".
                skipPast(r, "-->");
            } else {
                // The single '-' cannot affect bracket depth, continue from here.
                skipDeclaration(r, afterDash);
            }
        } else {
            c = kind;
            while (c != '>') {
                c = readNextChar(r);
            }
        }
    }

    /** Consumes input up to and including the first occurrence of terminator. */
    private static void skipPast(PushbackReader r, String terminator)
            throws Exception {
        int length = terminator.length();
        StringBuffer window = new StringBuffer();
        while (true) {
            window.append(readNextChar(r));
            if (window.length() > length) {
                window.deleteCharAt(0);
            }
            if (terminator.equals(window.toString())) {
                return;
            }
        }
    }

    /**
     * Consumes a "&lt;!" declaration, starting with the already read character
     * first, up to the closing '&gt;' that is outside any '[' ... ']' section.
     */
    private static void skipDeclaration(PushbackReader r, char first)
            throws Exception {
        int bracketDepth = 0;
        char c = first;
        while (true) {
            if (c == '[') {
                bracketDepth++;
            } else if (c == ']') {
                if (bracketDepth > 0) {
                    bracketDepth--;
                }
            } else if (c == '>' && bracketDepth == 0) {
                return;
            }
            c = readNextChar(r);
        }
    }

    /**
     * Peeks at the next two characters.
     *
     * @return <code>true</code> if they are "&lt;/"
     */
    boolean isEndTag(PushbackReader r) throws Exception {
        char first = readNextChar(r);
        char second = readNextChar(r);
        r.unread(second);
        r.unread(first);
        return first == '<' && second == '/';
    }

    // REFACTOR TO:
    // notEmptyElement = readStartTag(r)
    // if(notEmptyElement) readContent(r)
    // readEndTag
    /**
     * Reads one complete element and reports it through
     * {@link #startElement(String, Dictionary)} and
     * {@link #endElement(String, Dictionary, String)}.
     */
    void readElement(PushbackReader r) throws Exception {
        readStartOfTag(r);
        String elementType = readElementType(r);
        Dictionary attributes = readAttributes(r);
        boolean notEmptyElement = readEndOfStartTag(r);
        startElement(elementType, attributes);
        String content = null;
        if (notEmptyElement) {
            content = readContent(r, elementType, attributes);
            readEndTag(elementType, r);
        }
        endElement(elementType, attributes, content);
    }

    /**
     * Reads an end tag and verifies that it closes the given element.
     *
     * @param startTagElementType the name used in the matching start tag
     */
    void readEndTag(String startTagElementType, PushbackReader r)
            throws Exception {
        readStartOfEndTag(r);
        readAndMatchElementType(startTagElementType, r);
        readEndOfTag(r);
    }

    /**
     * Reads an element name, requires it to equal elementType and requires
     * that it is followed by '&gt;', '/' or whitespace.
     */
    void readAndMatchElementType(String elementType, PushbackReader r)
            throws Exception {
        readAndMatchXMLName(elementType, r);
        char c = readAndPushbackNextChar(r);
        throwIfNotElementTypeTerminator(c, elementType);
    }

    /**
     * Reads an XML name and requires it to be exactly elementType.
     *
     * <p>The complete name is read before comparing, so a name that is merely
     * a prefix of elementType, or longer than it, is rejected with a parse
     * error.
     *
     * @throws Exception if the name read differs from elementType
     */
    void readAndMatchXMLName(String elementType, PushbackReader r)
            throws Exception {
        String found = readXMLName(r);
        if (!found.equals(elementType)) {
            throwMessage("Expected end tag </" + elementType
                    + "> but found </" + found + ">");
        }
    }

    /**
     * Reads an element name and requires that it is followed by '&gt;', '/'
     * or whitespace (the following character is left unread).
     *
     * @return the element name
     */
    String readElementType(PushbackReader r) throws Exception {
        String elementType = readXMLName(r);
        char c = readAndPushbackNextChar(r);
        throwIfNotElementTypeTerminator(c, elementType);
        return elementType;
    }

    /** Rejects a character that may not directly follow an element name. */
    private void throwIfNotElementTypeTerminator(char c, String elementType)
            throws Exception {
        if (c != '>' && c != '/' && isNotXMLWhitespace(c)) {
            throwMessage("Error while reading element type after: "
                    + elementType);
        }
    }

    /**
     * Skips whitespace and reads an XML name. The character that ends the
     * name is left unread.
     *
     * @return the name
     * @throws Exception if the first non-whitespace character cannot start a
     *             name
     */
    String readXMLName(PushbackReader r) throws Exception {
        char c = readNextNonWhitespaceChar(r);
        StringBuffer xmlName = new StringBuffer();
        if (isXMLNameStartChar(c)) {
            xmlName.append(c);
        } else {
            throwMessage("Error while reading XML name: " + c
                    + " is not a valid start char.");
        }
        c = readNextChar(r);
        while (isXMLNameChar(c)) {
            xmlName.append(c);
            c = readNextChar(r);
        }
        r.unread(c);
        return xmlName.toString();
    }

    /**
     * Reads the attributes of a start tag. Whitespace is accepted on both
     * sides of the '=' sign.
     *
     * @return the attributes keyed by name, or <code>null</code> if the tag
     *         has no attributes
     */
    Dictionary readAttributes(PushbackReader r) throws Exception {
        Dictionary attributes = null;
        while (nextNonWhitespaceIsANameStartChar(r)) {
            String name = readXMLName(r);
            char c = readNextNonWhitespaceChar(r);
            throwIfNotExpectedChar(c, '=');
            String value = readAttributeValue(r);
            if (attributes == null) {
                // Created lazily so that attribute-less tags yield null.
                attributes = new Hashtable();
            }
            attributes.put(name, value);
        }
        return attributes;
    }

    /**
     * Skips whitespace and reads a quoted attribute value, resolving escape
     * sequences. The value may be delimited by double or by single quotes.
     *
     * @return the unescaped value without the surrounding quotes
     * @throws Exception if the value does not start with a quote character
     */
    String readAttributeValue(PushbackReader r) throws Exception {
        char quote = readNextNonWhitespaceChar(r);
        if (quote != '\'') {
            throwIfNotExpectedChar(quote, '\"');
        }
        StringBuffer value = new StringBuffer();
        char c = readNextChar(r);
        // The raw character decides termination, so an escaped quote
        // (&quot; / &apos;) never ends the value.
        while (quote == '\"' ? isXMLAttributeValueChar(c) : c != quote) {
            if (isXMLEscapeCharacter(c)) {
                c = readEscapedCharacter(r);
            }
            value.append(c);
            c = readNextChar(r);
        }
        throwIfNotExpectedChar(c, quote);
        return value.toString();
    }

    /**
     * Skips whitespace and peeks at the next character.
     *
     * @return <code>true</code> if that character can start an XML name
     */
    boolean nextNonWhitespaceIsANameStartChar(PushbackReader r)
            throws Exception {
        char c = readAndPushbackNextNonWhitespaceChar(r);
        return isXMLNameStartChar(c);
    }

    /** Skips whitespace and consumes the '&lt;' that opens a tag. */
    void readStartOfTag(PushbackReader r) throws Exception {
        char c = readNextNonWhitespaceChar(r);
        throwIfNotExpectedChar(c, '<');
    }

    /** Skips whitespace and consumes the "&lt;/" that opens an end tag. */
    void readStartOfEndTag(PushbackReader r) throws Exception {
        char c = readNextNonWhitespaceChar(r);
        throwIfNotExpectedChar(c, '<');
        c = readNextChar(r);
        throwIfNotExpectedChar(c, '/');
    }

    /** Skips whitespace and consumes the '&gt;' that closes a tag. */
    void readEndOfTag(PushbackReader r) throws Exception {
        char c = readNextNonWhitespaceChar(r);
        throwIfNotExpectedChar(c, '>');
    }

    /**
     * Skips whitespace and consumes the end of a start tag, either "&gt;" or
     * "/&gt;".
     *
     * @return <code>true</code> if the element has content and an end tag,
     *         <code>false</code> for an empty-element tag
     */
    boolean readEndOfStartTag(PushbackReader r) throws Exception {
        char c = readNextNonWhitespaceChar(r);
        boolean emptyElement = c == '/';
        if (emptyElement) {
            c = readNextChar(r);
        }
        throwIfNotExpectedChar(c, '>');
        return !emptyElement;
    }

    /**
     * Reads the content of an element. If the content starts with a tag the
     * nested tags are processed through {@link #read(PushbackReader)},
     * otherwise the content is read as text.
     *
     * @param elementType not used by this implementation
     * @param attributes not used by this implementation
     * @return the empty string for nested tags, otherwise the text
     */
    String readContent(PushbackReader r, String elementType,
            Dictionary attributes) throws Exception {
        char c = readAndPushbackNextNonWhitespaceChar(r);
        if (c == '<') {
            read(r);
            return "";
        }
        return readText(r);
    }

    /**
     * Skips leading whitespace and reads text up to, but not including, the
     * next '&lt;', resolving escape sequences.
     *
     * @return the trimmed text, or <code>null</code> if the first
     *         non-whitespace character is already '&lt;'
     */
    String readText(PushbackReader r) throws Exception {
        char c = readNextNonWhitespaceChar(r);
        StringBuffer text = null;
        while (c != '<') {
            if (text == null) {
                text = new StringBuffer();
            }
            if (isXMLEscapeCharacter(c)) {
                c = readEscapedCharacter(r);
            }
            text.append(c);
            c = readNextChar(r);
        }
        r.unread(c);
        if (text == null) {
            return null;
        }
        return text.toString().trim();
    }

    /**
     * Reads the remainder of an escape sequence whose leading '&amp;' has
     * already been consumed, up to and including the terminating ';'.
     *
     * <p>Supported are decimal ("#65") and hexadecimal ("#x41") character
     * references and the entities amp, lt, gt, apos and quot.
     *
     * @return the character the sequence stands for
     * @throws Exception if the sequence is unknown, is not a valid number, or
     *             denotes a value that does not fit into a <code>char</code>
     */
    char readEscapedCharacter(PushbackReader r) throws Exception {
        // Collect everything up to ';' first; deciding on a fixed number of
        // leading characters would swallow the ';' of short references.
        StringBuffer reference = new StringBuffer();
        char next = readNextChar(r);
        while (next != ';') {
            reference.append(next);
            next = readNextChar(r);
        }
        String s = reference.toString();
        char c = 0;
        if (s.startsWith("#")) {
            c = decodeCharacterReference(s);
        } else if ("amp".equals(s)) {
            c = '&';
        } else if ("lt".equals(s)) {
            c = '<';
        } else if ("gt".equals(s)) {
            c = '>';
        } else if ("apos".equals(s)) {
            c = '\'';
        } else if ("quot".equals(s)) {
            c = '\"';
        } else {
            throwMessage("Invalid or unsupported escape character: " + s);
        }
        return c;
    }

    /** Decodes "#NNN" (decimal) or "#xHHH" (hexadecimal) into a char. */
    private char decodeCharacterReference(String reference) throws Exception {
        boolean isHexCode = reference.startsWith("#x");
        String digits = reference.substring(isHexCode ? 2 : 1);
        int code = -1;
        try {
            code = Integer.parseInt(digits, isHexCode ? 16 : 10);
        } catch (NumberFormatException e) {
            throwMessage("Invalid or unsupported escape character: "
                    + reference);
        }
        // A char cannot hold values above 0xFFFF; fail instead of truncating.
        if (code < 0 || code > Character.MAX_VALUE) {
            throwMessage("Invalid or unsupported escape character: "
                    + reference);
        }
        return (char) code;
    }

    /** Reads and returns the next character that is not whitespace. */
    char readNextNonWhitespaceChar(PushbackReader r) throws Exception {
        char c = readNextChar(r);
        while (isXMLWhitespace(c)) {
            c = readNextChar(r);
        }
        return c;
    }

    /**
     * Skips whitespace and returns the next character, leaving it unread.
     */
    char readAndPushbackNextNonWhitespaceChar(PushbackReader r)
            throws Exception {
        char c = readNextNonWhitespaceChar(r);
        r.unread(c);
        return c;
    }

    /** Returns the next character, leaving it unread. */
    static char readAndPushbackNextChar(PushbackReader r) throws Exception {
        char c = readNextChar(r);
        r.unread(c);
        return c;
    }

    /**
     * Reads the next character.
     *
     * @throws Exception with the message {@link #EOF} at end of input
     */
    static char readNextChar(PushbackReader r) throws Exception {
        int c = r.read();
        if (c == -1) {
            throw new Exception(EOF);
        }
        return (char) c;
    }

    // Character classification
    boolean isXMLNameStartChar(char c) {
        return isXMLLetter(c) || ('_' == c) || (':' == c);
    }

    boolean isXMLNameChar(char c) {
        return isXMLLetter(c) || isXMLDigit(c) || ('.' == c) || ('-' == c)
                || ('_' == c) || (':' == c);
        // Missing CombiningChar and Extender in XML 1.0 spec
    }

    /** Tells whether c may appear unescaped inside a double-quoted value. */
    boolean isXMLAttributeValueChar(char c) {
        return c != '\"';
    }

    boolean isXMLLetter(char c) {
        return Character.isLetter(c);
        // Needs to be verified against XML 1.0 spec
    }

    boolean isXMLDigit(char c) {
        return Character.isDigit(c);
        // Needs to be verified against XML 1.0 spec
    }

    boolean isXMLWhitespace(char c) {
        return Character.isWhitespace(c);
        // Needs to be verified against XML 1.0 spec
    }

    boolean isNotXMLWhitespace(char c) {
        return !isXMLWhitespace(c);
    }

    /** Tells whether c starts an escape sequence, i.e. is '&amp;'. */
    boolean isXMLEscapeCharacter(char c) {
        return c == '&';
    }

    // Error helpers
    /**
     * Throws if c differs from expected.
     *
     * @throws Exception naming the expected and the found character
     */
    protected void throwIfNotExpectedChar(char c, char expected)
            throws Exception {
        if (c != expected) {
            throw new Exception("Expected " + expected + " but found " + c); // TODO
        }
    }

    /**
     * Throws if c differs from expected. The message additionally contains
     * up to 20 characters read from pbr to show where the error occurred.
     *
     * @throws Exception naming the expected and the found character
     */
    protected void throwIfNotExpectedChar(char c, char expected,
            PushbackReader pbr) throws Exception {
        if (c != expected) {
            StringBuffer msg = new StringBuffer();
            msg.append("Expected " + expected + " but found " + c + "\n");
            msg.append("At:");
            for (int i = 0; i < 20; ++i) {
                int rc = pbr.read();
                if (rc == -1) {
                    break;
                }
                msg.append((char) rc);
            }
            throw new Exception(msg.toString()); // TODO
        }
    }

    /** Throws an exception with the given message. */
    protected void throwMessage(String message) throws Exception {
        throw new Exception(message); // TODO
    }

    /**
     * Throws if value is <code>null</code>, reporting that the attribute
     * name is missing from the tag element.
     */
    protected void throwIfMissingAttribute(String element, String name,
            String value) throws Exception {
        if (value == null) {
            throwMessage("Missing " + name + " attribute in <" + element
                    + "> tag.");
        }
    }

    /** Throws an exception reporting that the tag element is misplaced. */
    protected void throwMisplacedTagException(String element) throws Exception {
        throw new Exception("Misplaced <" + element + "> tag.");
    }
}