Package org.knopflerfish.shared.cm

Source Code of org.knopflerfish.shared.cm.XmlReader

/*
* Copyright (c) 2003-2004, KNOPFLERFISH project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
*   notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
*   copyright notice, this list of conditions and the following
*   disclaimer in the documentation and/or other materials
*   provided with the distribution.
*
* - Neither the name of the KNOPFLERFISH project nor the names of its
*   contributors may be used to endorse or promote products derived
*   from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.knopflerfish.shared.cm;

// TODO
// - Start-Tag, End-Tag, EmptyElement-Tag
// - positionAtXXX naming of methods
// - readToNextTag vs readStartOfTag

import java.io.PushbackReader;
import java.util.Dictionary;
import java.util.Hashtable;

/**
 * A small, event-style XML reader.
 *
 * <p>
 * {@link #read(PushbackReader)} walks the markup and calls
 * {@link #startElement(String, Dictionary)} and
 * {@link #endElement(String, Dictionary, String)} for every element it
 * finds. Subclasses override those two callbacks to consume the document.
 *
 * <p>
 * Requirements and limitations visible in this implementation:
 * <ul>
 * <li>The supplied <code>PushbackReader</code> must have a pushback capacity
 * of at least 2, because {@link #tagShouldBeIgnored(PushbackReader)} and
 * {@link #isEndTag(PushbackReader)} read two characters and unread both.</li>
 * <li>An element contains either child elements or text, never both: if the
 * first non-whitespace character of the content is <code>&lt;</code> the
 * children are read and the content is reported as <code>""</code>;
 * otherwise the text up to the next <code>&lt;</code> is read and the end
 * tag must follow.</li>
 * <li>Tags starting with <code>&lt;!</code> or <code>&lt;?</code> are
 * skipped, see {@link #ignoreTag(PushbackReader)}.</li>
 * <li>End of input is signalled by an <code>Exception</code> whose message
 * is {@link #EOF}.</li>
 * </ul>
 */
public class XmlReader {
    /** Message of the exception thrown by {@link #readNextChar} at end of input. */
    protected final static String EOF = "EOF";

    /**
     * Callback invoked when a start tag (or empty-element tag) has been read.
     * The default implementation does nothing.
     *
     * @param elementType the element name.
     * @param attributes  the attributes of the tag, or <code>null</code> if
     *                    the tag has none.
     * @throws Exception if the subclass rejects the element.
     */
    protected void startElement(String elementType, Dictionary attributes)
            throws Exception {
    }

    /**
     * Callback invoked when an element is complete. The default
     * implementation does nothing.
     *
     * @param elementType the element name.
     * @param attributes  the attributes of the start tag, or
     *                    <code>null</code> if it had none.
     * @param content     <code>null</code> for an empty-element tag,
     *                    <code>""</code> if the content started with markup,
     *                    otherwise the trimmed text content.
     * @throws Exception if the subclass rejects the element.
     */
    protected void endElement(String elementType, Dictionary attributes,
            String content) throws Exception {
    }

    /**
     * Reads consecutive tags until end of input or until an end tag is next
     * in the stream. The end tag itself is left unread for the caller.
     *
     * @param r the reader; needs a pushback capacity of at least 2.
     * @throws Exception on malformed input.
     */
    protected void read(PushbackReader r) throws Exception {
        while (readToNextTag(r)) {
            if (tagShouldBeIgnored(r)) {
                ignoreTag(r);
            } else if (isEndTag(r)) {
                return;
            } else {
                readElement(r);
            }
        }
    }

    /**
     * Skips whitespace and checks that the next character opens a tag. The
     * <code>&lt;</code> is pushed back.
     *
     * @return <code>true</code> if a tag follows, <code>false</code> if end
     *         of input was reached first.
     * @throws Exception if the next non-whitespace character is not
     *                   <code>&lt;</code>.
     */
    boolean readToNextTag(PushbackReader r) throws Exception {
        boolean foundATag = true;
        try {
            char c = readAndPushbackNextNonWhitespaceChar(r);
            throwIfNotExpectedChar(c, '<', r);
        } catch (Exception e) {
            // End of input is reported as an exception carrying the EOF
            // message; anything else is a real error.
            if (EOF.equals(e.getMessage())) {
                foundATag = false;
            } else {
                throw e;
            }
        }
        return foundATag;
    }

    /**
     * Peeks at the next two characters (both are pushed back) and tells
     * whether the second one is <code>!</code> or <code>?</code>.
     */
    boolean tagShouldBeIgnored(PushbackReader r) throws Exception {
        char first = readNextChar(r);
        char second = readNextChar(r);
        r.unread(second);
        r.unread(first);
        return second == '!' || second == '?';
    }

    /**
     * Consumes a tag without reporting it.
     *
     * <p>
     * A comment (<code>&lt;!--</code>) is read up to <code>--&gt;</code> and
     * a processing instruction (<code>&lt;?</code>) up to
     * <code>?&gt;</code>, so a <code>&gt;</code> inside either does not end
     * the tag early. Any other tag is read up to the first
     * <code>&gt;</code>; a DOCTYPE with an internal subset or a CDATA section
     * containing <code>&gt;</code> is therefore not skipped correctly.
     *
     * @throws Exception if the next character is not <code>&lt;</code> or
     *                   the input ends before the tag is closed.
     */
    void ignoreTag(PushbackReader r) throws Exception {
        char c = readNextChar(r);
        throwIfNotExpectedChar(c, '<');

        boolean comment = false;
        boolean processingInstruction = false;
        char beforePrevious = 0;
        char previous = 0;
        int consumed = 0; // characters read after the opening '<'

        // Only the last three characters are tracked, so nothing has to be
        // pushed back and arbitrarily long comments need no buffer.
        while (true) {
            beforePrevious = previous;
            previous = c;
            c = readNextChar(r);
            ++consumed;

            if (consumed == 1 && c == '?') {
                processingInstruction = true;
            } else if (consumed == 3 && beforePrevious == '!'
                    && previous == '-' && c == '-') {
                comment = true;
            }

            if (c == '>') {
                if (comment) {
                    if (beforePrevious == '-' && previous == '-') {
                        return;
                    }
                } else if (processingInstruction) {
                    if (previous == '?') {
                        return;
                    }
                } else {
                    return;
                }
            }
        }
    }

    /**
     * Peeks at the next two characters (both are pushed back) and tells
     * whether they are <code>&lt;/</code>.
     */
    boolean isEndTag(PushbackReader r) throws Exception {
        char first = readNextChar(r);
        char second = readNextChar(r);
        r.unread(second);
        r.unread(first);
        return first == '<' && second == '/';
    }

    /**
     * Reads one complete element: start tag, optional content and end tag,
     * invoking {@link #startElement} after the start tag and
     * {@link #endElement} once the element is complete.
     */
    void readElement(PushbackReader r) throws Exception {
        readStartOfTag(r);
        String elementType = readElementType(r);

        Dictionary attributes = readAttributes(r);
        boolean notEmptyElement = readEndOfStartTag(r);

        startElement(elementType, attributes);
        String content = null;
        if (notEmptyElement) {
            content = readContent(r, elementType, attributes);
            readEndTag(elementType, r);
        }
        endElement(elementType, attributes, content);
    }

    /**
     * Reads an end tag and verifies that it closes the given element.
     *
     * @param startTagElementType name from the matching start tag.
     */
    void readEndTag(String startTagElementType, PushbackReader r)
            throws Exception {
        readStartOfEndTag(r);
        readAndMatchElementType(startTagElementType, r);
        readEndOfTag(r);
    }

    /**
     * Reads a name that must equal <code>elementType</code> and checks that
     * it is followed by <code>&gt;</code>, <code>/</code> or whitespace.
     */
    void readAndMatchElementType(String elementType, PushbackReader r)
            throws Exception {
        readAndMatchXMLName(elementType, r);
        char c = readAndPushbackNextChar(r);
        throwIfNotEndOfElementType(c, elementType);
    }

    /**
     * Reads an XML name (after skipping whitespace) and verifies that it is
     * exactly <code>elementType</code>. The character that ends the name is
     * pushed back.
     *
     * @throws Exception if the name differs from, is longer than or is
     *                   shorter than <code>elementType</code>.
     */
    void readAndMatchXMLName(String elementType, PushbackReader r)
            throws Exception {
        int pos = 0;
        char c = readNextNonWhitespaceChar(r);
        if (isXMLNameStartChar(c)) {
            throwIfNotExpectedNameChar(c, elementType, pos++);
        } else {
            throwMessage("Error while reading XML name: " + c
                    + " is not a valid start char.");
        }
        c = readNextChar(r);
        while (isXMLNameChar(c)) {
            throwIfNotExpectedNameChar(c, elementType, pos++);
            c = readNextChar(r);
        }
        r.unread(c);
        // A prefix of the expected name is not a match.
        if (pos < elementType.length()) {
            throwMessage("Error while reading XML name: expected "
                    + elementType + " but found "
                    + elementType.substring(0, pos));
        }
    }

    /**
     * Compares one character of a name being read with the character at
     * <code>pos</code> in the expected name, treating a position past the
     * end of the expected name as a mismatch.
     */
    private void throwIfNotExpectedNameChar(char c, String expectedName,
            int pos) throws Exception {
        if (pos >= expectedName.length()) {
            throwMessage("Error while reading XML name: expected "
                    + expectedName + " but found extra character " + c);
        } else {
            throwIfNotExpectedChar(c, expectedName.charAt(pos));
        }
    }

    /**
     * Reads the element name of a start tag and checks that it is followed
     * by <code>&gt;</code>, <code>/</code> or whitespace.
     *
     * @return the element name.
     */
    String readElementType(PushbackReader r) throws Exception {
        String elementType = readXMLName(r);
        char c = readAndPushbackNextChar(r);
        throwIfNotEndOfElementType(c, elementType);
        return elementType;
    }

    /**
     * Verifies that <code>c</code>, the character following an element
     * name, is <code>&gt;</code>, <code>/</code> or whitespace.
     */
    private void throwIfNotEndOfElementType(char c, String elementType)
            throws Exception {
        if (c != '>' && c != '/' && isNotXMLWhitespace(c)) {
            throwMessage("Error while reading element type after: "
                    + elementType);
        }
    }

    /**
     * Skips whitespace and reads an XML name. The character that ends the
     * name is pushed back.
     *
     * @return the name read.
     */
    String readXMLName(PushbackReader r) throws Exception {
        char c = readNextNonWhitespaceChar(r);
        StringBuffer xmlName = new StringBuffer();
        if (isXMLNameStartChar(c)) {
            xmlName.append(c);
        } else {
            throwMessage("Error while reading XML name: " + c
                    + " is not a valid start char.");
        }
        c = readNextChar(r);
        while (isXMLNameChar(c)) {
            xmlName.append(c);
            c = readNextChar(r);
        }
        r.unread(c);
        return xmlName.toString();
    }

    /**
     * Reads <code>name="value"</code> pairs for as long as the next
     * non-whitespace character can start a name. Whitespace around the
     * <code>=</code> is accepted.
     *
     * @return the attributes keyed by name, or <code>null</code> if there
     *         were none.
     */
    Dictionary readAttributes(PushbackReader r) throws Exception {
        Dictionary attributes = null;
        while (nextNonWhitespaceIsANameStartChar(r)) {
            String name = readXMLName(r);
            char c = readNextNonWhitespaceChar(r);
            throwIfNotExpectedChar(c, '=');
            String value = readAttributeValue(r);
            if (attributes == null) {
                attributes = new Hashtable();
            }
            attributes.put(name, value);
        }
        return attributes;
    }

    /**
     * Reads a quoted attribute value, resolving character and entity
     * references. The value may be delimited by double or single quotes and
     * may be preceded by whitespace.
     *
     * @return the value without its quotes.
     */
    String readAttributeValue(PushbackReader r) throws Exception {
        char quote = readNextNonWhitespaceChar(r);
        boolean singleQuoted = quote == '\'';
        if (!singleQuoted) {
            throwIfNotExpectedChar(quote, '\"');
        }
        StringBuffer value = new StringBuffer();
        char c = readNextChar(r);
        while (singleQuoted ? c != '\'' : isXMLAttributeValueChar(c)) {
            if (isXMLEscapeCharacter(c)) {
                // The resolved character is appended as data even if it is
                // a quote; only a literal quote ends the value.
                c = readEscapedCharacter(r);
            }
            value.append(c);
            c = readNextChar(r);
        }
        throwIfNotExpectedChar(c, singleQuoted ? '\'' : '\"');
        return value.toString();
    }

    /**
     * Peeks at the next non-whitespace character (it is pushed back, the
     * skipped whitespace is not) and tells whether it can start a name.
     */
    boolean nextNonWhitespaceIsANameStartChar(PushbackReader r)
            throws Exception {
        char c = readAndPushbackNextNonWhitespaceChar(r);
        return isXMLNameStartChar(c);
    }

    /** Skips whitespace and consumes the <code>&lt;</code> of a tag. */
    void readStartOfTag(PushbackReader r) throws Exception {
        char c = readNextNonWhitespaceChar(r);
        throwIfNotExpectedChar(c, '<');
    }

    /** Skips whitespace and consumes the <code>&lt;/</code> of an end tag. */
    void readStartOfEndTag(PushbackReader r) throws Exception {
        char c = readNextNonWhitespaceChar(r);
        throwIfNotExpectedChar(c, '<');
        c = readNextChar(r);
        throwIfNotExpectedChar(c, '/');
    }

    /** Skips whitespace and consumes the <code>&gt;</code> of a tag. */
    void readEndOfTag(PushbackReader r) throws Exception {
        char c = readNextNonWhitespaceChar(r);
        throwIfNotExpectedChar(c, '>');
    }

    /**
     * Skips whitespace and consumes the end of a start tag, which is either
     * <code>&gt;</code> or <code>/&gt;</code>.
     *
     * @return <code>true</code> if the element has content and an end tag,
     *         <code>false</code> for an empty-element tag.
     */
    boolean readEndOfStartTag(PushbackReader r) throws Exception {
        char c = readNextNonWhitespaceChar(r);
        boolean emptyElement = c == '/';
        if (emptyElement) {
            c = readNextChar(r);
        }
        throwIfNotExpectedChar(c, '>');
        return !emptyElement;
    }

    /**
     * Reads the content of an element. If the first non-whitespace character
     * is <code>&lt;</code> the nested tags are read with {@link #read} and
     * <code>""</code> is returned; otherwise the text is read with
     * {@link #readText}.
     *
     * @param elementType not used by this implementation.
     * @param attributes  not used by this implementation.
     */
    String readContent(PushbackReader r, String elementType,
            Dictionary attributes) throws Exception {
        char c = readAndPushbackNextNonWhitespaceChar(r);
        if (c == '<') {
            read(r);
            return "";
        }
        return readText(r);
    }

    /**
     * Skips leading whitespace and reads text up to the next literal
     * <code>&lt;</code>, which is pushed back. Character and entity
     * references are resolved.
     *
     * @return the trimmed text, or <code>null</code> if the first
     *         non-whitespace character already was <code>&lt;</code>.
     */
    String readText(PushbackReader r) throws Exception {
        char c = readNextNonWhitespaceChar(r);
        StringBuffer text = null;
        while (c != '<') {
            if (text == null) {
                text = new StringBuffer();
            }
            if (isXMLEscapeCharacter(c)) {
                c = readEscapedCharacter(r);
            }
            text.append(c);
            c = readNextChar(r);
        }
        r.unread(c);
        if (text == null) {
            return null;
        }
        return text.toString().trim();
    }

    /**
     * Reads the remainder of a reference after its <code>&amp;</code>, up to
     * and including the terminating <code>;</code>, and returns the
     * character it denotes.
     *
     * <p>
     * Supported are decimal (<code>#65</code>) and hexadecimal
     * (<code>#x41</code>) character references and the names
     * <code>amp</code>, <code>lt</code>, <code>gt</code>, <code>apos</code>
     * and <code>quot</code>. The numeric value is cast to <code>char</code>,
     * so values above 0xFFFF are truncated.
     *
     * @throws NumberFormatException if a numeric reference is not a number.
     * @throws Exception             for an unknown name or if the input ends
     *                               before <code>;</code>.
     */
    char readEscapedCharacter(PushbackReader r) throws Exception {
        // Collect everything up to the first ';' before classifying, so a
        // very short reference cannot run past its own terminator.
        StringBuffer reference = new StringBuffer();
        char next = readNextChar(r);
        while (next != ';') {
            reference.append(next);
            next = readNextChar(r);
        }

        String s = reference.toString();
        char c = 0;
        if (s.startsWith("#x")) {
            c = (char) Integer.parseInt(s.substring(2), 16);
        } else if (s.startsWith("#")) {
            c = (char) Integer.parseInt(s.substring(1));
        } else if ("amp".equals(s)) {
            c = '&';
        } else if ("lt".equals(s)) {
            c = '<';
        } else if ("gt".equals(s)) {
            c = '>';
        } else if ("apos".equals(s)) {
            c = '\'';
        } else if ("quot".equals(s)) {
            c = '\"';
        } else {
            throwMessage("Invalid or unsupported escape character: " + s);
        }
        return c;
    }

    /** Reads characters until one is not whitespace and returns it. */
    char readNextNonWhitespaceChar(PushbackReader r) throws Exception {
        char c = readNextChar(r);
        while (isXMLWhitespace(c)) {
            c = readNextChar(r);
        }
        return c;
    }

    /**
     * Like {@link #readNextNonWhitespaceChar} but pushes the returned
     * character back. The skipped whitespace stays consumed.
     */
    char readAndPushbackNextNonWhitespaceChar(PushbackReader r)
            throws Exception {
        char c = readNextNonWhitespaceChar(r);
        r.unread(c);
        return c;
    }

    /** Returns the next character without consuming it. */
    static char readAndPushbackNextChar(PushbackReader r) throws Exception {
        char c = readNextChar(r);
        r.unread(c);
        return c;
    }

    /**
     * Reads one character.
     *
     * @throws Exception with message {@link #EOF} at end of input.
     */
    static char readNextChar(PushbackReader r) throws Exception {
        int c = r.read();
        if (c == -1) {
            throw new Exception(EOF);
        }
        return (char) c;
    }

    // Character classification

    /** Tells whether <code>c</code> may start a name: a letter, '_' or ':'. */
    boolean isXMLNameStartChar(char c) {
        return isXMLLetter(c) || ('_' == c) || (':' == c);
    }

    /**
     * Tells whether <code>c</code> may appear inside a name: a letter, a
     * digit, '.', '-', '_' or ':'.
     */
    boolean isXMLNameChar(char c) {
        return isXMLLetter(c) || isXMLDigit(c) || ('.' == c) || ('-' == c)
                || ('_' == c) || (':' == c);
        // Missing CombiningChar and Extender in XML 1.0 spec
    }

    /**
     * Tells whether <code>c</code> belongs to a double-quoted attribute
     * value, i.e. is anything but the closing double quote.
     */
    boolean isXMLAttributeValueChar(char c) {
        return c != '\"';
    }

    /** Delegates to <code>Character.isLetter</code>. */
    boolean isXMLLetter(char c) {
        return Character.isLetter(c);
        // Needs to be verified against XML 1.0 spec
    }

    /** Delegates to <code>Character.isDigit</code>. */
    boolean isXMLDigit(char c) {
        return Character.isDigit(c);
        // Needs to be verified against XML 1.0 spec
    }

    /** Delegates to <code>Character.isWhitespace</code>. */
    boolean isXMLWhitespace(char c) {
        return Character.isWhitespace(c);
        // Needs to be verified against XML 1.0 spec
    }

    /** Negation of {@link #isXMLWhitespace(char)}. */
    boolean isNotXMLWhitespace(char c) {
        return !isXMLWhitespace(c);
    }

    /** Tells whether <code>c</code> starts a reference, i.e. is '&amp;'. */
    boolean isXMLEscapeCharacter(char c) {
        return c == '&';
    }

    // Error helpers

    /**
     * Throws if <code>c</code> is not the expected character.
     *
     * @throws Exception describing the expected and the found character.
     */
    protected void throwIfNotExpectedChar(char c, char expected)
            throws Exception {
        if (c != expected) {
            throw new Exception("Expected " + expected + " but found " + c); // TODO
        }
    }

    /**
     * Throws if <code>c</code> is not the expected character, adding up to
     * 20 of the characters that follow in <code>pbr</code> to the message as
     * context. Those characters are consumed from the reader.
     *
     * @throws Exception describing the mismatch and where it occurred.
     */
    protected void throwIfNotExpectedChar(char c, char expected,
            PushbackReader pbr) throws Exception {
        if (c != expected) {
            StringBuffer msg = new StringBuffer();
            msg.append("Expected " + expected + " but found " + c + "\n");
            msg.append("At:");
            for (int i = 0; i < 20; ++i) {
                int rc = pbr.read();
                if (rc == -1) {
                    break;
                }
                msg.append((char) rc);
            }
            throw new Exception(msg.toString()); // TODO
        }
    }

    /**
     * Throws an exception with the given message.
     *
     * @throws Exception always.
     */
    protected void throwMessage(String message) throws Exception {
        throw new Exception(message); // TODO
    }

    /**
     * Throws if a required attribute value is <code>null</code>.
     *
     * @param element name of the element the attribute belongs to.
     * @param name    name of the attribute.
     * @param value   value of the attribute, <code>null</code> if missing.
     */
    protected void throwIfMissingAttribute(String element, String name,
            String value) throws Exception {
        if (value == null) {
            throwMessage("Missing " + name + " attribute in <" + element
                    + "> tag.");
        }
    }

    /**
     * Throws an exception reporting that a tag appeared where it is not
     * allowed.
     *
     * @throws Exception always.
     */
    protected void throwMisplacedTagException(String element) throws Exception {
        throw new Exception("Misplaced <" + element + "> tag.");
    }
}
TOP

Related Classes of org.knopflerfish.shared.cm.XmlReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.