/*
* $Id: Parser.java,v 1.9 2002/09/16 08:05:03 jkl Exp $
*
* Copyright (c) 2002 Njet Communications Ltd. All Rights Reserved.
*
* Use is subject to license terms, as defined in
* Anvil Sofware License, Version 1.1. See LICENSE
* file, or http://njet.org/license-1.1.txt
*/
package anvil.parser;
import anvil.Location;
import java.io.InputStream;
import java.io.IOException;
import java.io.PushbackInputStream;
import java.net.URL;
/**
 * Event-style parser for tag-based markup.
 *
 * <p>The input stream is split into tags, character data, comments,
 * CDATA sections and processing instructions, and each piece is reported
 * to a {@link DocumentHandler}. Every byte read from the stream is taken
 * as one character; no charset decoding is performed.
 *
 * <p>An instance keeps the state of the parse in progress in its fields,
 * so a single instance must not run two parses at the same time.
 *
 * @author Jani Lehtim&auml;ki
 */
public class Parser
  implements Locator
{

  /** URL handed to {@link #parse(DocumentHandler, URL, InputStream)}; only used for locations. */
  private URL _url = null;

  /** Stream being parsed; non-null only while a parse is running. */
  private PushbackInputStream _inputStream = null;

  /** Scratch buffer shared by all read methods; each one clears it before use. */
  private StringBuffer _buffer = new StringBuffer();

  /** Receiver of the parse events; may be null, in which case events are dropped. */
  private DocumentHandler _documentHandler;

  /** Current line (1-based) of the read position. */
  private int _line = 1;

  /** Current column (1-based) of the read position. */
  private int _column = 1;

  /** Column before the most recent read, needed to undo the read of a newline. */
  private int _previousColumn = 0;

  /** Line at which the element currently being dispatched started. */
  private int _startLine = 1;

  /** Column at which the element currently being dispatched started. */
  private int _startColumn = 1;


  public Parser()
  {
  }


  /**
   * Reads the next byte and advances the line/column counters.
   *
   * @return the byte read (0-255), or -1 at end of stream
   * @throws IOException if the underlying stream fails
   */
  protected int read() throws IOException
  {
    int ch = _inputStream.read();
    if (ch == -1) {
      // End of stream consumes nothing, so the position must not move.
      return ch;
    }
    _previousColumn = _column;
    if (ch == '\n') {
      _line++;
      _column = 1;
    } else {
      _column++;
    }
    return ch;
  }


  /**
   * Pushes back the character most recently returned by {@link #read()}
   * and rewinds the line/column counters accordingly. Only one character
   * of pushback is available.
   *
   * @param ch the character to push back; -1 (end of stream) is ignored
   * @throws IOException if the underlying stream rejects the pushback
   */
  protected void unread(int ch) throws IOException
  {
    if (ch == -1) {
      // PushbackInputStream.unread(int) stores (byte)ch, so pushing back -1
      // would make the next read return a bogus 0xFF instead of end of stream.
      return;
    }
    if (ch == '\n') {
      _line--;
      _column = _previousColumn;
    } else {
      _column--;
    }
    _inputStream.unread(ch);
  }


  /**
   * Reads the remainder of a CDATA section after <code>&lt;![</code> has
   * been consumed and reports its content through
   * <code>handleCharacters</code>.
   *
   * <p>If the following characters are not <code>CDATA[</code> the
   * construct is handed to {@link #readComment(String)} instead, with the
   * characters matched so far as its prefix.
   *
   * <p>The section ends at <code>]]&gt;</code>. If the stream ends first,
   * everything read so far (including trailing brackets) is reported.
   *
   * @throws IOException if the underlying stream fails
   */
  protected void readCDataSection() throws IOException
  {
    final String opener = "CDATA[";
    int ch;

    for (int i = 0; i < opener.length(); i++) {
      ch = read();
      if (ch != opener.charAt(i)) {
        unread(ch);
        readComment("[" + opener.substring(0, i));
        return;
      }
    }

    StringBuffer buffer = _buffer;
    buffer.setLength(0);

    // Number of consecutive ']' read but held back because they may turn
    // out to be part of the "]]>" terminator (0, 1 or 2).
    int pending = 0;

    while ((ch = read()) != -1) {
      if (ch == ']') {
        if (pending == 2) {
          // "]]]": the oldest bracket is content, the last two may still terminate.
          buffer.append(']');
        } else {
          pending++;
        }
      } else if (ch == '>' && pending == 2) {
        pending = 0;
        break;
      } else {
        for (; pending > 0; pending--) {
          buffer.append(']');
        }
        buffer.append((char)ch);
      }
    }

    // Stream ended inside the section: the held-back brackets were content.
    for (; pending > 0; pending--) {
      buffer.append(']');
    }

    if (_documentHandler != null) {
      _documentHandler.handleCharacters(buffer.toString());
    }
  }


  /**
   * Reads a <code>&lt;!...&gt;</code> construct after <code>&lt;!</code>
   * has been consumed and reports its content through
   * <code>handleComment</code>.
   *
   * <p>With a null <code>start</code>, up to two leading dashes are
   * counted and the construct ends at the first <code>&gt;</code> that is
   * immediately preceded by at least that many dashes; without leading
   * dashes it therefore ends at the first <code>&gt;</code>. The leading
   * and trailing dashes are part of the reported text.
   *
   * <p>With a non-null <code>start</code>, that text is used as the
   * beginning of the reported content and the construct ends at the first
   * <code>&gt;</code>.
   *
   * @param start text already consumed by the caller, or null
   * @throws IOException if the underlying stream fails
   */
  protected void readComment(String start) throws IOException
  {
    StringBuffer buffer = _buffer;
    int dashcount = 0;   // dashes that opened the construct (0..2)
    int count = 0;       // length of the dash run just before the read position
    buffer.setLength(0);

    int ch = read();
    if (start == null) {
      while (dashcount < 2 && ch == '-') {
        buffer.append((char)ch);
        dashcount++;
        ch = read();
      }
    } else {
      buffer.append(start);
    }

    while (ch != -1) {
      if (ch == '-') {
        if (dashcount > 0) {
          count++;
        }
      } else if (ch == '>') {
        if (count >= dashcount) {
          break;
        }
        // This '>' is content, so any dashes before it no longer adjoin
        // a later '>' and must not count towards the terminator.
        count = 0;
      } else {
        count = 0;
      }
      buffer.append((char)ch);
      ch = read();
    }

    if (_documentHandler != null) {
      _documentHandler.handleComment(buffer.toString());
    }
  }


  /**
   * Skips whitespace.
   *
   * @return the first non-whitespace character, or -1 at end of stream
   * @throws IOException if the underlying stream fails
   */
  protected int skipSpaces() throws IOException
  {
    int ch;
    do {
      ch = read();
    } while (ch != -1 && Character.isWhitespace((char)ch));
    return ch;
  }


  /**
   * Reads character data up to (not including) the next
   * <code>&lt;</code> or the end of the stream and reports it through
   * <code>handleCharacters</code>.
   *
   * @param ch first character of the data, already read by the caller
   * @throws IOException if the underlying stream fails
   */
  protected void readCharacters(int ch) throws IOException
  {
    StringBuffer buffer = _buffer;
    buffer.setLength(0);
    while (ch != -1 && ch != '<') {
      buffer.append((char)ch);
      ch = read();
    }
    if (ch == '<') {
      // Leave the tag opener for the dispatcher.
      unread(ch);
    }
    if (_documentHandler != null) {
      _documentHandler.handleCharacters(buffer.toString());
    }
  }


  /**
   * Reads a processing instruction after <code>&lt;?</code> has been
   * consumed and reports the text up to the closing <code>?&gt;</code>
   * through <code>handleProcessingInstruction</code>. If the stream ends
   * first, everything read so far is reported.
   *
   * @throws IOException if the underlying stream fails
   */
  public void readPI() throws IOException
  {
    StringBuffer buffer = _buffer;
    buffer.setLength(0);

    // True while a '?' has been read but held back because it may be the
    // first half of the "?>" terminator.
    boolean question = false;
    int ch;

    while ((ch = read()) != -1) {
      if (ch == '?') {
        if (question) {
          // "??": the earlier one is content, the later one may still terminate.
          buffer.append('?');
        }
        question = true;
      } else if (question && ch == '>') {
        break;
      } else {
        if (question) {
          buffer.append('?');
          question = false;
        }
        buffer.append((char)ch);
      }
    }

    if (ch == -1 && question) {
      // Stream ended right after a '?': it was content.
      buffer.append('?');
    }

    if (_documentHandler != null) {
      _documentHandler.handleProcessingInstruction(buffer.toString());
    }
  }


  /**
   * Called right after a <code>/</code> has been read: consumes the next
   * character if it is <code>&gt;</code>, otherwise pushes it back.
   *
   * @return true if <code>&gt;</code> followed and was consumed
   */
  private boolean atTagEnd() throws IOException
  {
    int next = read();
    if (next == '>') {
      return true;
    }
    unread(next);
    return false;
  }


  /**
   * Reads a tag after its <code>&lt;</code> has been consumed.
   *
   * <p>The tag name runs up to the first whitespace, <code>&gt;</code> or
   * <code>/&gt;</code>. Attributes follow, either bare
   * (<code>name</code>) or with a value (<code>name=value</code>); a value
   * may be enclosed in double quotes, single quotes or backquotes, or be
   * unquoted, in which case it runs up to whitespace, <code>&gt;</code> or
   * <code>/&gt;</code>. A <code>/&gt;</code> anywhere outside a quoted
   * value marks the tag with <code>enableEndSlash()</code> and ends it.
   *
   * <p>If the stream ends after the tag name is complete, the tag built
   * so far is returned.
   *
   * @param ch first character after <code>&lt;</code>, already read by the caller
   * @return the tag, or null if the stream ended inside the tag name
   * @throws IOException if the underlying stream fails
   */
  protected Tag readTag(int ch) throws IOException
  {
    StringBuffer buffer = _buffer;
    boolean hasEndSlash = false;
    buffer.setLength(0);

    // --- tag name ---
    while (ch != -1 && ch != '>' && !Character.isWhitespace((char)ch)) {
      if (ch == '/' && atTagEnd()) {
        hasEndSlash = true;
        ch = '>';
        break;
      }
      buffer.append((char)ch);
      ch = read();
    }
    if (ch == -1) {
      return null;
    }

    Tag tag = new Tag(buffer.toString());
    if (hasEndSlash) {
      tag.enableEndSlash();
    }
    if (ch == '>') {
      return tag;
    }

    // --- attributes, one per iteration ---
    while (true) {
      ch = skipSpaces();
      if (ch == '/' && atTagEnd()) {
        tag.enableEndSlash();
        ch = '>';
      }
      if (ch == '>' || ch == -1) {
        return tag;
      }

      // attribute name
      buffer.setLength(0);
      while (ch != -1 && ch != '>' && ch != '=' && !Character.isWhitespace((char)ch)) {
        if (ch == '/' && atTagEnd()) {
          tag.enableEndSlash();
          ch = '>';
          break;
        }
        buffer.append((char)ch);
        ch = read();
      }
      String name = buffer.toString();

      if (ch == -1 || ch == '>') {
        tag.add(name);
        return tag;
      }
      if (Character.isWhitespace((char)ch)) {
        // Whitespace is allowed between the name and '='.
        ch = skipSpaces();
        if (ch == -1 || ch == '>') {
          tag.add(name);
          return tag;
        }
      }

      if (ch != '=') {
        // Bare attribute; ch already belongs to whatever comes next.
        unread(ch);
        tag.add(name);
        continue;
      }

      // attribute value
      ch = skipSpaces();
      if (ch == -1 || ch == '>') {
        // Nothing follows the '='; the dangling name is not recorded.
        return tag;
      }

      buffer.setLength(0);
      if (ch == '"' || ch == '\'' || ch == '`') {
        int quote = ch;
        ch = read();
        while (ch != -1 && ch != quote) {
          buffer.append((char)ch);
          ch = read();
        }
        tag.add(name, buffer.toString());
      } else {
        do {
          if (ch == '/' && atTagEnd()) {
            tag.enableEndSlash();
            ch = '>';
            break;
          }
          buffer.append((char)ch);
          ch = read();
        } while (ch != -1 && ch != '>' && !Character.isWhitespace((char)ch));
        tag.add(name, buffer.toString());
        if (ch == '>') {
          return tag;
        }
      }

      if (ch == -1) {
        return tag;
      }
    }
  }


  /**
   * Main loop: reads the whole stream and reports every element to the
   * document handler, bracketed by <code>startDocument()</code> and
   * <code>endDocument()</code>. Before each element the current position
   * is recorded so that {@link #getLocation()} reports where the element
   * started.
   *
   * @throws IOException if the underlying stream fails
   */
  protected void dispatchElements() throws IOException
  {
    if (_documentHandler != null) {
      _documentHandler.setDocumentLocator(this);
      _documentHandler.startDocument();
    }

    while (true) {
      _startLine = _line;
      _startColumn = _column;

      int ch = read();
      if (ch == -1) {
        break;
      }
      if (ch != '<') {
        readCharacters(ch);
        continue;
      }

      ch = read();
      if (ch == '!') {
        ch = read();
        if (ch == '[') {
          readCDataSection();
        } else {
          unread(ch);
          readComment(null);
        }
      } else if (ch == '?') {
        readPI();
      } else {
        // readTag() yields null when the stream ends inside the tag name;
        // that null is passed on to the handler as is.
        Tag tag = readTag(ch);
        if (_documentHandler != null) {
          _documentHandler.handleTag(tag);
        }
      }
    }

    if (_documentHandler != null) {
      _documentHandler.endDocument();
    }
  }


  /**
   * Returns the position at which the element currently being reported
   * started.
   *
   * @return a new location built from the URL given to <code>parse</code>
   *         and the recorded start line and column
   */
  public Location getLocation()
  {
    return new Location(_url, _startLine, _startColumn);
  }


  /**
   * Parses the given source; shorthand for
   * {@link #parse(DocumentHandler, URL, InputStream)} with the URL and
   * stream obtained from <code>source</code>.
   *
   * @param documentHandler receiver of the parse events, may be null
   * @param source supplier of the URL and the input stream
   * @throws IOException if reading or closing the stream fails
   */
  public void parse(DocumentHandler documentHandler, InputSource source) throws IOException
  {
    parse(documentHandler, source.getURL(), source.getInputStream());
  }


  /**
   * Parses the given stream and reports its content to the handler.
   * The stream is always closed before this method returns, whether
   * parsing succeeded or not.
   *
   * @param documentHandler receiver of the parse events, may be null
   * @param url URL reported in locations
   * @param input stream to parse; closed by this method
   * @throws IOException if reading or closing the stream fails
   */
  public void parse(DocumentHandler documentHandler, URL url, InputStream input) throws IOException
  {
    _documentHandler = documentHandler;
    _url = url;

    // Start from a clean position so that a reused instance does not
    // report locations continuing from the previous document.
    _line = 1;
    _column = 1;
    _previousColumn = 0;
    _startLine = 1;
    _startColumn = 1;

    _inputStream = new PushbackInputStream(input);
    boolean completed = false;
    try {
      dispatchElements();
      completed = true;
    } finally {
      PushbackInputStream stream = _inputStream;
      _inputStream = null;
      try {
        stream.close();
      } catch (IOException closeFailure) {
        if (completed) {
          throw closeFailure;
        }
        // A parse failure is already propagating; it is the more useful
        // error, so the failure to close must not replace it.
      }
    }
  }


  /**
   * Extracts the value of an attribute from a string of
   * <code>name=value</code> pairs. The attribute is located by searching
   * for a space, the attribute name and <code>=</code>. A value starting
   * with a single or double quote runs to the matching quote; any other
   * value runs to the next whitespace. In both cases the value ends at
   * the end of the string at the latest.
   *
   * @param data string to search
   * @param attribute name of the attribute
   * @return the value, or null if the attribute is not present or nothing
   *         follows its <code>=</code>
   */
  public static String getAttributeValue(String data, String attribute)
  {
    int length = data.length();
    int start = data.indexOf(" " + attribute + "=");
    if (start < 0) {
      return null;
    }

    // Step over the leading space, the name and the '='.
    int i = start + attribute.length() + 2;
    if (i >= length) {
      return null;
    }

    StringBuffer buffer = new StringBuffer();
    char ch;
    char quote = data.charAt(i);
    if (quote == '\'' || quote == '"') {
      i++;
      while (i < length && (ch = data.charAt(i++)) != quote) {
        buffer.append(ch);
      }
    } else {
      while (i < length && !Character.isWhitespace(ch = data.charAt(i++))) {
        buffer.append(ch);
      }
    }
    return buffer.toString();
  }

}