/*
* Copyright (C) Chaperon. All rights reserved.
* -------------------------------------------------------------------------
* This software is published under the terms of the Apache Software License
* version 1.1, a copy of which has been included with this distribution in
* the LICENSE file.
*/
package net.sourceforge.chaperon.process;
import net.sourceforge.chaperon.common.Decoder;
import net.sourceforge.chaperon.common.IntegerList;
import org.apache.commons.logging.Log;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.LocatorImpl;
import java.util.Stack;
/**
* This class represents a simulation of a pushdown automaton using the parser automaton class.
*
* @author <a href="mailto:stephan@apache.org">Stephan Michels </a>
* @version CVS $Id: ParserProcessor.java,v 1.28 2004/01/08 11:30:52 benedikta Exp $
*/
public class ParserProcessor implements ContentHandler, LexicalHandler
{
/** Namespace for the generated SAX events. */
public static final String NS_OUTPUT = "http://chaperon.sourceforge.net/schema/syntaxtree/1.0";
public static final String OUTPUT = "output";
public static final String ERROR = "error";
private boolean flatten = false;
private boolean localizable = false;
private String source;
private int lineNumber;
private int columnNumber;
private ContentHandler contentHandler = null;
private LexicalHandler lexicalHandler = null;
private Locator locator = null;
private LocatorImpl locatorImpl = null;
private static final int STATE_OUTSIDE = 0;
private static final int STATE_INSIDE = 1;
private static final int STATE_LEXEME = 2;
private static final int STATE_GROUP = 3;
private static final int STATE_ERROR = 4;
private int state = STATE_OUTSIDE;
private ParserAutomaton automaton;
private IntegerList statestack = new IntegerList();
private Stack treestack = new Stack();
private Log log;
private StringBuffer lineSnippet = new StringBuffer();
private boolean unrecoverable = false;
private ParseException exception = null;
/**
* Create a new parser processor.
*/
public ParserProcessor() {}
/**
* Create a new parser processor.
*
* @param automaton Parser automaton, which the processor should ues.
* @param handler Handler, which should receives the parser events.
* @param log Log, which should used.
*/
public ParserProcessor(ParserAutomaton automaton, Log log)
{
this.automaton = automaton;
this.log = log;
}
/**
* Set the parser automaton for the processor.
*
* @param automaton Parser automaton.
*/
public void setParserAutomaton(ParserAutomaton automaton)
{
this.automaton = automaton;
}
/**
* Set the <code>ContentHandler</code> that will receive XML data.
*/
public void setContentHandler(ContentHandler handler)
{
this.contentHandler = handler;
}
/**
* Set the <code>LexicalHandler</code> that will receive XML data.
*/
public void setLexicalHandler(LexicalHandler handler)
{
this.lexicalHandler = handler;
}
/**
* Provide processor with a log.
*
* @param log The log.
*/
public void setLog(Log log)
{
this.log = log;
}
/**
* If <code>true</code>, the line and column number information are let in the XML output for
* each token.
*
* @param localizable If the XML may be localizable.
*/
public void setLocalizable(boolean localizable)
{
this.localizable = localizable;
}
/**
* If the adapter should produce a more flatten XML hirachy, which means elements which the same
* name will be collapsed
*
* @param flatten True, if a more flatten hirachy should be produced.
*/
public void setFlatten(boolean flatten)
{
this.flatten = flatten;
}
/**
* Receive an object for locating the origin of SAX document events.
*/
public void setDocumentLocator(Locator locator)
{
this.locator = locator;
this.locatorImpl = null;
if (locator!=null)
{
this.locatorImpl = new LocatorImpl(locator);
contentHandler.setDocumentLocator(locatorImpl);
}
}
/**
* Receive notification of the beginning of a document.
*/
public void startDocument() throws SAXException
{
contentHandler.startDocument();
state = STATE_OUTSIDE;
}
/**
* Receive notification of the end of a document.
*/
public void endDocument() throws SAXException
{
if (state==STATE_OUTSIDE)
contentHandler.endDocument();
}
/**
* Receive notification of the beginning of an element.
*/
public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
throws SAXException
{
if (state==STATE_OUTSIDE)
{
if ((namespaceURI!=null) && (namespaceURI.equals(LexicalProcessor.NS_OUTPUT)) &&
(localName.equals(LexicalProcessor.OUTPUT)))
{
handleStartDocument();
state = STATE_INSIDE;
if (atts.getValue("source")!=null)
source = atts.getValue("source");
else if (locator!=null)
source = locator.getSystemId();
else
source = "unknown";
}
else
contentHandler.startElement(namespaceURI, localName, qName, atts);
}
else if (state==STATE_INSIDE)
{
if ((namespaceURI!=null) && (namespaceURI.equals(LexicalProcessor.NS_OUTPUT)) &&
(localName.equals(LexicalProcessor.LEXEME)))
{
if (atts.getValue("column")!=null)
columnNumber = Integer.parseInt(atts.getValue("column"));
else if (locator!=null)
columnNumber = locator.getColumnNumber();
else
columnNumber = 1;
if (atts.getValue("line")!=null)
lineNumber = Integer.parseInt(atts.getValue("line"));
else if (locator!=null)
lineNumber = locator.getLineNumber();
else
lineNumber = 1;
handleLexeme(atts.getValue("symbol"), atts.getValue("text"));
state = STATE_LEXEME;
}
else if ((namespaceURI!=null) && (namespaceURI.equals(LexicalProcessor.NS_OUTPUT)) &&
(localName.equals(ERROR)))
{
if (atts.getValue("column")!=null)
columnNumber = Integer.parseInt(atts.getValue("column"));
else if (locator!=null)
columnNumber = locator.getColumnNumber();
else
columnNumber = 1;
if (atts.getValue("line")!=null)
lineNumber = Integer.parseInt(atts.getValue("line"));
else if (locator!=null)
lineNumber = locator.getLineNumber();
else
lineNumber = 1;
handleLexeme("error", atts.getValue("text"));
state = STATE_ERROR;
}
else
throw new SAXException("Unexpected start element.");
}
else if (state==STATE_LEXEME)
{
if ((namespaceURI!=null) && (namespaceURI.equals(LexicalProcessor.NS_OUTPUT)) &&
(localName.equals(LexicalProcessor.GROUP)))
state = STATE_GROUP;
else
throw new SAXException("Unexpected start element.");
}
else if ((state==STATE_ERROR) || (state==STATE_GROUP))
throw new SAXException("Unexpected start element.");
}
/**
* Receive notification of the end of an element.
*/
public void endElement(String namespaceURI, String localName, String qName)
throws SAXException
{
if (state==STATE_OUTSIDE)
contentHandler.endElement(namespaceURI, localName, qName);
else if (state==STATE_INSIDE)
{
if ((namespaceURI!=null) && (namespaceURI.equals(LexicalProcessor.NS_OUTPUT)) &&
(localName.equals(LexicalProcessor.OUTPUT)))
{
contentHandler.startPrefixMapping("", NS_OUTPUT);
contentHandler.startElement(NS_OUTPUT, OUTPUT, OUTPUT, new AttributesImpl());
handleEndDocument();
contentHandler.endElement(NS_OUTPUT, OUTPUT, OUTPUT);
contentHandler.endPrefixMapping("");
state = STATE_OUTSIDE;
}
else
throw new SAXException("Unexpected end element.");
}
else if ((state==STATE_LEXEME) || (state==STATE_ERROR))
state = STATE_INSIDE;
else if (state==STATE_GROUP)
state = STATE_LEXEME;
}
/**
* Receive notification of character data.
*/
public void characters(char[] ch, int start, int length)
throws SAXException
{
if (state==STATE_OUTSIDE)
contentHandler.characters(ch, start, length);
}
/**
* Receive notification of ignorable whitespace in element content.
*/
public void ignorableWhitespace(char[] ch, int start, int length)
throws SAXException
{
if (state==STATE_OUTSIDE)
contentHandler.ignorableWhitespace(ch, start, length);
}
/**
* Begin the scope of a prefix-URI Namespace mapping.
*/
public void startPrefixMapping(String prefix, String uri)
throws SAXException
{
contentHandler.startPrefixMapping(prefix, uri);
}
/**
* End the scope of a prefix-URI mapping.
*/
public void endPrefixMapping(String prefix) throws SAXException
{
contentHandler.endPrefixMapping(prefix);
}
/**
* Receive notification of a processing instruction.
*/
public void processingInstruction(String target, String data)
throws SAXException
{
if (locatorImpl!=null)
{
locatorImpl.setLineNumber(locator.getLineNumber());
locatorImpl.setColumnNumber(locator.getColumnNumber());
}
if (state==STATE_OUTSIDE)
contentHandler.processingInstruction(target, data);
}
/**
* Receive notification of a skipped entity.
*/
public void skippedEntity(String name) throws SAXException
{
if (locatorImpl!=null)
{
locatorImpl.setLineNumber(locator.getLineNumber());
locatorImpl.setColumnNumber(locator.getColumnNumber());
}
if (state==STATE_OUTSIDE)
contentHandler.skippedEntity(name);
}
/**
* Report the start of DTD declarations, if any.
*/
public void startDTD(String name, String publicId, String systemId)
throws SAXException
{
if (lexicalHandler!=null)
lexicalHandler.startDTD(name, publicId, systemId);
}
/**
* Report the end of DTD declarations.
*/
public void endDTD() throws SAXException
{
if (lexicalHandler!=null)
lexicalHandler.endDTD();
}
/**
* Report the beginning of an entity.
*/
public void startEntity(String name) throws SAXException
{
if (lexicalHandler!=null)
lexicalHandler.startEntity(name);
}
/**
* Report the end of an entity.
*/
public void endEntity(String name) throws SAXException
{
if (lexicalHandler!=null)
lexicalHandler.endEntity(name);
}
/**
* Report the start of a CDATA section.
*/
public void startCDATA() throws SAXException
{
if (lexicalHandler!=null)
lexicalHandler.startCDATA();
}
/**
* Report the end of a CDATA section.
*/
public void endCDATA() throws SAXException
{
if (lexicalHandler!=null)
lexicalHandler.endCDATA();
}
/**
* Report an XML comment anywhere in the document.
*/
public void comment(char[] ch, int start, int len) throws SAXException
{
if (lexicalHandler!=null)
lexicalHandler.comment(ch, start, len);
}
private String getLocation()
{
if (locator==null)
return "unknown";
return locator.getSystemId()+":"+locator.getLineNumber()+":"+locator.getColumnNumber();
}
/**
* Receives the notification, that the lexical processor starts reading a new document.
*
* @throws Exception If a exception occurs.
*/
private void handleStartDocument()
{
statestack.clear();
statestack.push(0); // First state is zero
treestack.clear();
lineSnippet = new StringBuffer();
unrecoverable = false;
exception = null;
}
/**
* Receives the notification, that the lexical processor has recognized a lexeme.
*
* @param symbol Symbol of the lexeme.
* @param text Recognized text.
*
* @throws Exception If a exception occurs.
*/
private void handleLexeme(String symbolname, String text)
throws SAXException
{
if (unrecoverable)
return;
int symbol = -1; // Index of the token from the input
for (int i = 0; (i<automaton.getTerminalCount()) && (symbol==-1); i++)
if (automaton.getTerminal(i).equals(symbolname))
symbol = i;
int state = statestack.peek();
if (symbol==-1)
{
for (int i = 0; (i<automaton.getTerminalCount()) && (symbol==-1); i++)
if (automaton.getTerminal(i).equals("error"))
symbol = i;
if (symbol==-1)
{
if ((log!=null) && (log.isDebugEnabled()))
log.debug("State "+state+" unexpected token "+Decoder.toString(text)+"("+symbolname+")");
StringBuffer message = new StringBuffer();
message.append("Unexpected token ");
message.append(symbolname);
message.append("[\"");
message.append(text);
message.append("\"], expected tokens: ");
for (symbol = 0; symbol<automaton.getTerminalCount(); symbol++)
if (!automaton.isErrorAction(state, symbol))
{
if (symbol>0)
message.append(", ");
message.append(automaton.getTerminal(symbol));
}
unrecoverable = true;
exception =
new ParseException(message.toString(), symbolname, text, lineSnippet.toString(), source,
lineNumber, columnNumber);
return;
}
else
symbolname = "error";
}
/* ============================ Reduce =================================== */
while (automaton.isReduceAction(state, symbol))
{
int production = automaton.getReduceProduction(state, symbol);
if ((log!=null) && (log.isDebugEnabled()))
log.debug("State "+state+" reduce "+
automaton.getNonterminal(automaton.getProductionSymbol(production))+" ("+
production+")");
ProductionNode productionnode =
new ProductionNode(automaton.getNonterminal(automaton.getProductionSymbol(production)));
TreeNode node = null;
for (int i = 0; i<automaton.getProductionLength(production); i++)
{
statestack.pop();
productionnode.insert((node = (TreeNode)treestack.pop()));
}
if (node!=null)
{
productionnode.linenumber = node.linenumber;
productionnode.columnnumber = node.columnnumber;
}
treestack.push(productionnode);
statestack.push(automaton.getTransition(statestack.peek(),
automaton.getProductionSymbol(production)));
state = statestack.peek();
}
/* ================================== Error =================================== */
if (automaton.isErrorAction(state, symbol))
{
if ((log!=null) && (log.isDebugEnabled()))
log.debug("State "+state+" error token "+Decoder.toString(text)+"("+symbolname+")");
StringBuffer errortext = new StringBuffer();
// Remove states from stack, until error token is found
while ((statestack.getCount()>1) &&
(((automaton.isErrorAction(state, symbol)) &&
(automaton.getErrorTransition(state, symbol)==0)) &&
(!automaton.isShiftAction(state, symbol))))
{
statestack.pop();
TreeNode node = (TreeNode)treestack.pop();
errortext.insert(0, node.getText());
state = statestack.peek();
}
if (((!symbolname.equals("error")) && (!automaton.isErrorAction(state, symbol))) ||
((symbolname.equals("error")) && (!automaton.isShiftAction(state, symbol))))
throw new SAXException("Couldn't accept input "+symbolname+"["+Decoder.toString(text)+
"] at "+getLocation());
if (automaton.isErrorAction(state, symbol))
statestack.push(automaton.getErrorTransition(state, symbol));
else
statestack.push(automaton.getShiftTransition(state, symbol));
state = statestack.peek();
if (automaton.isErrorAction(state, symbol))
errortext.append(text);
// push error token on top of the stack
TokenNode tokennode = new TokenNode("error", errortext.toString());
if (locator!=null)
{
tokennode.linenumber = lineNumber;
tokennode.columnnumber = columnNumber;
}
treestack.push(tokennode);
if (automaton.isErrorAction(state, symbol))
return;
}
/* ============================ Reduce =================================== */
while (automaton.isReduceAction(state, symbol))
{
int production = automaton.getReduceProduction(state, symbol);
if ((log!=null) && (log.isDebugEnabled()))
log.debug("State "+state+" reduce "+
automaton.getNonterminal(automaton.getProductionSymbol(production))+" ("+
production+")");
ProductionNode productionnode =
new ProductionNode(automaton.getNonterminal(automaton.getProductionSymbol(production)));
TreeNode node = null;
for (int i = 0; i<automaton.getProductionLength(production); i++)
{
statestack.pop();
productionnode.insert((node = (TreeNode)treestack.pop()));
}
if (node!=null)
{
productionnode.linenumber = node.linenumber;
productionnode.columnnumber = node.columnnumber;
}
treestack.push(productionnode);
statestack.push(automaton.getTransition(statestack.peek(),
automaton.getProductionSymbol(production)));
state = statestack.peek();
}
/* ==================================== Shift =================================== */
if (automaton.isShiftAction(state, symbol))
{
if ((log!=null) && (log.isDebugEnabled()))
log.debug("State "+state+" shift token "+symbolname+" ("+symbol+")");
statestack.push(automaton.getShiftTransition(state, symbol));
TokenNode tokennode = new TokenNode(symbolname, text);
if (locator!=null)
{
tokennode.linenumber = lineNumber;
tokennode.columnnumber = columnNumber;
}
treestack.push(tokennode);
if ((text.lastIndexOf("\n")>=0) || (text.lastIndexOf("\r")>=0))
{
lineSnippet = new StringBuffer();
lineSnippet.append(text.substring(Math.max(text.lastIndexOf("\n"), text.lastIndexOf("\r"))));
}
else
lineSnippet.append(text);
}
}
/**
* Receives the notification, that the lexical processor accepted the complete document, and
* stops with reading.
*
* @throws Exception If a exception occurs.
*/
private void handleEndDocument() throws SAXException
{
if (unrecoverable)
{
fireException();
return;
}
int state = statestack.peek();
/* ============================ Error =================================== */
if (automaton.isErrorAction(state))
{
if ((log!=null) && (log.isDebugEnabled()))
log.debug("State "+state+" error unexpected end of file");
StringBuffer errortext = new StringBuffer();
// Remove states from stack, until error token is found
while ((statestack.getCount()>1) && (automaton.getErrorTransition(state)==0))
{
statestack.pop();
TreeNode node = (TreeNode)treestack.pop();
errortext.insert(0, node.getText());
state = statestack.peek();
}
// push error token on top of the stack
statestack.push(automaton.getErrorTransition(state));
state = statestack.peek();
// if not transition possible, then ignore terminal by error token
if (automaton.isErrorAction(state))
throw new SAXException("Couldn't accept end of document at "+getLocation());
TokenNode tokennode = new TokenNode("error", errortext.toString());
if (locator!=null)
{
tokennode.linenumber = lineNumber;
tokennode.columnnumber = columnNumber;
}
treestack.push(tokennode);
}
/* ============================ Reduce & Accept =================================== */
while (automaton.isReduceAction(state) || automaton.isAcceptAction(state))
{
int production = automaton.getReduceProduction(state);
if ((log!=null) && (log.isDebugEnabled()))
log.debug("State "+state+" reduce "+
automaton.getNonterminal(automaton.getProductionSymbol(production))+" ("+
production+")");
ProductionNode productionnode =
new ProductionNode(automaton.getNonterminal(automaton.getProductionSymbol(production)));
TreeNode node = null;
for (int i = 0; i<automaton.getProductionLength(production); i++)
{
statestack.pop();
productionnode.insert((node = (TreeNode)treestack.pop()));
}
if (node!=null)
{
productionnode.linenumber = node.linenumber;
productionnode.columnnumber = node.columnnumber;
}
treestack.push(productionnode);
/* ================================== Accept =================================== */
if ((automaton.isAcceptAction(state)) && (statestack.getCount()==1))
{
if ((log!=null) && (log.isDebugEnabled()))
log.debug("State "+state+" accept");
if (locatorImpl!=null)
locatorImpl.setSystemId(source);
fireEvents(productionnode);
return;
}
else
statestack.push(automaton.getTransition(statestack.peek(),
automaton.getProductionSymbol(production)));
state = statestack.peek();
}
if ((automaton.isErrorAction(state)) && (statestack.getCount()>1))
{
if ((log!=null) && (log.isDebugEnabled()))
log.debug("State "+state+" error unexpected end of file");
StringBuffer message = new StringBuffer();
message.append("Unexpected end of file, expected tokens: ");
for (int i = 0; i<automaton.getTerminalCount(); i++)
if (!automaton.isErrorAction(state, i))
{
message.append(automaton.getTerminal(i));
message.append(" ");
}
exception =
new ParseException(message.toString(), "", "", lineSnippet.toString(), source, lineNumber,
columnNumber);
fireException();
}
}
/**
* Fire the SAX events by traverseing the hirachy.
*
* @param node Current node.
*/
private void fireEvents(TreeNode node) throws SAXException
{
Stack stack = new Stack();
ProductionNode previous = null;
TreeNode next = node;
do
{
while (next!=null)
{
stack.push(next);
if (locatorImpl!=null)
{
locatorImpl.setLineNumber(next.linenumber);
locatorImpl.setColumnNumber(next.columnnumber);
}
if ((!flatten) || (previous==null) || (!previous.symbol.equals(next.symbol)))
{
AttributesImpl atts = new AttributesImpl();
if (localizable)
{
atts.addAttribute("", "line", "line", "CDATA", String.valueOf(next.linenumber));
atts.addAttribute("", "column", "column", "CDATA", String.valueOf(next.columnnumber));
}
contentHandler.startElement(NS_OUTPUT, next.symbol, next.symbol, atts);
}
if (next instanceof ProductionNode)
{
ProductionNode production = (ProductionNode)next;
previous = production;
next = production.firstchild;
}
else
{
TokenNode token = (TokenNode)next;
contentHandler.characters(token.text.toCharArray(), 0, token.text.length());
next = null;
}
}
next = (TreeNode)stack.pop();
previous = stack.isEmpty() ? null : (ProductionNode)stack.peek();
if (locatorImpl!=null)
{
locatorImpl.setLineNumber(next.linenumber);
locatorImpl.setColumnNumber(next.columnnumber);
}
if ((!flatten) || (previous==null) || (!previous.symbol.equals(next.symbol)))
contentHandler.endElement(NS_OUTPUT, next.symbol, next.symbol);
next = next.nextsibling;
}
while (!stack.isEmpty());
}
private void fireException() throws SAXException
{
AttributesImpl atts = new AttributesImpl();
atts.addAttribute("", "symbol", "symbol", "CDATA", exception.getSymbol());
atts.addAttribute("", "text", "text", "CDATA", exception.getText());
atts.addAttribute("", "line-snippet", "line-snippet", "CDATA", exception.getLineSnippet());
atts.addAttribute("", "localized", "localized", "CDATA", String.valueOf(exception.isLocalized()));
atts.addAttribute("", "line-number", "line-number", "CDATA",
String.valueOf(exception.getLineNumber()));
atts.addAttribute("", "column-number", "column-number", "CDATA",
String.valueOf(exception.getColumnNumber()));
contentHandler.startElement(NS_OUTPUT, "exception", "exception", atts);
contentHandler.endElement(NS_OUTPUT, "exception", "exception");
}
private abstract class TreeNode
{
public String symbol = null;
public int linenumber = 1;
public int columnnumber = 1;
public TreeNode previoussibling = null;
public TreeNode nextsibling = null;
public abstract String getText();
public String toString()
{
return symbol;
}
}
private class TokenNode extends TreeNode
{
public TokenNode(String symbol, String text)
{
this.symbol = symbol;
this.text = text;
}
public String text = null;
public String getText()
{
return text+((nextsibling!=null) ? nextsibling.getText() : "");
}
}
private class ProductionNode extends TreeNode
{
public ProductionNode(String symbol)
{
this.symbol = symbol;
}
public TreeNode firstchild = null;
public TreeNode lastchild = null;
public void insert(TreeNode node)
{
if (firstchild==null)
{
firstchild = node;
lastchild = node;
}
else
{
firstchild.previoussibling = node;
node.nextsibling = firstchild;
firstchild = node;
}
}
public void insertChilds(ProductionNode production)
{
if (firstchild==null)
{
firstchild = production.firstchild;
lastchild = production.lastchild;
}
else
{
firstchild.previoussibling = production.lastchild;
production.lastchild.nextsibling = firstchild;
firstchild = production.firstchild;
}
}
public String getText()
{
return ((firstchild!=null) ? firstchild.getText() : "")+
((nextsibling!=null) ? nextsibling.getText() : "");
}
}
}