/*
* Copyright (C) Chaperon. All rights reserved.
* -------------------------------------------------------------------------
* This software is published under the terms of the Apache Software License
* version 1.1, a copy of which has been included with this distribution in
* the LICENSE file.
*/
package net.sourceforge.chaperon.process.extended;
import net.sourceforge.chaperon.common.Decoder;
import net.sourceforge.chaperon.model.Violations;
import net.sourceforge.chaperon.model.extended.ExtendedGrammar;
import net.sourceforge.chaperon.model.extended.Pattern;
import net.sourceforge.chaperon.model.extended.PatternIterator;
import org.apache.commons.logging.Log;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.LocatorImpl;
import java.util.Stack;
/**
* This class simulates a pushdown automaton that parses text directly from an extended grammar.
*
* @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
* @version CVS $Id: ExtendedDirectParserProcessor.java,v 1.12 2004/01/09 10:34:51 benedikta Exp $
*/
public class ExtendedDirectParserProcessor implements ContentHandler, LexicalHandler
{
/** Namespace of the input element whose character content is parsed (see startElement/endElement). */
private static final String NS = "http://chaperon.sourceforge.net/schema/text/1.0";
/** Local name of the element in {@link #NS} that encloses the text to parse. */
private static final String TEXT = "text";
/** Namespace for the generated SAX events. */
public static final String NS_OUTPUT = "http://chaperon.sourceforge.net/schema/syntaxtree/2.0";
/** Local name of the wrapper element emitted around the generated syntax tree. */
private static final String OUTPUT = "output";
// SAX variables
// Receiver of all forwarded and generated content events.
private ContentHandler contentHandler = null;
// Receiver of the forwarded lexical events; may be null.
private LexicalHandler lexicalHandler = null;
// Locator handed in through setDocumentLocator.
private Locator locator = null;
// Copy of the locator passed on to the content handler; its line/column are updated by this class.
private LocatorImpl locatorImpl = null;
// State of SAX events
// Outside of the text element: events are passed through to the content handler.
private static final int STATE_OUTER = 0;
// Inside of the text element: character data is fed to the parser.
private static final int STATE_INNER = 1;
private int state = STATE_OUTER;
// Help variables
// Grammar that drives the parsing process.
private ExtendedGrammar grammar;
// Stored by setFlatten; not read anywhere else in this class.
private boolean flatten = false;
// Stack nodes that are processed for the current character.
private StackNodeSet current = new StackNodeSet();
// Stack nodes collected for the following character; swapped with 'current' after each character.
private StackNodeSet next = new StackNodeSet();
private Log log;
// Node list of the accepted start symbol; set by reduce, consumed by fireEvents.
private StackNodeList root;
// Position within the parsed text, both starting at 1.
private int line = 1;
private int column = 1;
// Upper bound for the watchdog counters of the stack node sets; parsing is aborted when exceeded.
private static final int MAXWATCHDOG = 1000;
/**
 * Creates a processor without grammar, log or handlers. The grammar and the content handler
 * have to be supplied through the setters before any text is parsed.
 */
public ExtendedDirectParserProcessor()
{
}
/**
 * Create a new parser processor for the given grammar.
 *
 * @param grammar Grammar, which the processor should use. It is validated and prepared
 *                by {@link #setExtendedGrammar(ExtendedGrammar)}.
 * @param log Log, which should be used; may be <code>null</code>.
 *
 * @throws IllegalArgumentException If the grammar is not valid.
 */
public ExtendedDirectParserProcessor(ExtendedGrammar grammar, Log log)
{
  // The log has to be in place first, otherwise setExtendedGrammar cannot write its debug output.
  this.log = log;
  setExtendedGrammar(grammar);
}
/**
 * Set the grammar for the processor. The grammar is validated first and only stored if it is
 * valid; afterwards it is updated and, if debug logging is enabled, its successor relations
 * are written to the log.
 *
 * @param grammar Grammar, which the processor should use.
 *
 * @throws IllegalArgumentException If the grammar reports at least one violation.
 */
public void setExtendedGrammar(ExtendedGrammar grammar)
{
  Violations violations = grammar.validate();
  if ((violations!=null) && (violations.getViolationCount()>0))
    throw new IllegalArgumentException("Grammar is not valid: "+violations.getViolation(0));

  // Store the grammar only after it passed validation, so a rejected grammar never stays installed.
  this.grammar = grammar;

  if ((log!=null) && (log.isDebugEnabled()))
    log.debug("grammar:\n"+grammar);

  grammar.update();

  if ((log!=null) && (log.isDebugEnabled()))
  {
    StringBuffer buffer = new StringBuffer();

    buffer.append("Successors:\n");
    for (PatternIterator i = grammar.getAllPattern().getPattern(); i.hasNext();)
    {
      Pattern pattern = i.next();
      appendSuccessors(buffer, pattern, pattern.getSuccessors());
    }

    buffer.append("\nAscending successors:\n");
    for (PatternIterator i = grammar.getAllPattern().getPattern(); i.hasNext();)
    {
      Pattern pattern = i.next();
      appendSuccessors(buffer, pattern, pattern.getAscendingSuccessors());
    }

    buffer.append("\nDescending successors:\n");
    for (PatternIterator i = grammar.getAllPattern().getPattern(); i.hasNext();)
    {
      Pattern pattern = i.next();
      appendSuccessors(buffer, pattern, pattern.getDescendingSuccessors());
    }

    log.debug(buffer.toString());
  }
}

/**
 * Appends one line of the form <code>pattern->{s1,s2,...}</code> to the buffer. Nothing is
 * appended if the iterator has no elements.
 */
private static void appendSuccessors(StringBuffer buffer, Pattern pattern,
                                     PatternIterator successors)
{
  if (!successors.hasNext())
    return;

  buffer.append(pattern).append("->{");
  while (successors.hasNext())
  {
    buffer.append(successors.next());
    if (successors.hasNext())
      buffer.append(",");
  }
  buffer.append("}\n");
}
/**
 * Registers the <code>ContentHandler</code> which receives the content events forwarded
 * and generated by this processor.
 *
 * @param handler Receiver of the content events.
 */
public void setContentHandler(ContentHandler handler)
{
  contentHandler = handler;
}
/**
 * Registers the <code>LexicalHandler</code> which receives the lexical events forwarded
 * by this processor.
 *
 * @param handler Receiver of the lexical events.
 */
public void setLexicalHandler(LexicalHandler handler)
{
  lexicalHandler = handler;
}
/**
 * Supplies the log used for debug and info output of the parsing process.
 *
 * @param log Log to write to; all logging is skipped while it is <code>null</code>.
 */
public void setLog(Log log)
{
  this.log = log;
}
/**
 * Selects whether a flatter XML hierarchy should be produced, which means that elements
 * with the same name are collapsed. This method only stores the flag.
 *
 * @param flatten True, if a flatter hierarchy should be produced.
 */
public void setFlatten(boolean flatten)
{
  this.flatten = flatten;
}
/**
 * Receive an object for locating the origin of SAX document events. A copy of the locator is
 * created and handed to the content handler, so that this processor can report its own
 * line and column numbers.
 *
 * @param locator Locator of the event source, or <code>null</code> if none is available.
 */
public void setDocumentLocator(Locator locator)
{
  this.locator = locator;

  if (locator==null)
  {
    // Do not keep a copy from an earlier call that no longer has a source locator behind it.
    this.locatorImpl = null;
    return;
  }

  this.locatorImpl = new LocatorImpl(locator);

  // The locator may arrive before a content handler was registered.
  if (contentHandler!=null)
    contentHandler.setDocumentLocator(locatorImpl);
}
/**
 * Receive notification of the beginning of a document. The event is forwarded to the content
 * handler and the processor is reset to the state outside of a text element.
 *
 * @throws SAXException If the content handler rejects the event.
 */
public void startDocument() throws SAXException
{
  // A SAX parser is not obliged to supply a locator, so only synchronize if one is present.
  if ((locator!=null) && (locatorImpl!=null))
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());
  }

  contentHandler.startDocument();
  state = STATE_OUTER;
}
/**
 * Receive notification of the beginning of an element. Elements outside of the text namespace
 * are forwarded unchanged. The <code>text</code> element of the text namespace is not
 * forwarded; it switches the processor into parsing mode and resets the parser stacks and
 * the line/column counters.
 *
 * @param namespaceURI Namespace of the element.
 * @param localName Local name of the element.
 * @param qName Qualified name of the element.
 * @param atts Attributes of the element.
 *
 * @throws SAXException If an element occurs inside of the text element, or if an unknown
 *                      element of the text namespace occurs.
 */
public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
  throws SAXException
{
  // A SAX parser is not obliged to supply a locator, so only synchronize if one is present.
  if ((locator!=null) && (locatorImpl!=null))
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());
  }

  if (state==STATE_INNER)
    throw new SAXException("Unexpected element "+qName);

  // Everything outside of the text namespace is passed through untouched.
  if (!NS.equals(namespaceURI))
  {
    contentHandler.startElement(namespaceURI, localName, qName, atts);
    return;
  }

  // Constant-first comparison, so a missing local name is reported instead of causing an NPE.
  if (!TEXT.equals(localName))
    throw new SAXException("Unknown element "+qName);

  state = STATE_INNER;

  // ======================= Start Text Document =======================
  current.clear();
  current.push(new TerminalStackNode(null, 0, grammar.getStartPattern(), null));
  next.clear();
  line = 1;
  column = 1;
}
/**
 * Receive notification of character data. Outside of the text element the characters are
 * forwarded unchanged. Inside of the text element each character is processed in turn: all
 * nodes of the current set are reduced and shifted, then the set of nodes collected for the
 * next character becomes the current set.
 *
 * @param text Buffer containing the characters.
 * @param textstart Index of the first character.
 * @param textlength Count of characters.
 *
 * @throws SAXException If the content handler rejects the forwarded event.
 * @throws IllegalStateException If no parser state is left, or if the watchdog limit of a
 *                               stack node set is exceeded.
 * @throws IllegalArgumentException If a character cannot be shifted by any parser state.
 */
public void characters(char[] text, int textstart, int textlength)
  throws SAXException
{
  // A SAX parser is not obliged to supply a locator, so only synchronize if one is present.
  if ((locator!=null) && (locatorImpl!=null))
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());
  }

  if (state==STATE_OUTER)
  {
    contentHandler.characters(text, textstart, textlength);
    return;
  }

  for (int position = textstart; position<(textstart+textlength); position++)
  {
    if ((log!=null) && (log.isDebugEnabled()))
      log.debug("===================================\nProcess "+Decoder.toChar(text[position]));

    if (current.isEmpty())
      throw new IllegalStateException("Parsing process is aborted");

    if ((log!=null) && (log.isDebugEnabled()))
      log.debug(getStatesAsString());

    // Reductions push new nodes onto 'current', so the loop runs until no work is left.
    while (!current.isEmpty())
    {
      StackNode node = current.pop();

      // Reduce once if the character may follow the finished definition.
      for (PatternIterator nextPattern = node.pattern.getDescendingSuccessors();
           nextPattern.hasNext();)
      {
        if (nextPattern.next().contains(text[position]))
        {
          reduce(node.pattern.getDefinition().getSymbol(), node, null);
          break;
        }
      }

      reduceEmpty(node);
      shift(node, text, position);

      if ((current.watchdog>MAXWATCHDOG) || (next.watchdog>MAXWATCHDOG))
      {
        if ((log!=null) && (log.isInfoEnabled()))
          log.info(getStatesAsString());

        throw new IllegalStateException("Aborted parsing because of a high ambiguous grammar"+
                                        " ["+line+":"+column+"]");
      }
    }

    if ((log!=null) && (log.isDebugEnabled()))
      log.debug(getStatesAsString());

    // No node could shift the character.
    if (next.isEmpty())
    {
      if ((log!=null) && (log.isInfoEnabled()))
        log.info(getStatesAsString());

      throw new IllegalArgumentException("Character '"+text[position]+"' is not expected"+" ["+
                                         line+":"+column+"]");
    }

    swapStacks();
    increasePosition(text, position, position+1);
  }
}
/**
 * Receive notification of the end of an element. Outside of the text element the event is
 * forwarded unchanged. The end of the text element finishes the parsing process: the
 * remaining nodes are reduced against the end pattern of the grammar and, if the start
 * symbol was accepted, the resulting tree is emitted as SAX events.
 *
 * @param namespaceURI Namespace of the element.
 * @param localName Local name of the element.
 * @param qName Qualified name of the element.
 *
 * @throws SAXException If another element than the text element is closed while parsing,
 *                      or if the content handler rejects an event.
 * @throws IllegalStateException If the text ended unexpectedly, or if the watchdog limit
 *                               of a stack node set is exceeded.
 */
public void endElement(String namespaceURI, String localName, String qName)
  throws SAXException
{
  // A SAX parser is not obliged to supply a locator, so only synchronize if one is present.
  if ((locator!=null) && (locatorImpl!=null))
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());
  }

  if (state==STATE_OUTER)
  {
    contentHandler.endElement(namespaceURI, localName, qName);
    return;
  }

  // While parsing, only the end tag of the text element itself is legal.
  if (!NS.equals(namespaceURI))
    throw new SAXException("Unexpected element "+qName);

  // Constant-first comparison, so a missing local name is reported instead of causing an NPE.
  if (!TEXT.equals(localName))
    throw new SAXException("Unknown element "+qName);

  state = STATE_OUTER;

  // ======================= End Text Document =======================
  if ((log!=null) && (log.isDebugEnabled()))
    log.debug("===================================\nProcess end of text");

  root = null;

  Pattern eot = grammar.getEndPattern();

  // Reductions push new nodes onto 'current', so the loop runs until no work is left.
  while (!current.isEmpty())
  {
    StackNode node = current.pop();

    // Reduce once if the end of text may follow the finished definition.
    for (PatternIterator nextPattern = node.pattern.getDescendingSuccessors();
         nextPattern.hasNext();)
    {
      if (nextPattern.next()==eot)
      {
        reduce(node.pattern.getDefinition().getSymbol(), node, null);
        break;
      }
    }

    reduceEmpty(node);

    if ((current.watchdog>MAXWATCHDOG) || (next.watchdog>MAXWATCHDOG))
    {
      if ((log!=null) && (log.isInfoEnabled()))
        log.info(getStatesAsString());

      throw new IllegalStateException("Aborted parsing because of a high ambiguous grammar"+" ["+
                                      line+":"+column+"]");
    }
  }

  if ((log!=null) && (log.isDebugEnabled()))
    log.debug(getStatesAsString());

  // reduce() sets 'root' as soon as the start symbol was accepted.
  if (root==null)
  {
    if ((log!=null) && (log.isInfoEnabled()))
      log.info(getStatesAsString());

    throw new IllegalStateException("Unexpected end of text"+" ["+line+":"+column+"]");
  }

  fireEvents();
}
/**
 * Receive notification of ignorable whitespace in element content. The event is forwarded
 * only outside of the text element.
 *
 * @param ch Buffer containing the whitespace.
 * @param start Index of the first character.
 * @param length Count of characters.
 *
 * @throws SAXException If the content handler rejects the event.
 */
public void ignorableWhitespace(char[] ch, int start, int length)
  throws SAXException
{
  // A SAX parser is not obliged to supply a locator, so only synchronize if one is present.
  if ((locator!=null) && (locatorImpl!=null))
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());
  }

  if (state==STATE_OUTER)
    contentHandler.ignorableWhitespace(ch, start, length);
}
/**
 * Begin the scope of a prefix-URI Namespace mapping. The event is always forwarded.
 *
 * @param prefix Namespace prefix being declared.
 * @param uri Namespace URI the prefix is mapped to.
 *
 * @throws SAXException If the content handler rejects the event.
 */
public void startPrefixMapping(String prefix, String uri)
  throws SAXException
{
  // A SAX parser is not obliged to supply a locator, so only synchronize if one is present.
  if ((locator!=null) && (locatorImpl!=null))
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());
  }

  contentHandler.startPrefixMapping(prefix, uri);
}
/**
 * End the scope of a prefix-URI mapping. The event is always forwarded.
 *
 * @param prefix Namespace prefix that was being mapped.
 *
 * @throws SAXException If the content handler rejects the event.
 */
public void endPrefixMapping(String prefix) throws SAXException
{
  // A SAX parser is not obliged to supply a locator, so only synchronize if one is present.
  if ((locator!=null) && (locatorImpl!=null))
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());
  }

  contentHandler.endPrefixMapping(prefix);
}
/**
 * Receive notification of a processing instruction. The event is forwarded only outside
 * of the text element.
 *
 * @param target Target of the processing instruction.
 * @param data Data of the processing instruction.
 *
 * @throws SAXException If the content handler rejects the event.
 */
public void processingInstruction(String target, String data)
  throws SAXException
{
  // A SAX parser is not obliged to supply a locator, so only synchronize if one is present.
  if ((locator!=null) && (locatorImpl!=null))
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());
  }

  if (state==STATE_OUTER)
    contentHandler.processingInstruction(target, data);
}
/**
 * Receive notification of a skipped entity. The event is forwarded only outside of the
 * text element.
 *
 * @param name Name of the skipped entity.
 *
 * @throws SAXException If the content handler rejects the event.
 */
public void skippedEntity(String name) throws SAXException
{
  // A SAX parser is not obliged to supply a locator, so only synchronize if one is present.
  if ((locator!=null) && (locatorImpl!=null))
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());
  }

  if (state==STATE_OUTER)
    contentHandler.skippedEntity(name);
}
/**
 * Receive notification of the end of a document. The event is always forwarded.
 *
 * @throws SAXException If the content handler rejects the event.
 */
public void endDocument() throws SAXException
{
  // A SAX parser is not obliged to supply a locator, so only synchronize if one is present.
  if ((locator!=null) && (locatorImpl!=null))
  {
    locatorImpl.setLineNumber(locator.getLineNumber());
    locatorImpl.setColumnNumber(locator.getColumnNumber());
  }

  contentHandler.endDocument();
}
/**
 * Report the start of DTD declarations, if any. The event is forwarded if a lexical
 * handler is registered and dropped otherwise, like the other lexical events.
 *
 * @param name Document type name.
 * @param publicId Declared public identifier, or <code>null</code>.
 * @param systemId Declared system identifier, or <code>null</code>.
 *
 * @throws SAXException If the lexical handler rejects the event.
 */
public void startDTD(String name, String publicId, String systemId)
  throws SAXException
{
  if (lexicalHandler!=null)
    lexicalHandler.startDTD(name, publicId, systemId);
}
/**
 * Report the end of DTD declarations. The event is forwarded if a lexical handler is
 * registered and dropped otherwise, like the other lexical events.
 *
 * @throws SAXException If the lexical handler rejects the event.
 */
public void endDTD() throws SAXException
{
  if (lexicalHandler!=null)
    lexicalHandler.endDTD();
}
/**
 * Passes the beginning of an entity on to the lexical handler, if one is registered.
 *
 * @param name Name of the entity.
 */
public void startEntity(String name) throws SAXException
{
  if (lexicalHandler==null)
    return;

  lexicalHandler.startEntity(name);
}
/**
 * Passes the end of an entity on to the lexical handler, if one is registered.
 *
 * @param name Name of the entity.
 */
public void endEntity(String name) throws SAXException
{
  if (lexicalHandler==null)
    return;

  lexicalHandler.endEntity(name);
}
/**
 * Passes the start of a CDATA section on to the lexical handler, if one is registered.
 */
public void startCDATA() throws SAXException
{
  if (lexicalHandler==null)
    return;

  lexicalHandler.startCDATA();
}
/**
 * Passes the end of a CDATA section on to the lexical handler, if one is registered.
 */
public void endCDATA() throws SAXException
{
  if (lexicalHandler==null)
    return;

  lexicalHandler.endCDATA();
}
/**
 * Passes an XML comment on to the lexical handler, if one is registered.
 *
 * @param ch Buffer containing the comment.
 * @param start Index of the first character.
 * @param len Count of characters.
 */
public void comment(char[] ch, int start, int len) throws SAXException
{
  if (lexicalHandler==null)
    return;

  lexicalHandler.comment(ch, start, len);
}
/**
 * Builds a textual dump of both stack node sets for the log: for each set its string
 * representation, its dump and its size.
 *
 * @return Description of the current and the next set.
 */
private String getStatesAsString()
{
  StringBuffer states = new StringBuffer();

  states.append("current:\n").append(current).append(current.dump());
  states.append("Count of states:").append(current.size());

  states.append("\nnext:\n").append(next).append(next.dump());
  states.append("Count of states:").append(next.size());

  return states.toString();
}
/**
 * Makes the nodes collected for the next character the current ones and hands the
 * previous current set over as the new, emptied, next set.
 */
private void swapStacks()
{
  StackNodeSet processed = current;

  current = next;
  next = processed;
  next.clear();
}
/**
 * Tries to consume the character at the given position on top of the given node. For every
 * successor and every ascending successor of the node's pattern that contains the character,
 * a new terminal node is pushed onto the next set.
 *
 * @param node Node on which the character should be shifted.
 * @param text Buffer containing the character.
 * @param position Index of the character within the buffer.
 */
private void shift(StackNode node, char[] text, int position)
{
// First pass: patterns that directly succeed the pattern of the node.
for (PatternIterator i = node.pattern.getSuccessors(); i.hasNext();)
{
Pattern nextPattern = i.next();
if (nextPattern.contains(text[position]))
{
if (node instanceof NonterminalStackNode)
{
// If the pattern of node.last could consume the character itself, the whole method is left
// (return, not continue), so nothing further is shifted for this node.
// NOTE(review): node.last is dereferenced without a null check; confirm it is always set for
// nonterminal nodes, including those created by reduceEmpty with a null list.
for (PatternIterator j = node.last.pattern.getSuccessors(); j.hasNext();)
if (j.next().contains(text[position]))
return;
for (PatternIterator j = node.last.pattern.getAscendingSuccessors(); j.hasNext();)
if (j.next().contains(text[position]))
return;
}
StackNode newNode = new TerminalStackNode(text, position, nextPattern, node);
if ((log!=null) && (log.isDebugEnabled()))
log.debug("shift "+newNode);
next.push(newNode);
}
}
// Second pass: the same procedure for the ascending successors of the node's pattern.
for (PatternIterator i = node.pattern.getAscendingSuccessors(); i.hasNext();)
{
Pattern firstPattern = i.next();
if (firstPattern.contains(text[position]))
{
if (node instanceof NonterminalStackNode)
{
// Same check as in the first pass; leaves the method if it matches.
for (PatternIterator j = node.last.pattern.getSuccessors(); j.hasNext();)
if (j.next().contains(text[position]))
return;
for (PatternIterator j = node.last.pattern.getAscendingSuccessors(); j.hasNext();)
if (j.next().contains(text[position]))
return;
}
StackNode newNode = new TerminalStackNode(text, position, firstPattern, node);
if ((log!=null) && (log.isDebugEnabled()))
log.debug("shift "+newNode);
next.push(newNode);
}
}
}
/**
 * Reduces the nodes ending at the given node to a nonterminal with the given symbol. The
 * method walks up the ancestors and collects the visited nodes in a list, pushes a new
 * nonterminal node onto the current set for every matching pattern behind the last
 * collected node, and records the list as root if the start symbol was recognized
 * directly behind the start pattern.
 *
 * @param symbol Symbol of the definition that is reduced.
 * @param node Last node of the definition.
 * @param list Nodes already collected behind the given node; <code>null</code> for none.
 */
private void reduce(String symbol, StackNode node, StackNodeList list)
{
// Every sibling is reduced on its own, continuing with the list collected so far.
if (node.sibling!=null)
reduce(symbol, node.sibling, list);
list = new StackNodeList(node, list);
// Walk up as long as the pattern of the node is a successor of its ancestor's pattern,
// prepending each visited node to the list.
// NOTE(review): node.ancestor is dereferenced without a null check; confirm that the walk
// can never reach the initial node, which is created with a null ancestor.
while(node.ancestor.pattern.hasSuccessor(node.pattern))
{
node = node.ancestor;
if (node.sibling!=null)
reduce(symbol, node.sibling, list);
list = new StackNodeList(node, list);
}
// Continue behind the ancestor with every successor pattern that carries the reduced symbol.
for (PatternIterator i = node.ancestor.pattern.getSuccessors(); i.hasNext();)
{
Pattern nextPattern = i.next();
if (symbol.equals(nextPattern.getSymbol()))
{
StackNode newNode = new NonterminalStackNode(list, nextPattern, node.ancestor);
if ((log!=null) && (log.isDebugEnabled()))
log.debug("reduce "+newNode+" with "+list);
current.push(newNode);
}
}
// The same for the ascending successors of the ancestor's pattern.
for (PatternIterator i = node.ancestor.pattern.getAscendingSuccessors(); i.hasNext();)
{
Pattern firstPattern = i.next();
if (symbol.equals(firstPattern.getSymbol()))
{
StackNode newNode = new NonterminalStackNode(list, firstPattern, node.ancestor);
if ((log!=null) && (log.isDebugEnabled()))
log.debug("reduce "+newNode+" with "+list);
current.push(newNode);
}
}
// Accept: only the first list that yields the start symbol behind the start pattern is kept.
if ((root==null) && (node.ancestor.pattern==grammar.getStartPattern()) &&
(symbol.equals(grammar.getStartSymbol())))
{
root = list;
if ((log!=null) && (log.isDebugEnabled()))
log.debug("accept "+symbol+" with "+list);
}
}
/**
 * Pushes an empty nonterminal node onto the current set for every successor and every
 * ascending successor of the node's pattern whose symbol is nullable in the grammar.
 *
 * @param node Node behind which the empty nonterminals are placed.
 */
private void reduceEmpty(StackNode node)
{
  for (PatternIterator successors = node.pattern.getSuccessors(); successors.hasNext();)
  {
    Pattern candidate = successors.next();
    String candidateSymbol = candidate.getSymbol();

    if ((candidateSymbol==null) || (!grammar.isNullable(candidateSymbol)))
      continue;

    StackNode emptyNode = new NonterminalStackNode(null, candidate, node);
    if ((log!=null) && (log.isDebugEnabled()))
      log.debug("reduce "+emptyNode);

    current.push(emptyNode);
  }

  for (PatternIterator ascending = node.pattern.getAscendingSuccessors(); ascending.hasNext();)
  {
    Pattern candidate = ascending.next();
    String candidateSymbol = candidate.getSymbol();

    if ((candidateSymbol==null) || (!grammar.isNullable(candidateSymbol)))
      continue;

    // TODO: check for empty elements, which can occur in the ascending successors
    // A pattern that ascends to itself is skipped, so no empty element is produced for it.
    if (candidate==node.pattern)
      continue;

    StackNode emptyNode = new NonterminalStackNode(null, candidate, node);
    if ((log!=null) && (log.isDebugEnabled()))
      log.debug("reduce "+emptyNode);

    current.push(emptyNode);
  }
}
/**
 * Advances the line and column counters over a range of characters. A line feed starts a
 * new line; a carriage return does so only if it is not directly followed by a line feed
 * within the buffer. Every other character advances the column.
 *
 * @param text Buffer containing the characters.
 * @param position Index of the first character to count.
 * @param lastposition Index behind the last character to count (exclusive).
 */
private void increasePosition(char[] text, int position, int lastposition)
{
  for (int index = position; index<lastposition; index++)
  {
    char c = text[index];
    boolean loneCarriageReturn =
      (c=='\r') && ((index==(text.length-1)) || (text[index+1]!='\n'));

    if ((c=='\n') || loneCarriageReturn)
    {
      line++;
      column = 1;
    }
    else
      column++;
  }
}
/**
 * Emits the accepted parse result as SAX events. The output is wrapped into an
 * <code>output</code> element and an element named after the start symbol, both in the
 * output namespace. Nonterminal nodes become elements named after their pattern's symbol;
 * consecutive terminal nodes of the same buffer are merged into a single characters event.
 * The line/column counters, and the locator copy if present, are advanced along with the
 * emitted text.
 *
 * @throws SAXException If the content handler rejects an event.
 */
private void fireEvents() throws SAXException
{
  contentHandler.startPrefixMapping("", NS_OUTPUT);
  contentHandler.startElement(NS_OUTPUT, OUTPUT, OUTPUT, new AttributesImpl());

  String symbol = grammar.getStartSymbol();
  contentHandler.startElement(NS_OUTPUT, symbol, symbol, new AttributesImpl());

  // List entries of the currently open elements; popped when their definition is exhausted.
  Stack open = new Stack();
  StackNodeList item = root;

  // Pending run of terminal characters: text[position..lastposition], both inclusive.
  char[] text = null;
  int position = 0;
  int lastposition = 0;

  line = 1;
  column = 1;
  if (locatorImpl!=null)
  {
    locatorImpl.setLineNumber(line);
    locatorImpl.setColumnNumber(column);
  }

  while (item!=null)
  {
    if (item.node instanceof NonterminalStackNode)
    {
      // Pending text belongs in front of the element that is about to be opened.
      if (text!=null)
      {
        fireText(text, position, lastposition);
        text = null;
      }

      NonterminalStackNode nonterminal = (NonterminalStackNode)item.node;
      String name = item.node.pattern.getSymbol();

      contentHandler.startElement(NS_OUTPUT, name, name, new AttributesImpl());
      open.push(item);
      item = nonterminal.definition;
    }
    else
    {
      TerminalStackNode terminal = (TerminalStackNode)item.node;

      if (text==null)
      {
        text = terminal.text;
        position = terminal.position;
      }
      else if (text!=terminal.text)
      {
        // The terminal lives in another buffer, so the pending run cannot be extended.
        fireText(text, position, lastposition);
        text = terminal.text;
        position = terminal.position;
      }

      lastposition = terminal.position;
      item = item.next;
    }

    // Close every element whose definition is exhausted.
    while ((item==null) && (!open.isEmpty()))
    {
      item = (StackNodeList)open.pop();

      if (text!=null)
      {
        fireText(text, position, lastposition);
        text = null;
      }

      String name = item.node.pattern.getSymbol();
      contentHandler.endElement(NS_OUTPUT, name, name);
      item = item.next;
    }
  }

  if (text!=null)
    fireText(text, position, lastposition);

  contentHandler.endElement(NS_OUTPUT, symbol, symbol);
  contentHandler.endElement(NS_OUTPUT, OUTPUT, OUTPUT);
  contentHandler.endPrefixMapping("");
}

/**
 * Emits text[position..lastposition] (both inclusive) as a characters event and advances
 * the line/column counters and the locator copy past the emitted characters.
 */
private void fireText(char[] text, int position, int lastposition)
  throws SAXException
{
  contentHandler.characters(text, position, (lastposition+1)-position);

  // increasePosition expects an exclusive end index, not a length.
  increasePosition(text, position, lastposition+1);

  if (locatorImpl!=null)
  {
    locatorImpl.setLineNumber(line);
    locatorImpl.setColumnNumber(column);
  }
}
}