locatorImpl.setColumnNumber(columnNumber);
}
// Open the output: bind the default prefix to the output namespace and emit the
// opening OUTPUT element.
contentHandler.startPrefixMapping("", NS_OUTPUT);
AttributesImpl atts = new AttributesImpl();
// The "source" attribute is only added when `localizable` is set.
if (localizable)
  atts.addAttribute("", "source", "source", "CDATA", source);
// Hand over the attributes built above; passing a fresh, empty AttributesImpl here
// would silently drop the "source" attribute.
contentHandler.startElement(NS_OUTPUT, OUTPUT, OUTPUT, atts);
// Accumulates characters that no lexeme matched, so that a whole run of them can be
// reported as a single ERROR element by the code below.
StringBuffer unrecognized = new StringBuffer();
// Main scanning loop: walks `text` from `position` to its end. Each iteration either
// consumes one recognized lexeme (emitting a LEXEME element, preceded by an ERROR
// element for any pending unrecognized characters) or moves a single character into
// the `unrecognized` buffer.
while (position<text.length)
{
  String tokensymbol = null;
  String tokentext = null;
  // Try every lexeme definition, from the highest index down to 0. Because the length
  // comparison is ">=", a later-tested (lower-index) lexeme replaces an earlier candidate
  // of the same length: the longest match wins, and ties go to the lowest index.
  for (int lexemeindex = automaton.getLexemeCount()-1; lexemeindex>=0; lexemeindex--)
  {
    processor.setPatternAutomaton(automaton.getLexemeDefinition(lexemeindex));
    if ((processor.match(text, position)) &&
        ((tokentext==null) || (processor.getGroup().length()>=tokentext.length())))
    {
      tokensymbol = automaton.getLexemeSymbol(lexemeindex);
      tokentext = processor.getGroup();
    }
  }
  // An empty match cannot advance `position`; it is only reported here and then handled
  // like "no match" by the else branch below. The logger is optional everywhere else in
  // this loop, so it must be null-checked here as well.
  if ((log!=null) && (tokentext!=null) && (tokentext.length()==0))
    log.warn("Lexical processor recognized empty lexeme '"+tokensymbol+"'");
  if ((tokentext!=null) && (tokentext.length()>0))
  {
    // Flush the characters skipped before this lexeme as one ERROR element.
    if (unrecognized.length()>0)
    {
      if (log!=null)
        log.debug("Text was not recognized "+Decoder.toString(unrecognized.toString()));
      atts = new AttributesImpl();
      atts.addAttribute("", "text", "text", "CDATA", unrecognized.toString());
      if (localizable)
      {
        atts.addAttribute("", "line", "line", "CDATA", String.valueOf(lineNumber));
        atts.addAttribute("", "column", "column", "CDATA", String.valueOf(columnNumber));
      }
      contentHandler.startElement(NS_OUTPUT, ERROR, ERROR, atts);
      contentHandler.endElement(NS_OUTPUT, ERROR, ERROR);
      // `position` still points at the start of the lexeme, so subtracting the buffer
      // length yields the start of the unrecognized run.
      // NOTE(review): increasePosition presumably advances lineNumber/columnNumber over
      // the given range - confirm against its definition.
      increasePosition(position-unrecognized.length(), unrecognized.length());
      unrecognized = new StringBuffer();
    }
    if (tokensymbol!=null)
    {
      if (log!=null)
        log.debug("Recognize token "+tokensymbol+" with "+Decoder.toString(tokentext));
      if (locatorImpl!=null)
      {
        locatorImpl.setLineNumber(locator.getLineNumber());
        locatorImpl.setColumnNumber(locator.getColumnNumber());
      }
      atts = new AttributesImpl();
      atts.addAttribute("", "symbol", "symbol", "CDATA", tokensymbol);
      atts.addAttribute("", "text", "text", "CDATA", tokentext);
      if (localizable)
      {
        atts.addAttribute("", "line", "line", "CDATA", String.valueOf(lineNumber));
        atts.addAttribute("", "column", "column", "CDATA", String.valueOf(columnNumber));
      }
      contentHandler.startElement(NS_OUTPUT, LEXEME, LEXEME, atts);
      // Optionally emit one GROUP child per group with index >= 1 of the processor's
      // current match state.
      if (grouping)
        for (int i = 1; i<processor.getGroupCount(); i++)
        {
          AttributesImpl groupatts = new AttributesImpl();
          groupatts.addAttribute("", "text", "text", "CDATA", processor.getGroup(i));
          contentHandler.startElement(NS_OUTPUT, GROUP, GROUP, groupatts);
          contentHandler.endElement(NS_OUTPUT, GROUP, GROUP);
        }
      contentHandler.endElement(NS_OUTPUT, LEXEME, LEXEME);
    }
    // A lexeme whose symbol is null produces no element; it is only logged.
    else if (log!=null)
      log.debug("Ignore lexeme with "+Decoder.toString(tokentext));
    if (locatorImpl!=null)
    {
      locatorImpl.setColumnNumber(columnNumber);
      locatorImpl.setLineNumber(lineNumber);
    }
    // Consume the lexeme, then account for the consumed range (which starts at the
    // position held before the increment).
    position += tokentext.length();
    increasePosition(position-tokentext.length(), tokentext.length());
  }
  else
  {
    // No usable match at this position: remember the character and step over it.
    if (locatorImpl!=null)
    {
      locatorImpl.setColumnNumber(columnNumber);
      locatorImpl.setLineNumber(lineNumber);
    }
    unrecognized.append(text[position]);
    position++;
  }
}
if (unrecognized.length()>0)
{
if (log!=null)
log.debug("Text was not recognized "+Decoder.toString(unrecognized.toString()));
atts = new AttributesImpl();
atts.addAttribute("", "text", "text", "CDATA", unrecognized.toString());
if (localizable)
{
atts.addAttribute("", "line", "line", "CDATA", String.valueOf(lineNumber));
atts.addAttribute("", "column", "column", "CDATA", String.valueOf(columnNumber));