/*
* Copyright (C) Chaperon. All rights reserved.
* -------------------------------------------------------------------------
* This software is published under the terms of the Apache Software License
* version 1.1, a copy of which has been included with this distribution in
* the LICENSE file.
*/
package net.sourceforge.chaperon.model.lexicon;
import net.sourceforge.chaperon.model.pattern.*;
import net.sourceforge.chaperon.model.symbol.Terminal;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import java.util.Stack;
/**
 * SAX content handler that builds a {@link Lexicon} from a stream of SAX events.
 *
 * <p>Only elements in the namespace {@link #NS} are accepted; any other element, or an element
 * that is not allowed in the current parsing state, causes a <code>SAXException</code>. Objects
 * under construction are kept on an internal stack and attached to their parent when the
 * corresponding end tag is received. After the <code>lexicon</code> element has been closed, the
 * result is available through {@link #getLexicon()}.</p>
 *
 * @author <a href="mailto:stephan@apache.org">Stephan Michels </a>
 * @version CVS $Id: LexiconFactory.java,v 1.3 2003/12/09 19:55:52 benedikta Exp $
 */
public class LexiconFactory extends DefaultHandler
{
  /** The namespace of the lexicon configuration */
  public static final String NS = "http://chaperon.sourceforge.net/schema/lexicon/1.0";

  /** Element name */
  public static final String LEXEME_ELEMENT = "lexeme";

  /** Attribute name of the symbol property */
  public static final String SYMBOL_ATTRIBUTE = "symbol";

  /** Element name */
  public static final String LEXICON_ELEMENT = "lexicon";

  /** Element name */
  public static final String ALTERNATION_ELEMENT = "alt";

  /** Element name */
  public static final String BEGINOFLINE_ELEMENT = "bol";

  /** Element name */
  public static final String CHARACTERCLASS_ELEMENT = "cclass";

  /** Attribute name of the exclusive property */
  public static final String EXCLUSIVE_ATTRIBUTE = "exclusive";

  /** Element name */
  public static final String CHARACTERINTERVAL_ELEMENT = "cinterval";

  /** Attribute name of the min property */
  public static final String CHARACTERINTERVAL_MIN_ATTRIBUTE = "min";

  /** Attribute name of the max property */
  public static final String CHARACTERINTERVAL_MAX_ATTRIBUTE = "max";

  /** Element name */
  public static final String CHARACTERSTRING_ELEMENT = "cstring";

  /** Attribute name of the sequence property */
  public static final String CHARACTERSTRING_SEQUENCE_ATTRIBUTE = "content";

  /** Element name */
  public static final String CHARACTERSET_ELEMENT = "cset";

  /** Attribute name of the characters property */
  public static final String CHARACTERSET_CHARACTERS_ATTRIBUTE = "content";

  /*  public final static String CHARACTERGENERIC_ELEMENT = "cgeneric";
    public final static String CHARACTERGENERIC_CODE_ATTRIBUTE = "code";*/

  /** Attribute name of the numeric character code property (used by cstring and cset) */
  public static final String CODE_ATTRIBUTE = "code";

  /** Element name */
  public static final String CONCATENATION_ELEMENT = "concat";

  /** Element name */
  public static final String GROUP_ELEMENT = "group";

  /** Element name */
  public static final String UNIVERSALCHARACTER_ELEMENT = "cuniversal";

  /** Element name */
  public static final String ENDOFLINE_ELEMENT = "eol";

  /** Attribute name of the minOccurs property */
  public static final String MINOCCURS_ATTRIBUTE = "minOccurs";

  /** Attribute name of the maxOccurs property */
  public static final String MAXOCCURS_ATTRIBUTE = "maxOccurs";

  // Parsing states: which kind of element currently encloses the parser position.
  private static final int STATE_OUTER = 0;
  private static final int STATE_LEXICON = 1;
  private static final int STATE_LEXEME = 2;
  private static final int STATE_CHARACTERCLASS = 3;
  private static final int STATE_CHARACTERCLASSELEMENT = 4;

  private int state = STATE_OUTER;
  private Lexicon lexicon;
  private Locator locator = null;

  // Objects under construction; the innermost open element is on top.
  private Stack stack;

  /**
   * Returns the generated lexicon
   *
   * @return The lexicon built by the last completed <code>lexicon</code> element, or
   *         <code>null</code> if none has been completed yet.
   */
  public Lexicon getLexicon()
  {
    return lexicon;
  }

  /**
   * Describes the current position in the document for error messages and model locations.
   *
   * @return "systemId:line:column", or "unknown" if no locator was supplied.
   */
  private String getLocation()
  {
    if (locator==null)
      return "unknown";

    return locator.getSystemId()+":"+locator.getLineNumber()+":"+locator.getColumnNumber();
  }

  /**
   * Receive an object for locating the origin of SAX document events.
   *
   * @param locator Locator supplied by the parser.
   */
  public void setDocumentLocator(Locator locator)
  {
    this.locator = locator;
  }

  /**
   * Receive notification of the beginning of a document.
   */
  public void startDocument()
  {
    stack = new Stack();

    // Reset the state as well, so that a handler instance which was left in an inner state by
    // a previously aborted parse can be reused for a new document.
    state = STATE_OUTER;
  }

  /**
   * Return the content of the minOccurs attribute
   *
   * @param atts Attributes of an element
   *
   * @return Value of the minOccurs attribute; 1 if the attribute is absent, empty or not an
   *         integer; negative values are raised to 0.
   */
  private int getMinOccursFromAttributes(Attributes atts)
  {
    String attribute = atts.getValue(MINOCCURS_ATTRIBUTE);

    if ((attribute==null) || (attribute.length()==0))
      return 1;

    int minOccurs;
    try
    {
      minOccurs = Integer.parseInt(attribute);
    }
    catch (NumberFormatException e)
    {
      // Deliberately lenient: a malformed value falls back to the default
      minOccurs = 1;
    }

    return (minOccurs<0) ? 0 : minOccurs;
  }

  /**
   * Return the content of the maxOccurs attribute
   *
   * @param atts Attributes of an element
   *
   * @return Value of the maxOccurs attribute; <code>Integer.MAX_VALUE</code> for "*"; 1 if the
   *         attribute is absent, empty, not an integer or smaller than 1.
   */
  private int getMaxOccursFromAttributes(Attributes atts)
  {
    String attribute = atts.getValue(MAXOCCURS_ATTRIBUTE);

    if ((attribute==null) || (attribute.length()==0))
      return 1;

    if (attribute.equals("*"))
      return Integer.MAX_VALUE;

    int maxOccurs;
    try
    {
      maxOccurs = Integer.parseInt(attribute);
    }
    catch (NumberFormatException e)
    {
      // Deliberately lenient: a malformed value falls back to the default
      maxOccurs = 1;
    }

    return (maxOccurs<1) ? 1 : maxOccurs;
  }

  /**
   * Return the content of the exclusive attribute
   *
   * @param atts Attributes of an element
   *
   * @return <code>true</code> only if the attribute is present and equals "true" (ignoring
   *         case); <code>false</code> otherwise.
   */
  private boolean getExclusiveFromAttributes(Attributes atts)
  {
    // Boolean.valueOf never throws and yields false for null, empty or unrecognized values
    return Boolean.valueOf(atts.getValue(EXCLUSIVE_ATTRIBUTE)).booleanValue();
  }

  /**
   * Returns the first character of a mandatory attribute.
   *
   * @param atts Attributes of an element
   * @param attributeName Name of the attribute to read
   * @param qName Raw name of the element, used for the error message
   *
   * @return First character of the attribute value
   *
   * @throws SAXException If the attribute is missing or empty
   */
  private char getRequiredCharacter(Attributes atts, String attributeName, String qName)
    throws SAXException
  {
    String value = atts.getValue(attributeName);

    if ((value==null) || (value.length()==0))
      throw new SAXException("Missing or empty attribute "+attributeName+" for element "+qName+
                             " at "+getLocation());

    return value.charAt(0);
  }

  /**
   * Creates the exception reported for a code attribute which is not a valid number.
   *
   * @param code Offending attribute value
   * @param cause Original parsing failure
   *
   * @return Exception carrying the location and the cause
   */
  private SAXException invalidCode(String code, NumberFormatException cause)
  {
    return new SAXException("Invalid character code '"+code+"' at "+getLocation(), cause);
  }

  /**
   * Attaches a finished pattern to the object on top of the stack: added as a child to an
   * alternation, concatenation or group, or set as the definition of a lexeme. Any other
   * parent type leaves the pattern unattached.
   *
   * @param pattern Pattern which was just popped from the stack
   */
  private void addPatternToParent(Pattern pattern)
  {
    Object parent = stack.peek();

    if (parent instanceof Alternation)
      ((Alternation)parent).addPattern(pattern);
    else if (parent instanceof Concatenation)
      ((Concatenation)parent).addPattern(pattern);
    else if (parent instanceof PatternGroup)
      ((PatternGroup)parent).addPattern(pattern);
    else if (parent instanceof Lexeme)
      ((Lexeme)parent).setDefinition(pattern);
  }

  /**
   * Receive notification of the beginning of an element.
   *
   * @param namespaceURI The Namespace URI, or the empty string if the element has no Namespace URI
   *        or if Namespace processing is not being performed.
   * @param localName The local name (without prefix), or the empty string if Namespace processing
   *        is not being performed.
   * @param qName The raw XML 1.0 name (with prefix), or the empty string if raw names are not
   *        available.
   * @param atts The attributes attached to the element. If there are no attributes, it shall be an
   *        empty Attributes object.
   *
   * @throws SAXException If the element is not in the lexicon namespace, is not allowed in the
   *         current state, or carries an invalid code, min or max attribute.
   */
  public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
    throws SAXException
  {
    if (!NS.equals(namespaceURI))
      throw new SAXException("Unexpected element "+qName+" at "+getLocation());

    if ((localName.equals(LEXICON_ELEMENT)) && (state==STATE_OUTER))
    {
      Lexicon newLexicon = new Lexicon();

      newLexicon.setLocation(getLocation());
      stack.push(newLexicon);

      state = STATE_LEXICON;
    }
    else if ((localName.equals(LEXEME_ELEMENT)) && (state==STATE_LEXICON))
    {
      Lexeme lexeme = new Lexeme();

      lexeme.setLocation(getLocation());

      String symbol = atts.getValue(SYMBOL_ATTRIBUTE);
      if (symbol!=null)
        lexeme.setSymbol(new Terminal(symbol));

      stack.push(lexeme);

      state = STATE_LEXEME;
    }
    else if ((localName.equals(ALTERNATION_ELEMENT)) && (state==STATE_LEXEME))
    {
      Alternation alternation = new Alternation();

      alternation.setLocation(getLocation());
      alternation.setMinOccurs(getMinOccursFromAttributes(atts));
      alternation.setMaxOccurs(getMaxOccursFromAttributes(atts));
      stack.push(alternation);
    }
    else if ((localName.equals(CONCATENATION_ELEMENT)) && (state==STATE_LEXEME))
    {
      Concatenation concatenation = new Concatenation();

      concatenation.setLocation(getLocation());
      concatenation.setMinOccurs(getMinOccursFromAttributes(atts));
      concatenation.setMaxOccurs(getMaxOccursFromAttributes(atts));
      stack.push(concatenation);
    }
    else if ((localName.equals(CHARACTERSTRING_ELEMENT)) && (state==STATE_LEXEME))
    {
      CharacterString characterstring = new CharacterString();

      characterstring.setLocation(getLocation());
      characterstring.setMinOccurs(getMinOccursFromAttributes(atts));
      characterstring.setMaxOccurs(getMaxOccursFromAttributes(atts));

      String code = atts.getValue(CODE_ATTRIBUTE);
      if (code!=null)
      {
        // NOTE: cstring codes are parsed as plain decimal numbers, whereas cset codes (below)
        // are decoded and so also accept hex/octal notation. Kept as is for compatibility.
        char character;
        try
        {
          character = (char)Integer.parseInt(code);
        }
        catch (NumberFormatException e)
        {
          throw invalidCode(code, e);
        }

        characterstring.setString(String.valueOf(character));
      }
      else
        characterstring.setString(atts.getValue(CHARACTERSTRING_SEQUENCE_ATTRIBUTE));

      stack.push(characterstring);
    }
    else if ((localName.equals(GROUP_ELEMENT)) && (state==STATE_LEXEME))
    {
      PatternGroup group = new PatternGroup();

      group.setLocation(getLocation());
      group.setMinOccurs(getMinOccursFromAttributes(atts));
      group.setMaxOccurs(getMaxOccursFromAttributes(atts));
      stack.push(group);
    }
    else if ((localName.equals(UNIVERSALCHARACTER_ELEMENT)) && (state==STATE_LEXEME))
    {
      UniversalCharacter uni = new UniversalCharacter();

      uni.setLocation(getLocation());
      uni.setMinOccurs(getMinOccursFromAttributes(atts));
      uni.setMaxOccurs(getMaxOccursFromAttributes(atts));
      stack.push(uni);
    }
    else if ((localName.equals(BEGINOFLINE_ELEMENT)) && (state==STATE_LEXEME))
    {
      BeginOfLine bol = new BeginOfLine();

      bol.setLocation(getLocation());
      stack.push(bol);
    }
    else if ((localName.equals(ENDOFLINE_ELEMENT)) && (state==STATE_LEXEME))
    {
      EndOfLine eol = new EndOfLine();

      // Record the location like for every other pattern (it was missing here)
      eol.setLocation(getLocation());
      stack.push(eol);
    }
    else if ((localName.equals(CHARACTERCLASS_ELEMENT)) && (state==STATE_LEXEME))
    {
      CharacterClass characterclass = new CharacterClass();

      characterclass.setLocation(getLocation());
      characterclass.setExclusive(getExclusiveFromAttributes(atts));
      characterclass.setMinOccurs(getMinOccursFromAttributes(atts));
      characterclass.setMaxOccurs(getMaxOccursFromAttributes(atts));
      stack.push(characterclass);

      state = STATE_CHARACTERCLASS;
    }
    else if ((localName.equals(CHARACTERSET_ELEMENT)) && (state==STATE_CHARACTERCLASS))
    {
      CharacterSet characterset = new CharacterSet();

      characterset.setLocation(getLocation());

      String code = atts.getValue(CODE_ATTRIBUTE);
      if (code!=null)
      {
        char character;
        try
        {
          character = (char)Integer.decode(code).intValue();
        }
        catch (NumberFormatException e)
        {
          throw invalidCode(code, e);
        }

        characterset.setCharacters(String.valueOf(character));
      }
      else
        characterset.setCharacters(atts.getValue(CHARACTERSET_CHARACTERS_ATTRIBUTE));

      stack.push(characterset);

      state = STATE_CHARACTERCLASSELEMENT;
    }
    else if ((localName.equals(CHARACTERINTERVAL_ELEMENT)) && (state==STATE_CHARACTERCLASS))
    {
      // Validate both bounds first, so that a missing attribute is reported as a located
      // SAXException instead of a NullPointerException/StringIndexOutOfBoundsException
      char minimum = getRequiredCharacter(atts, CHARACTERINTERVAL_MIN_ATTRIBUTE, qName);
      char maximum = getRequiredCharacter(atts, CHARACTERINTERVAL_MAX_ATTRIBUTE, qName);

      CharacterInterval characterinterval = new CharacterInterval();

      characterinterval.setLocation(getLocation());
      characterinterval.setMinimum(minimum);
      characterinterval.setMaximum(maximum);
      stack.push(characterinterval);

      state = STATE_CHARACTERCLASSELEMENT;
    }
    else
      throw new SAXException("Unexpected element "+qName+" at "+getLocation());
  }

  /**
   * Receive notification of the end of an element.
   *
   * @param namespaceURI The Namespace URI, or the empty string if the element has no Namespace URI
   *        or if Namespace processing is not being performed.
   * @param localName The local name (without prefix), or the empty string if Namespace processing
   *        is not being performed.
   * @param qName The raw XML 1.0 name (with prefix), or the empty string if raw names are not
   *        available.
   *
   * @throws SAXException If the element is not in the lexicon namespace or is not expected in
   *         the current state.
   */
  public void endElement(String namespaceURI, String localName, String qName)
    throws SAXException
  {
    if (!NS.equals(namespaceURI))
      throw new SAXException("Unexpected element "+qName+" at "+getLocation());

    if ((localName.equals(LEXICON_ELEMENT)) && (state==STATE_LEXICON))
    {
      // The finished lexicon becomes the result of this factory
      lexicon = (Lexicon)stack.pop();

      state = STATE_OUTER;
    }
    else if ((localName.equals(LEXEME_ELEMENT)) && (state==STATE_LEXEME))
    {
      Lexeme lexeme = (Lexeme)stack.pop();
      Lexicon parent = (Lexicon)stack.peek();

      parent.addLexeme(lexeme);

      state = STATE_LEXICON;
    }
    else if (((localName.equals(ALTERNATION_ELEMENT)) ||
             (localName.equals(CONCATENATION_ELEMENT)) ||
             (localName.equals(CHARACTERSTRING_ELEMENT)) ||
             (localName.equals(GROUP_ELEMENT)) ||
             (localName.equals(UNIVERSALCHARACTER_ELEMENT)) ||
             (localName.equals(BEGINOFLINE_ELEMENT)) ||
             (localName.equals(ENDOFLINE_ELEMENT))) &&
             (state==STATE_LEXEME))
    {
      addPatternToParent((Pattern)stack.pop());
    }
    else if ((localName.equals(CHARACTERCLASS_ELEMENT)) && (state==STATE_CHARACTERCLASS))
    {
      addPatternToParent((Pattern)stack.pop());

      state = STATE_LEXEME;
    }
    else if (((localName.equals(CHARACTERSET_ELEMENT)) ||
             (localName.equals(CHARACTERINTERVAL_ELEMENT))) &&
             (state==STATE_CHARACTERCLASSELEMENT))
    {
      CharacterClassElement characterclasselement = (CharacterClassElement)stack.pop();
      CharacterClass characterclass = (CharacterClass)stack.peek();

      characterclass.addCharacterClassElement(characterclasselement);

      state = STATE_CHARACTERCLASS;
    }
    else
      throw new SAXException("Unexpected element "+qName+" at "+getLocation());
  }
}