/*
* Copyright (C) Chaperon. All rights reserved.
* -------------------------------------------------------------------------
* This software is published under the terms of the Apache Software License
* version 1.1, a copy of which has been included with this distribution in
* the LICENSE file.
*/
package net.sourceforge.chaperon.build;
import net.sourceforge.chaperon.model.Violations;
import net.sourceforge.chaperon.model.pattern.Alternation;
import net.sourceforge.chaperon.model.pattern.BeginOfLine;
import net.sourceforge.chaperon.model.pattern.CharacterClass;
import net.sourceforge.chaperon.model.pattern.CharacterInterval;
import net.sourceforge.chaperon.model.pattern.CharacterSet;
import net.sourceforge.chaperon.model.pattern.CharacterString;
import net.sourceforge.chaperon.model.pattern.Concatenation;
import net.sourceforge.chaperon.model.pattern.EndOfLine;
import net.sourceforge.chaperon.model.pattern.Pattern;
import net.sourceforge.chaperon.model.pattern.PatternGroup;
import net.sourceforge.chaperon.model.pattern.UniversalCharacter;
import net.sourceforge.chaperon.process.PatternAutomaton;
/**
* This class represents a builder for the pattern automata.
*
* @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
* @version CVS $Id: PatternAutomatonBuilder.java,v 1.7 2003/12/09 19:55:52 benedikta Exp $
*/
public class PatternAutomatonBuilder
{
  // Pattern the automaton was built from.
  private Pattern pattern = null;

  // Result of the build; assigned at the very end of the constructor.
  private PatternAutomaton automaton = null;

  // Collects the violations reported by Pattern.validate().
  private Violations violations = new Violations();

  // Total number of states allocated for the automaton.
  private int statecount = 0;

  // Index of the next free state. States are handed out from the highest
  // index downwards, because the pattern tree is walked back to front.
  private int stateindex = 0;

  // Number of explicit pattern groups (the implicit group 0 is not counted).
  private int groupcount = 0;

  // Group index that is assigned to the next pattern group which gets closed.
  private int groupindex = 0;

  /**
   * Create a builder for the pattern automata. The automaton is built immediately
   * and can be fetched with {@link #getPatternAutomaton()} afterwards.
   *
   * @param pattern Pattern, which should be used to build the pattern automaton.
   *
   * @throws NullPointerException If the pattern is null.
   * @throws IllegalArgumentException If the validation of the pattern reports a violation,
   *         or if the pattern contains an element of an unknown type.
   */
  public PatternAutomatonBuilder(Pattern pattern)
  {
    if (pattern==null)
      throw new NullPointerException("pattern");

    violations.addViolations(pattern.validate());
    if (violations.getViolationCount()>0)
      throw new IllegalArgumentException("Pattern is not valid: "+violations.getViolation(0));

    this.pattern = pattern;

    // Three additional states: final state, end and start of the implicit group 0.
    statecount = getStateCount(pattern)+3;
    stateindex = statecount-1;

    groupcount = getGroupCount(pattern);
    groupindex = groupcount;

    PatternAutomaton result = new PatternAutomaton(statecount);

    // Group 0 spans the whole pattern, hence one more than the explicit groups.
    result.setGroupCount(groupcount+1);

    int finalstate = stateindex--;
    result.setFinalState(finalstate);

    // End of group 0, leads into the final state.
    result.setType(stateindex, PatternAutomaton.TYPE_GROUPEND);
    result.setGroupIndex(stateindex, 0);
    result.setTransitions(stateindex, new int[]{finalstate});

    int state = stateindex--;

    // Build the states for the pattern itself, back to front.
    state = traverse(result, pattern, state);

    // Start of group 0, which is also the entry of the automaton.
    result.setType(stateindex, PatternAutomaton.TYPE_GROUPSTART);
    result.setGroupIndex(stateindex, 0);
    result.setTransitions(stateindex, new int[]{state});
    result.setFirstState(stateindex);

    this.automaton = result;
  }

  /**
   * Return the builded automaton. Since the constructor throws an exception if the
   * automaton cannot be built, this method does not return null for a successfully
   * created builder.
   *
   * @return Pattern automaton.
   */
  public PatternAutomaton getPatternAutomaton()
  {
    return automaton;
  }

  /**
   * Calculates the count of group in the pattern. Only alternations, concatenations
   * and pattern groups are descended into; occurrence constraints are not considered.
   *
   * @param element Root pattern.
   *
   * @return Count of groups.
   */
  private int getGroupCount(Pattern element)
  {
    int count = 0;

    if (element instanceof Alternation)
    {
      Alternation alternation = (Alternation)element;

      for (int i = 0; i<alternation.getPatternCount(); i++)
        count += getGroupCount(alternation.getPattern(i));
    }
    else if (element instanceof Concatenation)
    {
      Concatenation concatenation = (Concatenation)element;

      for (int i = 0; i<concatenation.getPatternCount(); i++)
        count += getGroupCount(concatenation.getPattern(i));
    }
    else if (element instanceof PatternGroup)
    {
      // The group itself plus all groups nested in it.
      count++;

      PatternGroup group = (PatternGroup)element;

      for (int i = 0; i<group.getPatternCount(); i++)
        count += getGroupCount(group.getPattern(i));
    }

    return count;
  }

  /**
   * Calculates the count of states, which are needed to represent the pattern.
   * The result has to agree with the number of states, which traverse() and the
   * eval methods consume for the same element.
   *
   * @param element Root pattern.
   *
   * @return Count of states.
   *
   * @throws IllegalArgumentException If the type of the element is unknown.
   */
  private int getStateCount(Pattern element)
  {
    // factor: how often the states of the element are instantiated
    // offset: additional states needed by the closure
    int factor = 1;
    int offset = 0;
    int count = 0;

    // generate closure for p
    if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==1))
    {
      // nothing
    }

    // generate closure for p?
    else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==1))
      offset = 1;

    // generate closure for p+
    else if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==Integer.MAX_VALUE))
      offset = 1;

    // generate closure for p*
    else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==Integer.MAX_VALUE))
      offset = 2;

    // generate closure for p{n,m}
    else
    {
      // NOTE(review): for an unbounded maximum with a minimum greater than 1 the
      // product below overflows; presumably Pattern.validate() rejects such a
      // pattern - confirm.
      factor = element.getMaxOccurs();
      offset = 1;
    }

    if (element instanceof Alternation)
    {
      Alternation alternation = (Alternation)element;

      for (int i = 0; i<alternation.getPatternCount(); i++)
        count += getStateCount(alternation.getPattern(i));

      // Branching state, which is only created for more than one alternative.
      if (alternation.getPatternCount()>1)
        count++;
    }
    else if (element instanceof BeginOfLine)
      count = 1;
    else if (element instanceof CharacterClass)
    {
      CharacterClass characterclass = (CharacterClass)element;

      for (int i = 0; i<characterclass.getCharacterClassElementCount(); i++)
      {
        Object member = characterclass.getCharacterClassElement(i);

        if (member instanceof CharacterInterval)
          count++;
        else if (member instanceof CharacterSet)
          count += ((CharacterSet)member).getCharacters().length();
      }

      // Entry state (inclusive class) or match-any state (exclusive class).
      count++;
    }
    else if (element instanceof CharacterString)
    {
      CharacterString string = (CharacterString)element;

      count += string.getString().length();
    }
    else if (element instanceof Concatenation)
    {
      Concatenation concatenation = (Concatenation)element;

      for (int i = 0; i<concatenation.getPatternCount(); i++)
        count += getStateCount(concatenation.getPattern(i));
    }
    else if (element instanceof EndOfLine)
      count = 1;
    else if (element instanceof PatternGroup)
    {
      // Start and end state of the group.
      count = 2;

      PatternGroup group = (PatternGroup)element;

      for (int i = 0; i<group.getPatternCount(); i++)
        count += getStateCount(group.getPattern(i));
    }
    else if (element instanceof UniversalCharacter)
      count = 1;
    else
      throw new IllegalArgumentException("Pattern element not recognized");

    return (factor*count)+offset;
  }

  /**
   * Build the automaton by traversing the pattern tree. The states for the element
   * are created according to its occurrence constraints (p, p?, p+, p* or p{n,m}).
   * The tree is processed back to front, so the state following the element
   * is already known when the element is processed.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate First state of the following pattern, i.e. the state which is
   *        reached after the element.
   *
   * @return First state of the current pattern.
   */
  private int traverse(PatternAutomaton automaton, Pattern element, int laststate)
  {
    int firststate;

    // generate closure for p
    if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==1))
      firststate = evalPattern(automaton, element, laststate);

    // generate closure for p?
    else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==1))
    {
      int s1 = evalPattern(automaton, element, laststate);

      // Branch: either enter the element or skip it.
      automaton.setTransitions(stateindex, new int[]{s1, laststate});
      firststate = stateindex--;
    }

    // generate closure for p+
    else if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==Integer.MAX_VALUE))
    {
      int s1 = stateindex--;

      firststate = evalPattern(automaton, element, s1);

      // After the element: either repeat it or leave.
      automaton.setTransitions(s1, new int[]{firststate, laststate});
    }

    // generate closure for p*
    else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==Integer.MAX_VALUE))
    {
      int s2 = stateindex--;
      int s1 = evalPattern(automaton, element, s2);

      // After the element: either repeat it or leave.
      automaton.setTransitions(s2, new int[]{s1, laststate});

      // Before the element: either enter it or skip it.
      firststate = stateindex--;
      automaton.setTransitions(firststate, new int[]{s1, laststate});
    }

    // generate closure for p{n,m}
    else
    {
      // Mandatory occurrences, s2 is the entry of the first one.
      int s2 = laststate;

      for (int i = 0; i<element.getMinOccurs(); i++)
        s2 = evalPattern(automaton, element, s2);

      // Optional occurrences are put in front of the mandatory ones.
      int s1 = s2;

      for (int i = element.getMinOccurs(); i<element.getMaxOccurs(); i++)
      {
        s1 = evalPattern(automaton, element, s1);
        if (i>element.getMinOccurs())
          automaton.addTransition(s1, s2);
      }

      firststate = stateindex--;
      automaton.setTransitions(firststate, new int[]{s1, s2});
    }

    // The group index was used by evalPatternGroup(), the next group gets a lower one.
    if (element instanceof PatternGroup)
      groupindex--;

    return firststate;
  }

  /**
   * Evaluates a pattern element by dispatching to the method for the concrete
   * type of the element. Occurrence constraints are not considered here.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate First state of the following pattern.
   *
   * @return First state of the current pattern.
   *
   * @throws IllegalArgumentException If the type of the element is unknown.
   */
  private int evalPattern(PatternAutomaton automaton, Pattern element, int laststate)
  {
    if (element instanceof Alternation)
      return evalAlternation(automaton, (Alternation)element, laststate);
    else if (element instanceof BeginOfLine)
      return evalBeginOfLine(automaton, (BeginOfLine)element, laststate);
    else if (element instanceof CharacterClass)
      return evalCharacterClass(automaton, (CharacterClass)element, laststate);
    else if (element instanceof CharacterString)
      return evalCharacterString(automaton, (CharacterString)element, laststate);
    else if (element instanceof Concatenation)
      return evalConcatenation(automaton, (Concatenation)element, laststate);
    else if (element instanceof EndOfLine)
      return evalEndOfLine(automaton, (EndOfLine)element, laststate);
    else if (element instanceof PatternGroup)
      return evalPatternGroup(automaton, (PatternGroup)element, laststate);
    else if (element instanceof UniversalCharacter)
      return evalUniversalCharacter(automaton, (UniversalCharacter)element, laststate);
    else
      throw new IllegalArgumentException("Pattern element not recognized");
  }

  /**
   * Create the states and transitions for an alternation. A single alternative is
   * traversed directly; otherwise a branching state is created, which has a
   * transition to the first state of every alternative.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate First state of the following pattern.
   *
   * @return First state of the alternation.
   */
  private int evalAlternation(PatternAutomaton automaton, Alternation element, int laststate)
  {
    if (element.getPatternCount()==1)
      return traverse(automaton, element.getPattern(0), laststate);

    int branchstate = stateindex--;

    for (int i = element.getPatternCount()-1; i>=0; i--)
    {
      int state = traverse(automaton, element.getPattern(i), laststate);

      automaton.addTransition(branchstate, state);
    }

    return branchstate;
  }

  /**
   * Create the states and transitions for a pattern that matches the begin of line.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate First state of the following pattern.
   *
   * @return The created state.
   */
  private int evalBeginOfLine(PatternAutomaton automaton, BeginOfLine element, int laststate)
  {
    automaton.setType(stateindex, PatternAutomaton.TYPE_BOL);
    automaton.setTransitions(stateindex, new int[]{laststate});
    return stateindex--;
  }

  /**
   * Create the states and transition for a character class.
   *
   * <p>
   * For an inclusive class an entry state is created, which branches to one
   * TYPE_MATCH state per interval and per character of a set; all of them lead to
   * laststate.
   * </p>
   *
   * <p>
   * For an exclusive class a chain of TYPE_EXMATCH states (one per interval and per
   * character of a set) is put in front of a TYPE_MATCHANY state, which leads to
   * laststate.
   * </p>
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate First state of the following pattern.
   *
   * @return First state of the character class.
   */
  private int evalCharacterClass(PatternAutomaton automaton, CharacterClass element, int laststate)
  {
    int state;

    if (!element.isExclusive())
    {
      int firststate = stateindex--;

      for (int i = 0; i<element.getCharacterClassElementCount(); i++)
      {
        Object member = element.getCharacterClassElement(i);

        if (member instanceof CharacterInterval)
        {
          CharacterInterval interval = (CharacterInterval)member;

          automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH);
          automaton.setInterval(stateindex, interval.getMinimum(), interval.getMaximum());
          automaton.addTransition(stateindex, laststate);
          state = stateindex--;
          automaton.addTransition(firststate, state);
        }
        else if (member instanceof CharacterSet)
        {
          String chars = ((CharacterSet)member).getCharacters();

          // One match state per character of the set.
          for (int j = 0; j<chars.length(); j++)
          {
            automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH);
            automaton.setInterval(stateindex, chars.charAt(j), chars.charAt(j));
            automaton.addTransition(stateindex, laststate);
            state = stateindex--;
            automaton.addTransition(firststate, state);
          }
        }
      }

      return firststate;
    }

    // Exclusive class: the chain ends in a state which accepts any character.
    state = stateindex--;
    automaton.setType(state, PatternAutomaton.TYPE_MATCHANY);
    automaton.setTransitions(state, new int[]{laststate});

    for (int i = element.getCharacterClassElementCount()-1; i>=0; i--)
    {
      Object member = element.getCharacterClassElement(i);

      if (member instanceof CharacterInterval)
      {
        CharacterInterval interval = (CharacterInterval)member;

        // An excluded interval has to use the same state type as an excluded
        // character of a set (TYPE_EXMATCH), not a positive TYPE_MATCH.
        automaton.setType(stateindex, PatternAutomaton.TYPE_EXMATCH);
        automaton.setInterval(stateindex, interval.getMinimum(), interval.getMaximum());
        automaton.setTransitions(stateindex, new int[]{state});
        state = stateindex--;
      }
      else if (member instanceof CharacterSet)
      {
        String chars = ((CharacterSet)member).getCharacters();

        for (int j = 0; j<chars.length(); j++)
        {
          automaton.setType(stateindex, PatternAutomaton.TYPE_EXMATCH);
          automaton.setInterval(stateindex, chars.charAt(j), chars.charAt(j));
          automaton.setTransitions(stateindex, new int[]{state});
          state = stateindex--;
        }
      }
    }

    return state;
  }

  /**
   * Create the states and transitions for a string of characters. One TYPE_MATCH
   * state is created per character, starting with the last character of the string.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate First state of the following pattern.
   *
   * @return First state of the string, or laststate if the string is empty.
   */
  private int evalCharacterString(PatternAutomaton automaton, CharacterString element, int laststate)
  {
    String string = element.getString();
    int state = laststate;

    for (int i = string.length()-1; i>=0; i--)
    {
      automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH);
      automaton.setInterval(stateindex, string.charAt(i), string.charAt(i));
      automaton.setTransitions(stateindex, new int[]{state});
      state = stateindex--;
    }

    return state;
  }

  /**
   * Create the states and transitions for a catenation of pattern. The children are
   * traversed from the last to the first, every child leads to the first state of
   * its successor.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate First state of the following pattern.
   *
   * @return First state of the concatenation, or laststate if it has no children.
   */
  private int evalConcatenation(PatternAutomaton automaton, Concatenation element, int laststate)
  {
    int state = laststate;

    for (int i = element.getPatternCount()-1; i>=0; i--)
      state = traverse(automaton, element.getPattern(i), state);

    return state;
  }

  /**
   * Create the states and transitions for a pattern that matches the end of line.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate First state of the following pattern.
   *
   * @return The created state.
   */
  private int evalEndOfLine(PatternAutomaton automaton, EndOfLine element, int laststate)
  {
    automaton.setType(stateindex, PatternAutomaton.TYPE_EOL);
    automaton.setTransitions(stateindex, new int[]{laststate});
    return stateindex--;
  }

  /**
   * Create the states and transitions for a pattern group. The children of the
   * group are enclosed by a TYPE_GROUPSTART and a TYPE_GROUPEND state, which both
   * carry the index of the group.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate First state of the following pattern.
   *
   * @return Start state of the group.
   */
  private int evalPatternGroup(PatternAutomaton automaton, PatternGroup element, int laststate)
  {
    int endstate = stateindex--;

    automaton.setType(endstate, PatternAutomaton.TYPE_GROUPEND);
    automaton.setGroupIndex(endstate, groupindex);
    automaton.setTransitions(endstate, new int[]{laststate});

    int nextstate = endstate;

    for (int i = element.getPatternCount()-1; i>=0; i--)
      nextstate = traverse(automaton, element.getPattern(i), nextstate);

    // Nested groups have decremented groupindex in the meantime, so the end state
    // is set again to get the same index as the start state.
    automaton.setGroupIndex(endstate, groupindex);

    automaton.setType(stateindex, PatternAutomaton.TYPE_GROUPSTART);
    automaton.setGroupIndex(stateindex, groupindex);
    automaton.setTransitions(stateindex, new int[]{nextstate});
    return stateindex--;
  }

  /**
   * Create the states and transition for an universal character.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate First state of the following pattern.
   *
   * @return The created state.
   */
  private int evalUniversalCharacter(PatternAutomaton automaton, UniversalCharacter element,
                                     int laststate)
  {
    automaton.setType(stateindex, PatternAutomaton.TYPE_MATCHANY);
    automaton.setTransitions(stateindex, new int[]{laststate});
    return stateindex--;
  }
}