Package net.sourceforge.chaperon.build

Source Code of net.sourceforge.chaperon.build.PatternAutomatonBuilder

/*
*  Copyright (C) Chaperon. All rights reserved.
*  -------------------------------------------------------------------------
*  This software is published under the terms of the Apache Software License
*  version 1.1, a copy of which has been included  with this distribution in
*  the LICENSE file.
*/

package net.sourceforge.chaperon.build;

import net.sourceforge.chaperon.model.Violations;
import net.sourceforge.chaperon.model.pattern.Alternation;
import net.sourceforge.chaperon.model.pattern.BeginOfLine;
import net.sourceforge.chaperon.model.pattern.CharacterClass;
import net.sourceforge.chaperon.model.pattern.CharacterInterval;
import net.sourceforge.chaperon.model.pattern.CharacterSet;
import net.sourceforge.chaperon.model.pattern.CharacterString;
import net.sourceforge.chaperon.model.pattern.Concatenation;
import net.sourceforge.chaperon.model.pattern.EndOfLine;
import net.sourceforge.chaperon.model.pattern.Pattern;
import net.sourceforge.chaperon.model.pattern.PatternGroup;
import net.sourceforge.chaperon.model.pattern.UniversalCharacter;
import net.sourceforge.chaperon.process.PatternAutomaton;

/**
* This class represents a builder for the pattern automata.
*
* @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
* @version CVS $Id: PatternAutomatonBuilder.java,v 1.7 2003/12/09 19:55:52 benedikta Exp $
*/
public class PatternAutomatonBuilder
{
  private Pattern pattern = null;
  private PatternAutomaton automaton = null;
  private Violations violations = new Violations();
  private int statecount = 0;
  private int stateindex = 0;
  private int groupcount = 0;
  private int groupindex = 0;

  /**
   * Create a builder for the pattern automata.
   *
   * @param pattern Pattern, which should be used to build the pattern automaton.
   */
  public PatternAutomatonBuilder(Pattern pattern)
  {
    violations.addViolations(pattern.validate());

    if ((violations!=null) && (violations.getViolationCount()>0))
      throw new IllegalArgumentException("Pattern is not valid: "+violations.getViolation(0));

    this.pattern = pattern;

    statecount = getStateCount(pattern)+3;
    stateindex = statecount-1;

    groupcount = getGroupCount(pattern);
    groupindex = groupcount;

    PatternAutomaton automaton = new PatternAutomaton(statecount);

    automaton.setGroupCount(groupcount+1);

    int finalstate = stateindex--;

    automaton.setFinalState(finalstate);

    automaton.setType(stateindex, PatternAutomaton.TYPE_GROUPEND);
    automaton.setGroupIndex(stateindex, 0);
    automaton.setTransitions(stateindex, new int[]{finalstate});

    int state = stateindex--;

    state = traverse(automaton, pattern, state);

    automaton.setType(stateindex, PatternAutomaton.TYPE_GROUPSTART);
    automaton.setGroupIndex(stateindex, 0);
    automaton.setTransitions(stateindex, new int[]{state});

    automaton.setFirstState(stateindex);

    this.automaton = automaton;
  }

  /**
   * Return the builded automaton. This method will return null, if an error occurs.
   *
   * @return Pattern automaton.
   */
  public PatternAutomaton getPatternAutomaton()
  {
    return automaton;
  }

  /**
   * Calculates the count of group in the pattern.
   *
   * @param element Root pattern.
   *
   * @return Count of groups.
   */
  private int getGroupCount(Pattern element)
  {
    int groupcount = 0;

    if (element instanceof Alternation)
    {
      Alternation alternation = (Alternation)element;

      for (int i = 0; i<alternation.getPatternCount(); i++)
        groupcount += getGroupCount(alternation.getPattern(i));
    }
    else if (element instanceof Concatenation)
    {
      Concatenation concatenation = (Concatenation)element;

      for (int i = 0; i<concatenation.getPatternCount(); i++)
        groupcount += getGroupCount(concatenation.getPattern(i));
    }
    else if (element instanceof PatternGroup)
    {
      groupcount++;

      PatternGroup group = (PatternGroup)element;

      for (int i = 0; i<group.getPatternCount(); i++)
        groupcount += getGroupCount(group.getPattern(i));
    }

    return groupcount;
  }

  /**
   * Calculates the count of states.
   *
   * @param element Root pattern.
   *
   * @return Count of states.
   */
  private int getStateCount(Pattern element)
  {
    int factor = 1;
    int offset = 0;
    int statecount = 0;

    // generate closure for p
    if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==1))
    {
      // nothing
    }

    // generate closure for p?
    else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==1))
      offset = 1;

    // generate closure for p+
    else if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==Integer.MAX_VALUE))
      offset = 1;

    // generate closure for p*
    else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==Integer.MAX_VALUE))
      offset = 2;

    // generate closure for p{n,m}
    else
    {
      factor = element.getMaxOccurs();
      offset = 1;
    }

    if (element instanceof Alternation)
    {
      Alternation alternation = (Alternation)element;

      for (int i = 0; i<alternation.getPatternCount(); i++)
        statecount += getStateCount(alternation.getPattern(i));

      if (alternation.getPatternCount()>1)
        statecount++;
    }
    else if (element instanceof BeginOfLine)
      statecount = 1;
    else if (element instanceof CharacterClass)
    {
      CharacterClass characterclass = (CharacterClass)element;

      for (int i = 0; i<characterclass.getCharacterClassElementCount(); i++)
      {
        if (characterclass.getCharacterClassElement(i) instanceof CharacterInterval)
          statecount++;
        else if (characterclass.getCharacterClassElement(i) instanceof CharacterSet)
        {
          CharacterSet set = (CharacterSet)characterclass.getCharacterClassElement(i);

          statecount += set.getCharacters().length();
        }
      }

      statecount++;
    }
    else if (element instanceof CharacterString)
    {
      CharacterString string = (CharacterString)element;

      statecount += string.getString().length();
    }
    else if (element instanceof Concatenation)
    {
      Concatenation concatenation = (Concatenation)element;

      for (int i = 0; i<concatenation.getPatternCount(); i++)
        statecount += getStateCount(concatenation.getPattern(i));
    }
    else if (element instanceof EndOfLine)
      statecount = 1;
    else if (element instanceof PatternGroup)
    {
      statecount = 2;

      PatternGroup group = (PatternGroup)element;

      for (int i = 0; i<group.getPatternCount(); i++)
        statecount += getStateCount(group.getPattern(i));
    }
    else if (element instanceof UniversalCharacter)
      statecount = 1;
    else
      throw new IllegalArgumentException("Pattern element not recognized");

    return (factor*statecount)+offset;
  }

  /*
   * @param laststate First state of the following pattern
   *
   * @return First state of the current pattern
   */

  /**
   * Build the automaton by traversing the pattern tree.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate Last used state in the automaton.
   *
   * @return New last state in the automaton.
   */
  private int traverse(PatternAutomaton automaton, Pattern element, int laststate)
  {
    int firststate;

    // generate closure for p
    if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==1))
      firststate = evalPattern(automaton, element, laststate);

    // generate closure for p?
    else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==1))
    {
      int s1 = evalPattern(automaton, element, laststate);
      automaton.setTransitions(stateindex, new int[]{s1, laststate});
      firststate = stateindex--;
    }

    // generate closure for p+
    else if ((element.getMinOccurs()==1) && (element.getMaxOccurs()==Integer.MAX_VALUE))
    {
      int s1 = stateindex--;
      firststate = evalPattern(automaton, element, s1);
      automaton.setTransitions(s1, new int[]{firststate, laststate});
    }

    // generate closure for p*
    else if ((element.getMinOccurs()==0) && (element.getMaxOccurs()==Integer.MAX_VALUE))
    {
      int s2 = stateindex--;
      int s1 = evalPattern(automaton, element, s2);
      automaton.setTransitions(s2, new int[]{s1, laststate});

      firststate = stateindex--;
      automaton.setTransitions(firststate, new int[]{s1, laststate});
    }

    // generate closure for p{n,m}
    else
    {
      int s2 = laststate;
      for (int i = 0; i<element.getMinOccurs(); i++)
        s2 = evalPattern(automaton, element, s2);

      int s1 = s2;

      for (int i = element.getMinOccurs(); i<element.getMaxOccurs(); i++)
      {
        s1 = evalPattern(automaton, element, s1);
        if (i>element.getMinOccurs())
          automaton.addTransition(s1, s2);
      }

      firststate = stateindex--;
      automaton.setTransitions(firststate, new int[]{s1, s2});
    }

    if (element instanceof PatternGroup)
      groupindex--;

    return firststate;
  }

  /**
   * Evalutates a pattern element.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate Last used state in the automaton.
   *
   * @return New last state in the automaton.
   */
  private int evalPattern(PatternAutomaton automaton, Pattern element, int laststate)
  {
    if (element instanceof Alternation)
      return evalAlternation(automaton, (Alternation)element, laststate);
    else if (element instanceof BeginOfLine)
      return evalBeginOfLine(automaton, (BeginOfLine)element, laststate);
    else if (element instanceof CharacterClass)
      return evalCharacterClass(automaton, (CharacterClass)element, laststate);
    else if (element instanceof CharacterString)
      return evalCharacterString(automaton, (CharacterString)element, laststate);
    else if (element instanceof Concatenation)
      return evalConcatenation(automaton, (Concatenation)element, laststate);
    else if (element instanceof EndOfLine)
      return evalEndOfLine(automaton, (EndOfLine)element, laststate);
    else if (element instanceof PatternGroup)
      return evalPatternGroup(automaton, (PatternGroup)element, laststate);
    else if (element instanceof UniversalCharacter)
      return evalUniversalCharacter(automaton, (UniversalCharacter)element, laststate);
    else
      throw new IllegalArgumentException("Pattern element not recognized");
  }

  /**
   * Create the states and transitions for an alternation
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate Last used state in the automaton.
   *
   * @return New last state in the automaton.
   */
  private int evalAlternation(PatternAutomaton automaton, Alternation element, int laststate)
  {
    if (element.getPatternCount()==1)
      return traverse(automaton, element.getPattern(0), laststate);
    else
    {
      int nextstate = stateindex--;
      int state;

      for (int i = element.getPatternCount()-1; i>=0; i--)
      {
        state = traverse(automaton, element.getPattern(i), laststate);
        automaton.addTransition(nextstate, state);
      }

      return nextstate;
    }
  }

  /**
   * Create the states and transitions for a pattern that matches  the begin of line.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate Last used state in the automaton.
   *
   * @return New last state in the automaton.
   */
  private int evalBeginOfLine(PatternAutomaton automaton, BeginOfLine element, int laststate)
  {
    automaton.setType(stateindex, PatternAutomaton.TYPE_BOL);
    automaton.setTransitions(stateindex, new int[]{laststate});
    return stateindex--;
  }

  /**
   * Create the states and transition for a character class.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate Last used state in the automaton.
   *
   * @return New last state in the automaton.
   */
  private int evalCharacterClass(PatternAutomaton automaton, CharacterClass element, int laststate)
  {
    int state;

    if (!element.isExclusive())
    {
      int firststate = stateindex--;

      for (int i = 0; i<element.getCharacterClassElementCount(); i++)
      {
        if (element.getCharacterClassElement(i) instanceof CharacterInterval)
        {
          CharacterInterval interval = (CharacterInterval)element.getCharacterClassElement(i);

          automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH);
          automaton.setInterval(stateindex, interval.getMinimum(), interval.getMaximum());
          automaton.addTransition(stateindex, laststate);
          state = stateindex--;
          automaton.addTransition(firststate, state);
        }
        else if (element.getCharacterClassElement(i) instanceof CharacterSet)
        {
          CharacterSet set = (CharacterSet)element.getCharacterClassElement(i);
          String chars = set.getCharacters();

          for (int j = 0; j<chars.length(); j++)
          {
            automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH);
            automaton.setInterval(stateindex, chars.charAt(j), chars.charAt(j));
            automaton.addTransition(stateindex, laststate);
            state = stateindex--;
            automaton.addTransition(firststate, state);
          }
        }
      }

      return firststate;
    }
    else
    {
      state = stateindex--;
      automaton.setType(state, PatternAutomaton.TYPE_MATCHANY);
      automaton.setTransitions(state, new int[]{laststate});
      for (int i = element.getCharacterClassElementCount()-1; i>=0; i--)
      {
        if (element.getCharacterClassElement(i) instanceof CharacterInterval)
        {
          CharacterInterval interval = (CharacterInterval)element.getCharacterClassElement(i);

          automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH);
          automaton.setInterval(stateindex, interval.getMinimum(), interval.getMaximum());
          automaton.setTransitions(stateindex, new int[]{state});
          state = stateindex--;
        }
        else if (element.getCharacterClassElement(i) instanceof CharacterSet)
        {
          CharacterSet set = (CharacterSet)element.getCharacterClassElement(i);
          String chars = set.getCharacters();

          for (int j = 0; j<chars.length(); j++)
          {
            automaton.setType(stateindex, PatternAutomaton.TYPE_EXMATCH);
            automaton.setInterval(stateindex, chars.charAt(j), chars.charAt(j));
            automaton.setType(stateindex, PatternAutomaton.TYPE_EXMATCH);
            automaton.setTransitions(stateindex, new int[]{state});
            state = stateindex--;
          }
        }
      }

      return state;
    }
  }

  /**
   * Create the states and transitions for a string of characters.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate Last used state in the automaton.
   *
   * @return New last state in the automaton.
   */
  private int evalCharacterString(PatternAutomaton automaton, CharacterString element, int laststate)
  {
    int state = laststate;

    for (int i = element.getString().length()-1; i>=0; i--)
    {
      automaton.setType(stateindex, PatternAutomaton.TYPE_MATCH);
      automaton.setInterval(stateindex, element.getString().charAt(i), element.getString().charAt(i));
      automaton.setTransitions(stateindex, new int[]{state});
      state = stateindex--;
    }

    return state;
  }

  /**
   * Create the states and transitions for a catenation of pattern
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate Last used state in the automaton.
   *
   * @return New last state in the automaton.
   */
  private int evalConcatenation(PatternAutomaton automaton, Concatenation element, int laststate)
  {
    int state = laststate;

    for (int i = element.getPatternCount()-1; i>=0; i--)
      state = traverse(automaton, element.getPattern(i), state);

    return state;
  }

  /**
   * Create the states and transitions for a pattern that matches  the end of line.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate Last used state in the automaton.
   *
   * @return New last state in the automaton.
   */
  private int evalEndOfLine(PatternAutomaton automaton, EndOfLine element, int laststate)
  {
    automaton.setType(stateindex, PatternAutomaton.TYPE_EOL);
    automaton.setTransitions(stateindex, new int[]{laststate});
    return stateindex--;
  }

  /**
   * Create the states and transitions for a pattern group.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate Last used state in the automaton.
   *
   * @return New last state in the automaton.
   */
  private int evalPatternGroup(PatternAutomaton automaton, PatternGroup element, int laststate)
  {
    int endstate = stateindex--;

    automaton.setType(endstate, PatternAutomaton.TYPE_GROUPEND);
    automaton.setGroupIndex(endstate, groupindex);
    automaton.setTransitions(endstate, new int[]{laststate});

    int nextstate = endstate;

    for (int i = element.getPatternCount()-1; i>=0; i--)
      nextstate = traverse(automaton, element.getPattern(i), nextstate);

    automaton.setGroupIndex(endstate, groupindex);

    automaton.setType(stateindex, PatternAutomaton.TYPE_GROUPSTART);
    automaton.setGroupIndex(stateindex, groupindex);
    automaton.setTransitions(stateindex, new int[]{nextstate});
    return stateindex--;
  }

  /**
   * Create the states and transition for an universal character.
   *
   * @param automaton The current automaton.
   * @param element The current pattern element.
   * @param laststate Last used state in the automaton.
   *
   * @return New last state in the automaton.
   */
  private int evalUniversalCharacter(PatternAutomaton automaton, UniversalCharacter element,
                                     int laststate)
  {
    automaton.setType(stateindex, PatternAutomaton.TYPE_MATCHANY);
    automaton.setTransitions(stateindex, new int[]{laststate});
    return stateindex--;
  }
}
TOP

Related Classes of net.sourceforge.chaperon.build.PatternAutomatonBuilder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.