Source Code of org.antlr.analysis.Label

/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.analysis;


import org.antlr.misc.IntSet;
import org.antlr.misc.IntervalSet;
import org.antlr.tool.Grammar;


/** A state machine transition label.  A label can be either a simple
 *  label such as a token or character.  A label can be a set of char or
 *  tokens.  It can be an epsilon transition.  It can be a semantic predicate
 *  (which assumes an epsilon transition) or a tree of predicates (in a DFA).
 *  Special label types have to be &lt; 0 to avoid conflict with char.
 */
public class Label implements Comparable<Label>, Cloneable {
    public static final int INVALID = -7;


  public static final int ACTION = -6;
  
  public static final int EPSILON = -5;


    public static final String EPSILON_STR = "<EPSILON>";


    /** label is a semantic predicate; implies label is epsilon also */
    public static final int SEMPRED = -4;


    /** label is a set of tokens or char */
    public static final int SET = -3;


    /** End of Token is like EOF for lexer rules.  It implies that no more
     *  characters are available and that NFA conversion should terminate
     *  for this path.  For example
     *
     *  A : 'a' 'b' | 'a' ;
     *
     *  yields a DFA predictor:
     *
     *  o-a-&gt;o-b-&gt;1   predict alt 1
     *       |
     *       |-EOT-&gt;o predict alt 2
     *
     *  To generate code for EOT, treat it as the "default" path, which
     *  implies there is no way to mismatch a char for the state from
     *  which the EOT emanates.
     */
    public static final int EOT = -2;


    public static final int EOF = -1;


  /** We have labels like EPSILON that are below 0; it's hard to
   *  store them in an array with negative index so use this
   *  constant as an index shift when accessing arrays based upon
   *  token type.  If real token type is i, then array index would be
   *  NUM_FAUX_LABELS + i.
   */
  public static final int NUM_FAUX_LABELS = -INVALID;


    /** Anything at this value or larger can be considered a simple atom int
     *  for easy comparison during analysis only; faux labels are not used
   *  during parse time for real token types or char values.
     */
    public static final int MIN_ATOM_VALUE = EOT;


    // TODO: is 0 a valid unicode char? max is FFFF -1, right?
    public static final int MIN_CHAR_VALUE = '\u0000';
    public static final int MAX_CHAR_VALUE = '\uFFFF';


  /** End of rule token type; imaginary token type used only for
   *  local, partial FOLLOW sets to indicate that the local FOLLOW
   *  hit the end of rule.  During error recovery, the local FOLLOW
   *  of a token reference may go beyond the end of the rule and have
   *  to use FOLLOW(rule).  I have to just shift the token types to 2..n
   *  rather than 1..n to accommodate this imaginary token in my bitsets.
   *  If I didn't use a bitset implementation for runtime sets, I wouldn't
   *  need this.  EOF is another candidate for a run time token type for
   *  parsers.  Follow sets are not computed for lexers so we do not have
   *  this issue.
   */
  public static final int EOR_TOKEN_TYPE =
    org.antlr.runtime.Token.EOR_TOKEN_TYPE;


  public static final int DOWN = org.antlr.runtime.Token.DOWN;
  public static final int UP = org.antlr.runtime.Token.UP;


    /** tokens and char range overlap; tokens are MIN_TOKEN_TYPE..n */
  public static final int MIN_TOKEN_TYPE =
    org.antlr.runtime.Token.MIN_TOKEN_TYPE;


    /** The wildcard '.' char atom implies all valid characters==UNICODE */
    //public static final IntSet ALLCHAR = IntervalSet.of(MIN_CHAR_VALUE,MAX_CHAR_VALUE);


    /** The token type or character value; or, signifies special label. */
    protected int label;


    /** A set of token types or character codes if label==SET */
  // TODO: try IntervalSet for everything
    protected IntSet labelSet;


    public Label(int label) {
        this.label = label;
    }


    /** Make a set label */
    public Label(IntSet labelSet) {
    if ( labelSet==null ) {
      this.label = SET;
      this.labelSet = IntervalSet.of(INVALID);
      return;
    }
    int singleAtom = labelSet.getSingleElement();
        if ( singleAtom!=INVALID ) {
            // convert back to a single atomic element if |labelSet|==1
            label = singleAtom;
            return;
        }
        this.label = SET;
        this.labelSet = labelSet;
    }


  @Override
  public Object clone() {
    Label l;
    try {
      l = (Label)super.clone();
      l.label = this.label;
            l.labelSet = new IntervalSet();
      l.labelSet.addAll(this.labelSet);
    }
    catch (CloneNotSupportedException e) {
      throw new InternalError();
    }
    return l;
  }


  public void add(Label a) {
    if ( isAtom() ) {
      labelSet = IntervalSet.of(label);
      label=SET;
      if ( a.isAtom() ) {
        labelSet.add(a.getAtom());
      }
      else if ( a.isSet() ) {
        labelSet.addAll(a.getSet());
      }
      else {
        throw new IllegalStateException("can't add element to Label of type "+label);
      }
      return;
    }
    if ( isSet() ) {
      if ( a.isAtom() ) {
        labelSet.add(a.getAtom());
      }
      else if ( a.isSet() ) {
        labelSet.addAll(a.getSet());
      }
      else {
        throw new IllegalStateException("can't add element to Label of type "+label);
      }
      return;
    }
    throw new IllegalStateException("can't add element to Label of type "+label);
  }


    public boolean isAtom() {
        return label>=MIN_ATOM_VALUE;
    }


    public boolean isEpsilon() {
        return label==EPSILON;
    }


  public boolean isSemanticPredicate() {
    return false;
  }


  public boolean isAction() {
    return false;
  }


    public boolean isSet() {
        return label==SET;
    }


    /** return the single atom label or INVALID if not a single atom */
    public int getAtom() {
        if ( isAtom() ) {
            return label;
        }
        return INVALID;
    }


    public IntSet getSet() {
        if ( label!=SET ) {
            // convert single element to a set if they ask for it.
            return IntervalSet.of(label);
        }
        return labelSet;
    }


    public void setSet(IntSet set) {
        label=SET;
        labelSet = set;
    }


    public SemanticContext getSemanticContext() {
        return null;
    }


  public boolean matches(int atom) {
    if ( label==atom ) {
      return true; // handle the single atom case efficiently
    }
    if ( isSet() ) {
      return labelSet.member(atom);
    }
    return false;
  }


  public boolean matches(IntSet set) {
    if ( isAtom() ) {
      return set.member(getAtom());
    }
    if ( isSet() ) {
      // matches if intersection non-nil
      return !getSet().and(set).isNil();
    }
    return false;
  }




  public boolean matches(Label other) {
    if ( other.isSet() ) {
      return matches(other.getSet());
    }
    if ( other.isAtom() ) {
      return matches(other.getAtom());
    }
    return false;
  }


  @Override
    public int hashCode() {
        if (label==SET) {
            return labelSet.hashCode();
    }
    else {
      return label;
    }
  }


  // TODO: do we care about comparing set {A} with atom A? Doesn't now.
  @Override
  public boolean equals(Object o) {
    if ( o==null ) {
      return false;
    }
    if ( this == o ) {
      return true; // equals if same object
    }
    // labels must be the same even if epsilon or set or sempred etc...
        if ( label!=((Label)o).label ) {
            return false;
        }
    if ( label==SET ) {
      return this.labelSet.equals(((Label)o).labelSet);
    }
    return true;  // label values are same, so true
    }


  @Override
    public int compareTo(Label o) {
        return this.label-o.label;
    }


    /** Predicates are lists of AST nodes from the NFA created from the
     *  grammar, but the same predicate could be cut/paste into multiple
     *  places in the grammar.  I must compare the text of all the
     *  predicates to truly answer whether {p1,p2} .equals {p1,p2}.
     *  Unfortunately, I cannot rely on the AST.equals() to work properly
     *  so I must do a brute force O(n^2) nested traversal of the Set
     *  doing a String compare.
     *
     *  At this point, Labels are not compared for equals when they are
     *  predicates, but here's the code for future use.
     */
    /*
    protected boolean predicatesEquals(Set others) {
        Iterator iter = semanticContext.iterator();
        while (iter.hasNext()) {
            AST predAST = (AST) iter.next();
            Iterator inner = semanticContext.iterator();
            while (inner.hasNext()) {
                AST otherPredAST = (AST) inner.next();
                if ( !predAST.getText().equals(otherPredAST.getText()) ) {
                    return false;
                }
            }
        }
        return true;
    }
      */


  @Override
    public String toString() {
        switch (label) {
            case SET :
                return labelSet.toString();
            default :
                return String.valueOf(label);
        }
    }


    public String toString(Grammar g) {
        switch (label) {
            case SET :
                return labelSet.toString(g);
            default :
                return g.getTokenDisplayName(label);
        }
    }


    /*
    public String predicatesToString() {
        if ( semanticContext==NFAConfiguration.DEFAULT_CLAUSE_SEMANTIC_CONTEXT ) {
            return "!other preds";
        }
        StringBuffer buf = new StringBuffer();
        Iterator iter = semanticContext.iterator();
        while (iter.hasNext()) {
            AST predAST = (AST) iter.next();
            buf.append(predAST.getText());
            if ( iter.hasNext() ) {
                buf.append("&");
            }
        }
        return buf.toString();
    }
    */


  public static boolean intersect(Label label, Label edgeLabel) {
    boolean hasIntersection = false;
    boolean labelIsSet = label.isSet();
    boolean edgeIsSet = edgeLabel.isSet();
    if ( !labelIsSet && !edgeIsSet && edgeLabel.label==label.label ) {
      hasIntersection = true;
    }
    else if ( labelIsSet && edgeIsSet &&
          !edgeLabel.getSet().and(label.getSet()).isNil() ) {
      hasIntersection = true;
    }
    else if ( labelIsSet && !edgeIsSet &&
          label.getSet().member(edgeLabel.label) ) {
      hasIntersection = true;
    }
    else if ( !labelIsSet && edgeIsSet &&
          edgeLabel.getSet().member(label.label) ) {
      hasIntersection = true;
    }
    return hasIntersection;
  }
}
Source Code of org.antlr.analysis.Label

Related Classes of org.antlr.analysis.Label