Source Code of org.antlr.analysis.LL1Analyzer

/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.analysis;


import org.antlr.grammar.v3.ANTLRParser;
import org.antlr.misc.IntSet;
import org.antlr.misc.IntervalSet;
import org.antlr.tool.Grammar;
import org.antlr.tool.Rule;


import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;


/**
 * Created by IntelliJ IDEA.
 * User: parrt
 * Date: Dec 31, 2007
 * Time: 1:31:16 PM
 * To change this template use File | Settings | File Templates.
 */
public class LL1Analyzer {
  /**  0  if we hit end of rule and invoker should keep going (epsilon) */
  public static final int DETECT_PRED_EOR = 0;
  /**  1  if we found a nonautobacktracking pred */
  public static final int DETECT_PRED_FOUND = 1;
  /**  2  if we didn't find such a pred */
  public static final int DETECT_PRED_NOT_FOUND = 2;


  public Grammar grammar;


  /** Used during LOOK to detect computation cycles */
  protected Set<NFAState> lookBusy = new HashSet<NFAState>();


  public Map<NFAState, LookaheadSet> FIRSTCache = new HashMap<NFAState, LookaheadSet>();
  public Map<Rule, LookaheadSet> FOLLOWCache = new HashMap<Rule, LookaheadSet>();


  public LL1Analyzer(Grammar grammar) {
    this.grammar = grammar;
  }


  /*
  public void computeRuleFIRSTSets() {
    if ( getNumberOfDecisions()==0 ) {
      createNFAs();
    }
    for (Iterator it = getRules().iterator(); it.hasNext();) {
      Rule r = (Rule)it.next();
      if ( r.isSynPred ) {
        continue;
      }
      LookaheadSet s = FIRST(r);
      System.out.println("FIRST("+r.name+")="+s);
    }
  }
  */


  /*
  public Set<String> getOverriddenRulesWithDifferentFIRST() {
    // walk every rule in this grammar and compare FIRST set with
    // those in imported grammars.
    Set<String> rules = new HashSet();
    for (Iterator it = getRules().iterator(); it.hasNext();) {
      Rule r = (Rule)it.next();
      //System.out.println(r.name+" FIRST="+r.FIRST);
      for (int i = 0; i < delegates.size(); i++) {
        Grammar g = delegates.get(i);
        Rule importedRule = g.getRule(r.name);
        if ( importedRule != null ) { // exists in imported grammar
          // System.out.println(r.name+" exists in imported grammar: FIRST="+importedRule.FIRST);
          if ( !r.FIRST.equals(importedRule.FIRST) ) {
            rules.add(r.name);
          }
        }
      }
    }
    return rules;
  }


  public Set<Rule> getImportedRulesSensitiveToOverriddenRulesDueToLOOK() {
    Set<String> diffFIRSTs = getOverriddenRulesWithDifferentFIRST();
    Set<Rule> rules = new HashSet();
    for (Iterator it = diffFIRSTs.iterator(); it.hasNext();) {
      String r = (String) it.next();
      for (int i = 0; i < delegates.size(); i++) {
        Grammar g = delegates.get(i);
        Set<Rule> callers = g.ruleSensitivity.get(r);
        // somebody invokes rule whose FIRST changed in subgrammar?
        if ( callers!=null ) {
          rules.addAll(callers);
          //System.out.println(g.name+" rules "+callers+" sensitive to "+r+"; dup 'em");
        }
      }
    }
    return rules;
  }
*/


  /*
  public LookaheadSet LOOK(Rule r) {
    if ( r.FIRST==null ) {
      r.FIRST = FIRST(r.startState);
    }
    return r.FIRST;
  }
*/


  /** From an NFA state, s, find the set of all labels reachable from s.
   *  Used to compute follow sets for error recovery.  Never computes
   *  a FOLLOW operation.  FIRST stops at end of rules, returning EOR, unless
   *  invoked from another rule.  I.e., routine properly handles
   *
   *     a : b A ;
   *
   *  where b is nullable.
   *
   *  We record with EOR_TOKEN_TYPE if we hit the end of a rule so we can
   *  know at runtime (when these sets are used) to start walking up the
   *  follow chain to compute the real, correct follow set (as opposed to
   *  the FOLLOW, which is a superset).
   *
   *  This routine will only be used on parser and tree parser grammars.
   */
  public LookaheadSet FIRST(NFAState s) {
    //System.out.println("> FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule);
    lookBusy.clear();
    LookaheadSet look = _FIRST(s, false);
    //System.out.println("< FIRST("+s.enclosingRule.name+") in rule "+s.enclosingRule+"="+look.toString(this.grammar));
    return look;
  }


  public LookaheadSet FOLLOW(Rule r) {
        //System.out.println("> FOLLOW("+r.name+") in rule "+r.startState.enclosingRule);
    LookaheadSet f = FOLLOWCache.get(r);
    if ( f!=null ) {
      return f;
    }
    f = _FIRST(r.stopState, true);
    FOLLOWCache.put(r, f);
        //System.out.println("< FOLLOW("+r+") in rule "+r.startState.enclosingRule+"="+f.toString(this.grammar));
    return f;
  }


  public LookaheadSet LOOK(NFAState s) {
    if ( NFAToDFAConverter.debug ) {
      System.out.println("> LOOK("+s+")");
    }
    lookBusy.clear();
    LookaheadSet look = _FIRST(s, true);
    // FOLLOW makes no sense (at the moment!) for lexical rules.
    if ( grammar.type!=Grammar.LEXER && look.member(Label.EOR_TOKEN_TYPE) ) {
      // avoid altering FIRST reset as it is cached
      LookaheadSet f = FOLLOW(s.enclosingRule);
      f.orInPlace(look);
      f.remove(Label.EOR_TOKEN_TYPE);
      look = f;
      //look.orInPlace(FOLLOW(s.enclosingRule));
    }
    else if ( grammar.type==Grammar.LEXER && look.member(Label.EOT) ) {
      // if this has EOT, lookahead is all char (all char can follow rule)
      //look = new LookaheadSet(Label.EOT);
      look = new LookaheadSet(IntervalSet.COMPLETE_SET);
    }
    if ( NFAToDFAConverter.debug ) {
      System.out.println("< LOOK("+s+")="+look.toString(grammar));
    }
    return look;
  }


  protected LookaheadSet _FIRST(NFAState s, boolean chaseFollowTransitions) {
    /*
    System.out.println("_LOOK("+s+") in rule "+s.enclosingRule);
    if ( s.transition[0] instanceof RuleClosureTransition ) {
      System.out.println("go to rule "+((NFAState)s.transition[0].target).enclosingRule);
    }
    */
    if ( !chaseFollowTransitions && s.isAcceptState() ) {
      if ( grammar.type==Grammar.LEXER ) {
        // FOLLOW makes no sense (at the moment!) for lexical rules.
        // assume all char can follow
        return new LookaheadSet(IntervalSet.COMPLETE_SET);
      }
      return new LookaheadSet(Label.EOR_TOKEN_TYPE);
    }


    if ( lookBusy.contains(s) ) {
      // return a copy of an empty set; we may modify set inline
      return new LookaheadSet();
    }
    lookBusy.add(s);


    Transition transition0 = s.transition[0];
    if ( transition0==null ) {
      return null;
    }


    if ( transition0.label.isAtom() ) {
      int atom = transition0.label.getAtom();
      return new LookaheadSet(atom);
    }
    if ( transition0.label.isSet() ) {
      IntSet sl = transition0.label.getSet();
      return new LookaheadSet(sl);
    }


    // compute FIRST of transition 0
    LookaheadSet tset = null;
    // if transition 0 is a rule call and we don't want FOLLOW, check cache
    if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) {
      tset = FIRSTCache.get((NFAState)transition0.target);
    }


    // if not in cache, must compute
    if ( tset==null ) {
      tset = _FIRST((NFAState)transition0.target, chaseFollowTransitions);
      // save FIRST cache for transition 0 if rule call
      if ( !chaseFollowTransitions && transition0 instanceof RuleClosureTransition ) {
        FIRSTCache.put((NFAState)transition0.target, tset);
      }
    }


        LookaheadSet tsetCached = tset; // tset is stored in cache. We can't return the same instance


    // did we fall off the end?
    if ( grammar.type!=Grammar.LEXER && tset.member(Label.EOR_TOKEN_TYPE) ) {
      if ( transition0 instanceof RuleClosureTransition ) {
        // we called a rule that found the end of the rule.
        // That means the rule is nullable and we need to
        // keep looking at what follows the rule ref.  E.g.,
        // a : b A ; where b is nullable means that LOOK(a)
        // should include A.
        RuleClosureTransition ruleInvocationTrans =
          (RuleClosureTransition)transition0;
        // remove the EOR and get what follows
        //tset.remove(Label.EOR_TOKEN_TYPE);
        NFAState following = ruleInvocationTrans.followState;
        LookaheadSet fset =  _FIRST(following, chaseFollowTransitions);
        fset.orInPlace(tset); // tset cached; or into new set
        fset.remove(Label.EOR_TOKEN_TYPE);
        tset = fset;
      }
    }


    Transition transition1 = s.transition[1];
    if ( transition1!=null ) {
      LookaheadSet tset1 =
        _FIRST((NFAState)transition1.target, chaseFollowTransitions);
      tset1.orInPlace(tset);
      tset = tset1;
    }


    // never return a cached set; clone
    return tset==tsetCached ? new LookaheadSet(tset) : tset;
  }


  /** Is there a non-syn-pred predicate visible from s that is not in
   *  the rule enclosing s?  This accounts for most predicate situations
   *  and lets ANTLR do a simple LL(1)+pred computation.
   *
   *  TODO: what about gated vs regular preds?
   */
  public boolean detectConfoundingPredicates(NFAState s) {
    lookBusy.clear();
    Rule r = s.enclosingRule;
    return _detectConfoundingPredicates(s, r, false) == DETECT_PRED_FOUND;
  }


  protected int _detectConfoundingPredicates(NFAState s,
                         Rule enclosingRule,
                         boolean chaseFollowTransitions)
  {
    //System.out.println("_detectNonAutobacktrackPredicates("+s+")");
    if ( !chaseFollowTransitions && s.isAcceptState() ) {
      if ( grammar.type==Grammar.LEXER ) {
        // FOLLOW makes no sense (at the moment!) for lexical rules.
        // assume all char can follow
        return DETECT_PRED_NOT_FOUND;
      }
      return DETECT_PRED_EOR;
    }


    if ( lookBusy.contains(s) ) {
      // return a copy of an empty set; we may modify set inline
      return DETECT_PRED_NOT_FOUND;
    }
    lookBusy.add(s);


    Transition transition0 = s.transition[0];
    if ( transition0==null ) {
      return DETECT_PRED_NOT_FOUND;
    }


    if ( !(transition0.label.isSemanticPredicate()||
         transition0.label.isEpsilon()) ) {
      return DETECT_PRED_NOT_FOUND;
    }


    if ( transition0.label.isSemanticPredicate() ) {
      //System.out.println("pred "+transition0.label);
      SemanticContext ctx = transition0.label.getSemanticContext();
      SemanticContext.Predicate p = (SemanticContext.Predicate)ctx;
      if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED ) {
        return DETECT_PRED_FOUND;
      }
    }
    
    /*
    if ( transition0.label.isSemanticPredicate() ) {
      System.out.println("pred "+transition0.label);
      SemanticContext ctx = transition0.label.getSemanticContext();
      SemanticContext.Predicate p = (SemanticContext.Predicate)ctx;
      // if a non-syn-pred found not in enclosingRule, say we found one
      if ( p.predicateAST.getType() != ANTLRParser.BACKTRACK_SEMPRED &&
         !p.predicateAST.enclosingRuleName.equals(enclosingRule.name) )
      {
        System.out.println("found pred "+p+" not in "+enclosingRule.name);
        return DETECT_PRED_FOUND;
      }
    }
    */


    int result = _detectConfoundingPredicates((NFAState)transition0.target,
                          enclosingRule,
                          chaseFollowTransitions);
    if ( result == DETECT_PRED_FOUND ) {
      return DETECT_PRED_FOUND;
    }


    if ( result == DETECT_PRED_EOR ) {
      if ( transition0 instanceof RuleClosureTransition ) {
        // we called a rule that found the end of the rule.
        // That means the rule is nullable and we need to
        // keep looking at what follows the rule ref.  E.g.,
        // a : b A ; where b is nullable means that LOOK(a)
        // should include A.
        RuleClosureTransition ruleInvocationTrans =
          (RuleClosureTransition)transition0;
        NFAState following = ruleInvocationTrans.followState;
        int afterRuleResult =
          _detectConfoundingPredicates(following,
                         enclosingRule,
                         chaseFollowTransitions);
        if ( afterRuleResult == DETECT_PRED_FOUND ) {
          return DETECT_PRED_FOUND;
        }
      }
    }


    Transition transition1 = s.transition[1];
    if ( transition1!=null ) {
      int t1Result =
        _detectConfoundingPredicates((NFAState)transition1.target,
                       enclosingRule,
                       chaseFollowTransitions);
      if ( t1Result == DETECT_PRED_FOUND ) {
        return DETECT_PRED_FOUND;
      }
    }


    return DETECT_PRED_NOT_FOUND;
  }


  /** Return predicate expression found via epsilon edges from s.  Do
   *  not look into other rules for now.  Do something simple.  Include
   *  backtracking synpreds.
   */
  public SemanticContext getPredicates(NFAState altStartState) {
    lookBusy.clear();
    return _getPredicates(altStartState, altStartState);
  }


  protected SemanticContext _getPredicates(NFAState s, NFAState altStartState) {
    //System.out.println("_getPredicates("+s+")");
    if ( s.isAcceptState() ) {
      return null;
    }


    // avoid infinite loops from (..)* etc...
    if ( lookBusy.contains(s) ) {
      return null;
    }
    lookBusy.add(s);


    Transition transition0 = s.transition[0];
    // no transitions
    if ( transition0==null ) {
      return null;
    }


    // not a predicate and not even an epsilon
    if ( !(transition0.label.isSemanticPredicate()||
         transition0.label.isEpsilon()) ) {
      return null;
    }


    SemanticContext p = null;
    SemanticContext p0;
    SemanticContext p1 = null;
    if ( transition0.label.isSemanticPredicate() ) {
      //System.out.println("pred "+transition0.label);
      p = transition0.label.getSemanticContext();
      // ignore backtracking preds not on left edge for this decision
      if ( ((SemanticContext.Predicate)p).predicateAST.getType() ==
          ANTLRParser.BACKTRACK_SEMPRED  &&
         s == altStartState.transition[0].target )
      {
        p = null; // don't count
      }
    }


    // get preds from beyond this state
    p0 = _getPredicates((NFAState)transition0.target, altStartState);


    // get preds from other transition
    Transition transition1 = s.transition[1];
    if ( transition1!=null ) {
      p1 = _getPredicates((NFAState)transition1.target, altStartState);
    }


    // join this&following-right|following-down
    return SemanticContext.and(p,SemanticContext.or(p0,p1));
  }
}
Source Code of org.antlr.analysis.LL1Analyzer

Related Classes of org.antlr.analysis.LL1Analyzer