Package org.jostraca.comp.antlr

Source Code of org.jostraca.comp.antlr.LLkAnalyzer

package org.jostraca.comp.antlr;

/* ANTLR Translator Generator
* Project led by Terence Parr at http://www.jGuru.com
* Software rights: http://www.antlr.org/license.html
*
* $Id: LLkAnalyzer.java,v 1.4 2004/11/19 17:50:49 rjrodger Exp $
*/

import org.jostraca.comp.antlr.collections.impl.BitSet;
import org.jostraca.comp.antlr.collections.impl.Vector;

/**A linear-approximate LL(k) grammar analzyer.
*
* All lookahead elements are sets of token types.
*
* @author  Terence Parr, John Lilley
* @see     antlr.Grammar
* @see     antlr.Lookahead
*/
public class LLkAnalyzer implements LLkGrammarAnalyzer {
    // Set "analyzerDebug" to true
    public boolean DEBUG_ANALYZER = false;
    private AlternativeBlock currentBlock;
    protected Tool tool = null;
    protected Grammar grammar = null;
    // True if analyzing a lexical grammar
    protected boolean lexicalAnalysis = false;
    // Used for formatting bit sets in default (Java) format
    CharFormatter charFormatter = new JavaCharFormatter();

    /** Create an LLk analyzer */
    public LLkAnalyzer(Tool tool_) {
        tool = tool_;
    }

    /** Return true if someone used the '.' wildcard default idiom.
     *  Either #(. children) or '.' as an alt by itself.
     */
    protected boolean altUsesWildcardDefault(Alternative alt) {
        AlternativeElement head = alt.head;
        // if element is #(. blah) then check to see if el is root
        if (head instanceof TreeElement &&
            ((TreeElement)head).root instanceof WildcardElement) {
            return true;
        }
        if (head instanceof WildcardElement && head.next instanceof BlockEndElement) {
            return true;
        }
        return false;
    }

    /**Is this block of alternatives LL(k)?  Fill in alternative cache for this block.
     * @return true if the block is deterministic
     */
    public boolean deterministic(AlternativeBlock blk) {
        /** The lookahead depth for this decision */
        int k = 1// start at k=1
        if (DEBUG_ANALYZER) System.out.println("deterministic(" + blk + ")");
        boolean det = true;
        int nalts = blk.alternatives.size();
        AlternativeBlock saveCurrentBlock = currentBlock;
        Alternative wildcardAlt = null;
        currentBlock = blk;

        /* don't allow nongreedy (...) blocks */
        if (blk.greedy == false && !(blk instanceof OneOrMoreBlock) && !(blk instanceof ZeroOrMoreBlock)) {
            tool.warning("Being nongreedy only makes sense for (...)+ and (...)*", grammar.getFilename(), blk.getLine(), blk.getColumn());
        }

        // SPECIAL CASE: only one alternative.  We don't need to check the
        // determinism, but other code expects the lookahead cache to be
        // set for the single alt.
        if (nalts == 1) {
            AlternativeElement e = blk.getAlternativeAt(0).head;
            currentBlock.alti = 0;
            blk.getAlternativeAt(0).cache[1] = e.look(1);
            blk.getAlternativeAt(0).lookaheadDepth = 1// set lookahead to LL(1)
            currentBlock = saveCurrentBlock;
            return true// always deterministic for one alt
        }

        outer:
            for (int i = 0; i < nalts - 1; i++) {
                currentBlock.alti = i;
                currentBlock.analysisAlt = i;  // which alt are we analyzing?
                currentBlock.altj = i + 1;    // reset this alt.  Haven't computed yet,
                // but we need the alt number.
                inner:
                    // compare against other alternatives with lookahead depth k
                    for (int j = i + 1; j < nalts; j++) {
                        currentBlock.altj = j;
                        if (DEBUG_ANALYZER) System.out.println("comparing " + i + " against alt " + j);
                        currentBlock.analysisAlt = j;  // which alt are we analyzing?
                        k = 1// always attempt minimum lookahead possible.

                        // check to see if there is a lookahead depth that distinguishes
                        // between alternatives i and j.
                        Lookahead[] r = new Lookahead[grammar.maxk + 1];
                        boolean haveAmbiguity;
                        do {
                            haveAmbiguity = false;
                            if (DEBUG_ANALYZER) System.out.println("checking depth " + k + "<=" + grammar.maxk);
                            Lookahead p,q;
                            p = getAltLookahead(blk, i, k);
                            q = getAltLookahead(blk, j, k);

                            // compare LOOK(alt i) with LOOK(alt j).  Is there an intersection?
                            // Lookahead must be disjoint.
                            if (DEBUG_ANALYZER) System.out.println("p is " + p.toString(",", charFormatter, grammar));
                            if (DEBUG_ANALYZER) System.out.println("q is " + q.toString(",", charFormatter, grammar));
                            // r[i] = p.fset.and(q.fset);
                            r[k] = p.intersection(q);
                            if (DEBUG_ANALYZER) System.out.println("intersection at depth " + k + " is " + r[k].toString());
                            if (!r[k].nil()) {
                                haveAmbiguity = true;
                                k++;
                            }
                            // go until no more lookahead to use or no intersection
                        } while (haveAmbiguity && k <= grammar.maxk);

                        Alternative ai = blk.getAlternativeAt(i);
                        Alternative aj = blk.getAlternativeAt(j);
                        if (haveAmbiguity) {
                            det = false;
                            ai.lookaheadDepth = NONDETERMINISTIC;
                            aj.lookaheadDepth = NONDETERMINISTIC;

                            /* if ith alt starts with a syntactic predicate, computing the
                             * lookahead is still done for code generation, but messages
                             * should not be generated when comparing against alt j.
                             * Alternatives with syn preds that are unnecessary do
                             * not result in syn pred try-blocks.
                             */
                            if (ai.synPred != null) {
                                if (DEBUG_ANALYZER) {
                                    System.out.println("alt " + i + " has a syn pred");
                                }
                                // The alt with the (...)=> block is nondeterministic for sure.
                                // If the (...)=> conflicts with alt j, j is nondeterministic.
                                // This prevents alt j from being in any switch statements.
                                // move on to next alternative=>no possible ambiguity!
                                //            continue inner;
                            }

                            /* if ith alt starts with a semantic predicate, computing the
                             * lookahead is still done for code generation, but messages
                             * should not be generated when comparing against alt j.
                             */
                            else if (ai.semPred != null) {
                                if (DEBUG_ANALYZER) {
                                    System.out.println("alt " + i + " has a sem pred");
                                }
                            }

                            /* if jth alt is exactly the wildcard or wildcard root of tree,
                             * then remove elements from alt i lookahead from alt j's lookahead.
                             * Don't do an ambiguity warning.
                             */
                            else if (altUsesWildcardDefault(aj)) {
                                // System.out.println("removing pred sets");
                                // removeCompetingPredictionSetsFromWildcard(aj.cache, aj.head, grammar.maxk);
                                wildcardAlt = aj;
                            }

                            /* If the user specified warnWhenFollowAmbig=false, then we
                             * can turn off this warning IFF one of the alts is empty;
                             * that is, it points immediately at the end block.
                             */
                            else if (!blk.warnWhenFollowAmbig &&
                                (ai.head instanceof BlockEndElement ||
                                aj.head instanceof BlockEndElement)) {
                                // System.out.println("ai.head pts to "+ai.head.getClass());
                                // System.out.println("aj.head pts to "+aj.head.getClass());
                            }

                            /* If they have the generateAmbigWarnings option off for the block
                             * then don't generate a warning.
                             */
                            else if (!blk.generateAmbigWarnings) {
                            }

                            /* If greedy=true and *one* empty alt shut off warning. */
                            else if (blk.greedySet && blk.greedy &&
                                ((ai.head instanceof BlockEndElement &&
                                !(aj.head instanceof BlockEndElement)) ||
                                (aj.head instanceof BlockEndElement &&
                                !(ai.head instanceof BlockEndElement)))) {
                                // System.out.println("greedy set to true; one alt empty");
                            }


                            /* We have no choice, but to report a nondetermism */
                            else {
                                tool.errorHandler.warnAltAmbiguity(
                                    grammar,
                                    blk, // the block
                                    lexicalAnalysis, // true if lexical
                                    grammar.maxk, // depth of ambiguity
                                    r, // set of linear ambiguities
                                    i, // first ambiguous alternative
                                    j        // second ambiguous alternative
                                );
                            }
                        }
                        else {
                            // a lookahead depth, k, was found where i and j do not conflict
                            ai.lookaheadDepth = Math.max(ai.lookaheadDepth, k);
                            aj.lookaheadDepth = Math.max(aj.lookaheadDepth, k);
                        }
                    }
            }

        // finished with block.

        // If had wildcard default clause idiom, remove competing lookahead
        /*
      if ( wildcardAlt!=null ) {
      removeCompetingPredictionSetsFromWildcard(wildcardAlt.cache, wildcardAlt.head, grammar.maxk);
      }
    */

        currentBlock = saveCurrentBlock;
        return det;
    }

    /**Is (...)+ block LL(1)?  Fill in alternative cache for this block.
     * @return true if the block is deterministic
     */
    public boolean deterministic(OneOrMoreBlock blk) {
        if (DEBUG_ANALYZER) System.out.println("deterministic(...)+(" + blk + ")");
        AlternativeBlock saveCurrentBlock = currentBlock;
        currentBlock = blk;
        boolean blkOk = deterministic((AlternativeBlock)blk);
        // block has been checked, now check that what follows does not conflict
        // with the lookahead of the (...)+ block.
        boolean det = deterministicImpliedPath(blk);
        currentBlock = saveCurrentBlock;
        return det && blkOk;
    }

    /**Is (...)* block LL(1)?  Fill in alternative cache for this block.
     * @return true if the block is deterministic
     */
    public boolean deterministic(ZeroOrMoreBlock blk) {
        if (DEBUG_ANALYZER) System.out.println("deterministic(...)*(" + blk + ")");
        AlternativeBlock saveCurrentBlock = currentBlock;
        currentBlock = blk;
        boolean blkOk = deterministic((AlternativeBlock)blk);
        // block has been checked, now check that what follows does not conflict
        // with the lookahead of the (...)* block.
        boolean det = deterministicImpliedPath(blk);
        currentBlock = saveCurrentBlock;
        return det && blkOk;
    }

    /**Is this (...)* or (...)+ block LL(k)?
     * @return true if the block is deterministic
     */
    public boolean deterministicImpliedPath(BlockWithImpliedExitPath blk) {
        /** The lookahead depth for this decision considering implied exit path */
        int k;
        boolean det = true;
        Vector alts = blk.getAlternatives();
        int nalts = alts.size();
        currentBlock.altj = -1// comparing against implicit optional/exit alt

        if (DEBUG_ANALYZER) System.out.println("deterministicImpliedPath");
        for (int i = 0; i < nalts; i++) {    // check follow against all alts
            Alternative alt = blk.getAlternativeAt(i);

            if (alt.head instanceof BlockEndElement) {
                tool.warning("empty alternative makes no sense in (...)* or (...)+", grammar.getFilename(), blk.getLine(), blk.getColumn());
            }

            k = 1;              // assume eac alt is LL(1) with exit branch
            // check to see if there is a lookahead depth that distinguishes
            // between alternative i and the exit branch.
            Lookahead[] r = new Lookahead[grammar.maxk + 1];
            boolean haveAmbiguity;
            do {
                haveAmbiguity = false;
                if (DEBUG_ANALYZER) System.out.println("checking depth " + k + "<=" + grammar.maxk);
                Lookahead p;
                Lookahead follow = blk.next.look(k);
                blk.exitCache[k] = follow;
                currentBlock.alti = i;
                p = getAltLookahead(blk, i, k);

                if (DEBUG_ANALYZER) System.out.println("follow is " + follow.toString(",", charFormatter, grammar));
                if (DEBUG_ANALYZER) System.out.println("p is " + p.toString(",", charFormatter, grammar));
                //r[k] = follow.fset.and(p.fset);
                r[k] = follow.intersection(p);
                if (DEBUG_ANALYZER) System.out.println("intersection at depth " + k + " is " + r[k]);
                if (!r[k].nil()) {
                    haveAmbiguity = true;
                    k++;
                }
                // go until no more lookahead to use or no intersection
            } while (haveAmbiguity && k <= grammar.maxk);

            if (haveAmbiguity) {
                det = false;
                alt.lookaheadDepth = NONDETERMINISTIC;
                blk.exitLookaheadDepth = NONDETERMINISTIC;
                Alternative ambigAlt = blk.getAlternativeAt(currentBlock.alti);

                /* If the user specified warnWhenFollowAmbig=false, then we
                 * can turn off this warning.
                 */
                if (!blk.warnWhenFollowAmbig) {
                }

                /* If they have the generateAmbigWarnings option off for the block
                 * then don't generate a warning.
                 */
                else if (!blk.generateAmbigWarnings) {
                }

                /* If greedy=true and alt not empty, shut off warning */
                else if (blk.greedy == true && blk.greedySet &&
                    !(ambigAlt.head instanceof BlockEndElement)) {
                    if (DEBUG_ANALYZER) System.out.println("greedy loop");
                }

                /* If greedy=false then shut off warning...will have
                 * to add "if FOLLOW break"
                 * block during code gen to compensate for removal of warning.
                 */
                else if (blk.greedy == false &&
                    !(ambigAlt.head instanceof BlockEndElement)) {
                    if (DEBUG_ANALYZER) System.out.println("nongreedy loop");
                    // if FOLLOW not single k-string (|set[k]| can
                    // be > 1 actually) then must warn them that
                    // loop may terminate incorrectly.
                    // For example, ('a'..'d')+ ("ad"|"cb")
                    if (!lookaheadEquivForApproxAndFullAnalysis(blk.exitCache, grammar.maxk)) {
                        tool.warning(new String[]{
                            "nongreedy block may exit incorrectly due",
                            "\tto limitations of linear approximate lookahead (first k-1 sets",
                            "\tin lookahead not singleton)."},
                                     grammar.getFilename(), blk.getLine(), blk.getColumn());
                    }
                }

                // no choice but to generate a warning
                else {
                    tool.errorHandler.warnAltExitAmbiguity(
                        grammar,
                        blk, // the block
                        lexicalAnalysis, // true if lexical
                        grammar.maxk, // depth of ambiguity
                        r, // set of linear ambiguities
                        i    // ambiguous alternative
                    );
                }
            }
            else {
                alt.lookaheadDepth = Math.max(alt.lookaheadDepth, k);
                blk.exitLookaheadDepth = Math.max(blk.exitLookaheadDepth, k);
            }
        }
        return det;
    }

    /**Compute the lookahead set of whatever follows references to
     * the rule associated witht the FOLLOW block.
     */
    public Lookahead FOLLOW(int k, RuleEndElement end) {
        // what rule are we trying to compute FOLLOW of?
        RuleBlock rb = (RuleBlock)end.block;
        // rule name is different in lexer
        String rule;
        if (lexicalAnalysis) {
            rule = CodeGenerator.encodeLexerRuleName(rb.getRuleName());
        }
        else {
            rule = rb.getRuleName();
        }

        if (DEBUG_ANALYZER) System.out.println("FOLLOW(" + k + "," + rule + ")");

        // are we in the midst of computing this FOLLOW already?
        if (end.lock[k]) {
            if (DEBUG_ANALYZER) System.out.println("FOLLOW cycle to " + rule);
            return new Lookahead(rule);
        }

        // Check to see if there is cached value
        if (end.cache[k] != null) {
            if (DEBUG_ANALYZER) {
                System.out.println("cache entry FOLLOW(" + k + ") for " + rule + ": " + end.cache[k].toString(",", charFormatter, grammar));
            }
            // if the cache is a complete computation then simply return entry
            if (end.cache[k].cycle == null) {
                return (Lookahead)end.cache[k].clone();
            }
            // A cache entry exists, but it is a reference to a cyclic computation.
            RuleSymbol rs = (RuleSymbol)grammar.getSymbol(end.cache[k].cycle);
            RuleEndElement re = rs.getBlock().endNode;
            // The other entry may not exist because it is still being
            // computed when this cycle cache entry was found here.
            if (re.cache[k] == null) {
                // return the cycle...that's all we can do at the moment.
                return (Lookahead)end.cache[k].clone();
            }
            else {
                if (DEBUG_ANALYZER) {
                    System.out.println("combining FOLLOW(" + k + ") for " + rule + ": from "+end.cache[k].toString(",", charFormatter, grammar) + " with FOLLOW for "+((RuleBlock)re.block).getRuleName()+": "+re.cache[k].toString(",", charFormatter, grammar));
                }
                // combine results from other rule's FOLLOW
                if ( re.cache[k].cycle==null ) {
                    // current rule depends on another rule's FOLLOW and
                    // it is complete with no cycle; just kill our cycle and
                    // combine full result from other rule's FOLLOW
                    end.cache[k].combineWith(re.cache[k]);
                    end.cache[k].cycle = null; // kill cycle as we're complete
                }
                else {
                    // the FOLLOW cache for other rule has a cycle also.
                    // Here is where we bubble up a cycle.  We better recursively
                    // wipe out cycles (partial computations).  I'm a little nervous
                    // that we might leave a cycle here, however.
                    Lookahead refFOLLOW = FOLLOW(k, re);
                    end.cache[k].combineWith( refFOLLOW );
                    // all cycles should be gone, but if not, record ref to cycle
                    end.cache[k].cycle = refFOLLOW.cycle;
                }
                if (DEBUG_ANALYZER) {
                    System.out.println("saving FOLLOW(" + k + ") for " + rule + ": from "+end.cache[k].toString(",", charFormatter, grammar));
                }
                // Return the updated cache entry associated
                // with the cycle reference.
                return (Lookahead)end.cache[k].clone();
            }
        }

        end.lock[k] = true// prevent FOLLOW computation cycles

        Lookahead p = new Lookahead();

        RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);

        // Walk list of references to this rule to compute FOLLOW
        for (int i = 0; i < rs.numReferences(); i++) {
            RuleRefElement rr = rs.getReference(i);
            if (DEBUG_ANALYZER) System.out.println("next[" + rule + "] is " + rr.next.toString());
            Lookahead q = rr.next.look(k);
            if (DEBUG_ANALYZER) System.out.println("FIRST of next[" + rule + "] ptr is " + q.toString());
            /* If there is a cycle then if the cycle is to the rule for
       * this end block, you have a cycle to yourself.  Remove the
       * cycle indication--the lookahead is complete.
       */
            if (q.cycle != null && q.cycle.equals(rule)) {
                q.cycle = null// don't want cycle to yourself!
            }
            // add the lookahead into the current FOLLOW computation set
            p.combineWith(q);
            if (DEBUG_ANALYZER) System.out.println("combined FOLLOW[" + rule + "] is " + p.toString());
        }

        end.lock[k] = false; // we're not doing FOLLOW anymore

        // if no rules follow this, it can be a start symbol or called by a start sym.
        // set the follow to be end of file.
        if (p.fset.nil() && p.cycle == null) {
            if (grammar instanceof TreeWalkerGrammar) {
                // Tree grammars don't see EOF, they see end of sibling list or
                // "NULL TREE LOOKAHEAD".
                p.fset.add(Token.NULL_TREE_LOOKAHEAD);
            }
            else if (grammar instanceof LexerGrammar) {
                // Lexical grammars use Epsilon to indicate that the end of rule has been hit
                // EOF would be misleading; any character can follow a token rule not just EOF
                // as in a grammar (where a start symbol is followed by EOF).  There is no
                // sequence info in a lexer between tokens to indicate what is the last token
                // to be seen.
                // p.fset.add(EPSILON_TYPE);
                p.setEpsilon();
            }
            else {
                p.fset.add(Token.EOF_TYPE);
            }
        }

        // Cache the result of the FOLLOW computation
        if (DEBUG_ANALYZER) {
            System.out.println("saving FOLLOW(" + k + ") for " + rule + ": " + p.toString(",", charFormatter, grammar));
        }
        end.cache[k] = (Lookahead)p.clone();

        return p;
    }

    private Lookahead getAltLookahead(AlternativeBlock blk, int alt, int k) {
        Lookahead p;
        Alternative a = blk.getAlternativeAt(alt);
        AlternativeElement e = a.head;
        //System.out.println("getAltLookahead("+k+","+e+"), cache size is "+a.cache.length);
        if (a.cache[k] == null) {
            p = e.look(k);
            a.cache[k] = p;
        }
        else {
            p = a.cache[k];
        }
        return p;
    }

    /**Actions are ignored */
    public Lookahead look(int k, ActionElement action) {
        if (DEBUG_ANALYZER) System.out.println("lookAction(" + k + "," + action + ")");
        return action.next.look(k);
    }

    /**Combine the lookahead computed for each alternative */
    public Lookahead look(int k, AlternativeBlock blk) {
        if (DEBUG_ANALYZER) System.out.println("lookAltBlk(" + k + "," + blk + ")");
        AlternativeBlock saveCurrentBlock = currentBlock;
        currentBlock = blk;
        Lookahead p = new Lookahead();
        for (int i = 0; i < blk.alternatives.size(); i++) {
            if (DEBUG_ANALYZER) System.out.println("alt " + i + " of " + blk);
            // must set analysis alt
            currentBlock.analysisAlt = i;
            Alternative alt = blk.getAlternativeAt(i);
            AlternativeElement elem = alt.head;
            if (DEBUG_ANALYZER) {
                if (alt.head == alt.tail) {
                    System.out.println("alt " + i + " is empty");
                }
            }
            Lookahead q = elem.look(k);
            p.combineWith(q);
        }
        if (k == 1 && blk.not && subruleCanBeInverted(blk, lexicalAnalysis)) {
            // Invert the lookahead set
            if (lexicalAnalysis) {
                BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
                int[] elems = p.fset.toArray();
                for (int j = 0; j < elems.length; j++) {
                    b.remove(elems[j]);
                }
                p.fset = b;
            }
            else {
                p.fset.notInPlace(Token.MIN_USER_TYPE, grammar.tokenManager.maxTokenType());
            }
        }
        currentBlock = saveCurrentBlock;
        return p;
    }

    /**Compute what follows this place-holder node and possibly
     * what begins the associated loop unless the
     * node is locked.
     * <p>
     * if we hit the end of a loop, we have to include
     * what tokens can begin the loop as well.  If the start
     * node is locked, then we simply found an empty path
     * through this subrule while analyzing it.  If the
     * start node is not locked, then this node was hit
     * during a FOLLOW operation and the FIRST of this
     * block must be included in that lookahead computation.
     */
    public Lookahead look(int k, BlockEndElement end) {
        if (DEBUG_ANALYZER) System.out.println("lookBlockEnd(" + k + ", " + end.block + "); lock is " + end.lock[k]);
        if (end.lock[k]) {
            // computation in progress => the tokens we would have
            // computed (had we not been locked) will be included
            // in the set by that computation with the lock on this
            // node.
            return new Lookahead();
        }

        Lookahead p;

        /* Hitting the end of a loop means you can see what begins the loop */
        if (end.block instanceof ZeroOrMoreBlock ||
            end.block instanceof OneOrMoreBlock) {
            // compute what can start the block,
            // but lock end node so we don't do it twice in same
            // computation.
            end.lock[k] = true;
            p = look(k, end.block);
            end.lock[k] = false;
        }
        else {
            p = new Lookahead();
        }

        /* Tree blocks do not have any follow because they are children
     * of what surrounds them.  For example, A #(B C) D results in
     * a look() for the TreeElement end of NULL_TREE_LOOKAHEAD, which
     * indicates that nothing can follow the last node of tree #(B C)
     */
        if (end.block instanceof TreeElement) {
            p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD));
        }

        /* Syntactic predicates such as ( (A)? )=> have no follow per se.
     * We cannot accurately say what would be matched following a
     * syntactic predicate (you MIGHT be ok if you said it was whatever
     * followed the alternative predicted by the predicate).  Hence,
     * (like end-of-token) we return Epsilon to indicate "unknown
     * lookahead."
     */
        else if (end.block instanceof SynPredBlock) {
            p.setEpsilon();
        }

        // compute what can follow the block
        else {
            Lookahead q = end.block.next.look(k);
            p.combineWith(q);
        }

        return p;
    }

    /**Return this char as the lookahead if k=1.
     * <p>### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
     * <p>
     * If the atom has the <tt>not</tt> flag on, then
     * create the set complement of the tokenType
     * which is the set of all characters referenced
     * in the grammar with this char turned off.
     * Also remove characters from the set that
     * are currently allocated for predicting
     * previous alternatives.  This avoids ambiguity
     * messages and is more properly what is meant.
     * ( 'a' | ~'a' ) implies that the ~'a' is the
     * "else" clause.
     * <p>
     * NOTE: we do <b>NOT</b> include exit path in
     * the exclusion set. E.g.,
     * ( 'a' | ~'a' )* 'b'
     * should exit upon seeing a 'b' during the loop.
     */
    public Lookahead look(int k, CharLiteralElement atom) {
        if (DEBUG_ANALYZER) System.out.println("lookCharLiteral(" + k + "," + atom + ")");
        // Skip until analysis hits k==1
        if (k > 1) {
            return atom.next.look(k - 1);
        }
        if (lexicalAnalysis) {
            if (atom.not) {
                BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
                if (DEBUG_ANALYZER) System.out.println("charVocab is " + b.toString());
                // remove stuff predicted by preceding alts and follow of block
                removeCompetingPredictionSets(b, atom);
                if (DEBUG_ANALYZER) System.out.println("charVocab after removal of prior alt lookahead " + b.toString());
                // now remove element that is stated not to be in the set
                b.clear(atom.getType());
                return new Lookahead(b);
            }
            else {
                return Lookahead.of(atom.getType());
            }
        }
        else {
            // Should have been avoided by MakeGrammar
            tool.panic("Character literal reference found in parser");
            // ... so we make the compiler happy
            return Lookahead.of(atom.getType());
        }
    }

    public Lookahead look(int k, CharRangeElement r) {
        if (DEBUG_ANALYZER) System.out.println("lookCharRange(" + k + "," + r + ")");
        // Skip until analysis hits k==1
        if (k > 1) {
            return r.next.look(k - 1);
        }
        BitSet p = BitSet.of(r.begin);
        for (int i = r.begin + 1; i <= r.end; i++) {
            p.add(i);
        }
        return new Lookahead(p);
    }

    public Lookahead look(int k, GrammarAtom atom) {
        if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + atom + "[" + atom.getType() + "])");

        if (lexicalAnalysis) {
            // MakeGrammar should have created a rule reference instead
            tool.panic("token reference found in lexer");
        }
        // Skip until analysis hits k==1
        if (k > 1) {
            return atom.next.look(k - 1);
        }
        Lookahead l = Lookahead.of(atom.getType());
        if (atom.not) {
            // Invert the lookahead set against the token vocabulary
            int maxToken = grammar.tokenManager.maxTokenType();
            l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
            // remove stuff predicted by preceding alts and follow of block
            removeCompetingPredictionSets(l.fset, atom);
        }
        return l;
    }

    /**The lookahead of a (...)+ block is the combined lookahead of
     * all alternatives and, if an empty path is found, the lookahead
     * of what follows the block.
     */
    public Lookahead look(int k, OneOrMoreBlock blk) {
        if (DEBUG_ANALYZER) System.out.println("look+" + k + "," + blk + ")");
        Lookahead p = look(k, (AlternativeBlock)blk);
        return p;
    }

    /**Combine the lookahead computed for each alternative.
     * Lock the node so that no other computation may come back
     * on itself--infinite loop.  This also implies infinite left-recursion
     * in the grammar (or an error in this algorithm ;)).
     */
    public Lookahead look(int k, RuleBlock blk) {
        if (DEBUG_ANALYZER) System.out.println("lookRuleBlk(" + k + "," + blk + ")");
        Lookahead p = look(k, (AlternativeBlock)blk);
        return p;
    }

    /**If not locked or noFOLLOW set, compute FOLLOW of a rule.
     * <p>
     * TJP says 8/12/99: not true anymore:
     * Lexical rules never compute follow.  They set epsilon and
     * the code generator gens code to check for any character.
     * The code generator must remove the tokens used to predict
     * any previous alts in the same block.
     * <p>
     * When the last node of a rule is reached and noFOLLOW,
     * it implies that a "local" FOLLOW will be computed
     * after this call.  I.e.,
     * <pre>
     *    a : b A;
     *    b : B | ;
     *    c : b C;
     * </pre>
     * Here, when computing the look of rule b from rule a,
     * we want only {B,EPSILON_TYPE} so that look(b A) will
     * be {B,A} not {B,A,C}.
     * <p>
     * if the end block is not locked and the FOLLOW is
     * wanted, the algorithm must compute the lookahead
     * of what follows references to this rule.  If
     * end block is locked, FOLLOW will return an empty set
     * with a cycle to the rule associated with this end block.
     */
    public Lookahead look(int k, RuleEndElement end) {
        if (DEBUG_ANALYZER)
            System.out.println("lookRuleBlockEnd(" + k + "); noFOLLOW=" +
                               end.noFOLLOW + "; lock is " + end.lock[k]);
        if (/*lexicalAnalysis ||*/ end.noFOLLOW) {
            Lookahead p = new Lookahead();
            p.setEpsilon();
            p.epsilonDepth = BitSet.of(k);
            return p;
        }
        Lookahead p = FOLLOW(k, end);
        return p;
    }

    /**Compute the lookahead contributed by a rule reference.
     *
     * <p>
     * When computing ruleref lookahead, we don't want the FOLLOW
     * computation done if an empty path exists for the rule.
     * The FOLLOW is too loose of a set...we want only to
     * include the "local" FOLLOW or what can follow this
     * particular ref to the node.  In other words, we use
     * context information to reduce the complexity of the
     * analysis and strengthen the parser.
     *
     * The noFOLLOW flag is used as a means of restricting
     * the FOLLOW to a "local" FOLLOW.  This variable is
     * orthogonal to the <tt>lock</tt> variable that prevents
     * infinite recursion.  noFOLLOW does not care about what k is.
     */
    public Lookahead look(int k, RuleRefElement rr) {
        if (DEBUG_ANALYZER) System.out.println("lookRuleRef(" + k + "," + rr + ")");
        RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
        if (rs == null || !rs.defined) {
            tool.error("no definition of rule " + rr.targetRule, grammar.getFilename(), rr.getLine(), rr.getColumn());
            return new Lookahead();
        }
        RuleBlock rb = rs.getBlock();
        RuleEndElement end = rb.endNode;
        boolean saveEnd = end.noFOLLOW;
        end.noFOLLOW = true;
        // go off to the rule and get the lookahead (w/o FOLLOW)
        Lookahead p = look(k, rr.targetRule);
        if (DEBUG_ANALYZER) System.out.println("back from rule ref to " + rr.targetRule);
        // restore state of end block
        end.noFOLLOW = saveEnd;

        // check for infinite recursion.  If a cycle is returned: trouble!
        if (p.cycle != null) {
            tool.error("infinite recursion to rule " + p.cycle + " from rule " +
                       rr.enclosingRuleName, grammar.getFilename(), rr.getLine(), rr.getColumn());
        }

        // is the local FOLLOW required?
        if (p.containsEpsilon()) {
            if (DEBUG_ANALYZER)
                System.out.println("rule ref to " +
                                   rr.targetRule + " has eps, depth: " + p.epsilonDepth);

            // remove epsilon
            p.resetEpsilon();
            // fset.clear(EPSILON_TYPE);

            // for each lookahead depth that saw epsilon
            int[] depths = p.epsilonDepth.toArray();
            p.epsilonDepth = null;    // clear all epsilon stuff
            for (int i = 0; i < depths.length; i++) {
                int rk = k - (k - depths[i]);
                Lookahead q = rr.next.look(rk)// see comments in Lookahead
                p.combineWith(q);
            }
            // note: any of these look() computations for local follow can
            // set EPSILON in the set again if the end of this rule is found.
        }

        return p;
    }

    public Lookahead look(int k, StringLiteralElement atom) {
        if (DEBUG_ANALYZER) System.out.println("lookStringLiteral(" + k + "," + atom + ")");
        if (lexicalAnalysis) {
            // need more lookahead than string can provide?
            if (k > atom.processedAtomText.length()) {
                return atom.next.look(k - atom.processedAtomText.length());
            }
            else {
                // get char at lookahead depth k, from the processed literal text
                return Lookahead.of(atom.processedAtomText.charAt(k - 1));
            }
        }
        else {
            // Skip until analysis hits k==1
            if (k > 1) {
                return atom.next.look(k - 1);
            }
            Lookahead l = Lookahead.of(atom.getType());
            if (atom.not) {
                // Invert the lookahead set against the token vocabulary
                int maxToken = grammar.tokenManager.maxTokenType();
                l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
            }
            return l;
        }
    }

    /**The lookahead of a (...)=> block is the lookahead of
     * what follows the block.  By definition, the syntactic
     * predicate block defies static analysis (you want to try it
     * out at run-time).  The LOOK of (a)=>A B is A for LL(1)
     * ### is this even called?
     */
    public Lookahead look(int k, SynPredBlock blk) {
        if (DEBUG_ANALYZER) System.out.println("look=>(" + k + "," + blk + ")");
        return blk.next.look(k);
    }

    public Lookahead look(int k, TokenRangeElement r) {
        if (DEBUG_ANALYZER) System.out.println("lookTokenRange(" + k + "," + r + ")");
        // Skip until analysis hits k==1
        if (k > 1) {
            return r.next.look(k - 1);
        }
        BitSet p = BitSet.of(r.begin);
        for (int i = r.begin + 1; i <= r.end; i++) {
            p.add(i);
        }
        return new Lookahead(p);
    }

    public Lookahead look(int k, TreeElement t) {
        if (DEBUG_ANALYZER)
            System.out.println("look(" + k + "," + t.root + "[" + t.root.getType() + "])");
        if (k > 1) {
            return t.next.look(k - 1);
        }
        Lookahead l = null;
        if (t.root instanceof WildcardElement) {
            l = t.root.look(1); // compute FIRST set minus previous rows
        }
        else {
            l = Lookahead.of(t.root.getType());
            if (t.root.not) {
                // Invert the lookahead set against the token vocabulary
                int maxToken = grammar.tokenManager.maxTokenType();
                l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
            }
        }
        return l;
    }

    public Lookahead look(int k, WildcardElement wc) {
        if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + wc + ")");

        // Skip until analysis hits k==1
        if (k > 1) {
            return wc.next.look(k - 1);
        }

        BitSet b;
        if (lexicalAnalysis) {
            // Copy the character vocabulary
            b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
        }
        else {
            b = new BitSet(1);
            // Invert the lookahead set against the token vocabulary
            int maxToken = grammar.tokenManager.maxTokenType();
            b.notInPlace(Token.MIN_USER_TYPE, maxToken);
            if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + wc + ") after not: " + b);
        }

        // Remove prediction sets from competing alternatives
        // removeCompetingPredictionSets(b, wc);

        return new Lookahead(b);
    }

    /** The (...)* element is the combined lookahead of the alternatives and what can
     *  follow the loop.
     */
    public Lookahead look(int k, ZeroOrMoreBlock blk) {
        if (DEBUG_ANALYZER) System.out.println("look*(" + k + "," + blk + ")");
        Lookahead p = look(k, (AlternativeBlock)blk);
        Lookahead q = blk.next.look(k);
        p.combineWith(q);
        return p;
    }

    /**Compute the combined lookahead for all productions of a rule.
     * If the lookahead returns with epsilon, at least one epsilon
     * path exists (one that consumes no tokens).  The noFOLLOW
     * flag being set for this endruleblk, indicates that the
     * a rule ref invoked this rule.
     *
     * Currently only look(RuleRef) calls this.  There is no need
     * for the code generator to call this.
     */
    public Lookahead look(int k, String rule) {
        if (DEBUG_ANALYZER) System.out.println("lookRuleName(" + k + "," + rule + ")");
        RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
        RuleBlock rb = rs.getBlock();

        if (rb.lock[k]) {
            if (DEBUG_ANALYZER)
                System.out.println("infinite recursion to rule " + rb.getRuleName());
            return new Lookahead(rule);
        }

        // have we computed it before?
        if (rb.cache[k] != null) {
            if (DEBUG_ANALYZER) {
                System.out.println("found depth " + k + " result in FIRST " + rule + " cache: " +
                                   rb.cache[k].toString(",", charFormatter, grammar));
            }
            return (Lookahead)rb.cache[k].clone();
        }

        rb.lock[k] = true;
        Lookahead p = look(k, (RuleBlock)rb);
        rb.lock[k] = false;

        // cache results
        rb.cache[k] = (Lookahead)p.clone();
        if (DEBUG_ANALYZER) {
            System.out.println("saving depth " + k + " result in FIRST " + rule + " cache: " +
                               rb.cache[k].toString(",", charFormatter, grammar));
        }
        return p;
    }

    /** If the first k-1 sets are singleton sets, the appoximate
     *  lookahead analysis is equivalent to full lookahead analysis.
     */
    public static boolean lookaheadEquivForApproxAndFullAnalysis(Lookahead[] bset, int k) {
        // first k-1 sets degree 1?
        for (int i = 1; i <= k - 1; i++) {
            BitSet look = bset[i].fset;
            if (look.degree() > 1) {
                return false;
            }
        }
        return true;
    }

    /** Remove the prediction sets from preceding alternatives
     * and follow set, but *only* if this element is the first element
     * of the alternative.  The class members currenBlock and
     * currentBlock.analysisAlt must be set correctly.
     * @param b The prediction bitset to be modified
     * @el The element of interest
     */
    private void removeCompetingPredictionSets(BitSet b, AlternativeElement el) {
        // Only do this if the element is the first element of the alt,
        // because we are making an implicit assumption that k==1.
        GrammarElement head = currentBlock.getAlternativeAt(currentBlock.analysisAlt).head;
        // if element is #(. blah) then check to see if el is root
        if (head instanceof TreeElement) {
            if (((TreeElement)head).root != el) {
                return;
            }
        }
        else if (el != head) {
            return;
        }
        for (int i = 0; i < currentBlock.analysisAlt; i++) {
            AlternativeElement e = currentBlock.getAlternativeAt(i).head;
            b.subtractInPlace(e.look(1).fset);
        }
    }

    /** Remove the prediction sets from preceding alternatives
     * The class members currenBlock must be set correctly.
     * Remove prediction sets from 1..k.
     * @param look The prediction lookahead to be modified
     * @el The element of interest
     * @k  How deep into lookahead to modify
     */
    private void removeCompetingPredictionSetsFromWildcard(Lookahead[] look, AlternativeElement el, int k) {
        for (int d = 1; d <= k; d++) {
            for (int i = 0; i < currentBlock.analysisAlt; i++) {
                AlternativeElement e = currentBlock.getAlternativeAt(i).head;
                look[d].fset.subtractInPlace(e.look(d).fset);
            }
        }
    }

    /** reset the analyzer so it looks like a new one */
    private void reset() {
        grammar = null;
        DEBUG_ANALYZER = false;
        currentBlock = null;
        lexicalAnalysis = false;
    }

    /** Set the grammar for the analyzer */
    public void setGrammar(Grammar g) {
        if (grammar != null) {
            reset();
        }
        grammar = g;

        // Is this lexical?
        lexicalAnalysis = (grammar instanceof LexerGrammar);
        DEBUG_ANALYZER = grammar.analyzerDebug;
    }

    public boolean subruleCanBeInverted(AlternativeBlock blk, boolean forLexer) {
        if (
            blk instanceof ZeroOrMoreBlock ||
            blk instanceof OneOrMoreBlock ||
            blk instanceof SynPredBlock
        ) {
            return false;
        }
        // Cannot invert an empty subrule
        if (blk.alternatives.size() == 0) {
            return false;
        }
        // The block must only contain alternatives with a single element,
        // where each element is a char, token, char range, or token range.
        for (int i = 0; i < blk.alternatives.size(); i++) {
            Alternative alt = blk.getAlternativeAt(i);
            // Cannot have anything interesting in the alternative ...
            if (alt.synPred != null || alt.semPred != null || alt.exceptionSpec != null) {
                return false;
            }
            // ... and there must be one simple element
            AlternativeElement elt = alt.head;
            if (
                !(
                elt instanceof CharLiteralElement ||
                elt instanceof TokenRefElement ||
                elt instanceof CharRangeElement ||
                elt instanceof TokenRangeElement ||
                (elt instanceof StringLiteralElement && !forLexer)
                ) ||
                !(elt.next instanceof BlockEndElement) ||
                elt.getAutoGenType() != GrammarElement.AUTO_GEN_NONE
            ) {
                return false;
            }
        }
        return true;
    }
}
TOP

Related Classes of org.jostraca.comp.antlr.LLkAnalyzer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.