Source Code of com.creativewidgetworks.goldparser.engine.Parser

package com.creativewidgetworks.goldparser.engine;


import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.Map;
import java.util.Stack;
import java.util.TreeMap;


import com.creativewidgetworks.goldparser.engine.enums.AdvanceMode;
import com.creativewidgetworks.goldparser.engine.enums.CGTRecord;
import com.creativewidgetworks.goldparser.engine.enums.EndingMode;
import com.creativewidgetworks.goldparser.engine.enums.LRActionType;
import com.creativewidgetworks.goldparser.engine.enums.ParseMessage;
import com.creativewidgetworks.goldparser.engine.enums.ParseResult;
import com.creativewidgetworks.goldparser.engine.enums.SymbolType;
import com.creativewidgetworks.goldparser.util.FormatHelper;


/**
 * Parser 
 *
 * This is the main class in the GOLD Parser Engine and is used to perform
 * all duties required for parsing a source text string. This class
 * contains the LALR(1) State Machine code, the DFA State Machine code,
 * the character table (used by the DFA algorithm) and all other structures and
 * methods needed to interact with the developer.
 * 
 * Dependencies: 
 * @see Group
 * @see GroupList
 * @see FStateList
 * @see LRState
 * @see Position
 * @see Production
 * @see ProductionList
 * @see Reduction
 * @see Symbol
 * @see SymbolList
 * @see Token
 *
 * Note that several class fields are marked as protected instead of private. This was done
 * to avoid having to add getters used only for testing.  If and when the tests are
 * refactored to use reflection to probe these fields, they can once again be marked
 * private.
 *
 * @author Devin Cook (http://www.DevinCook.com/GOLDParser)
 * @author Ralph Iden (http://www.creativewidgetworks.com), port to Java
 * @version 5.0.0
 */
public class Parser {
    
    // Standard attribute names (keys used with getAttribute()/setAttribute();
    // several of them are filled in by loadTables() from the grammar file)
    public static final String ABOUT             = "About";
    public static final String AUTHOR            = "Author";
    public static final String CASE_SENSITIVE    = "Case Sensitive";
    public static final String CHARACTER_MAPPING = "Character Mapping";
    public static final String CHARACTER_SET     = "Character Set";
    public static final String GENERATED_BY      = "Generated By";
    public static final String GENERATED_DATE    = "Generated Date";
    public static final String NAME              = "Name";
    public static final String START_SYMBOL      = "Start Symbol";
    public static final String VERSION           = "Version";
   
    // Concatenated by about() to form the engine's banner string
    public static final String PARSER_NAME = "GOLD Parser Engine - Version ";
    public static final String PARSER_VERSION = "5.0.3";


    // Flag to indicate which grammar table file is being processed
    // (loadTables() sets it to true on a PARAMETER record, false on a PROPERTY record)
    protected boolean version1Format;
    
    // Symbols recognized by the system
    protected SymbolList symbolTable;


    // DFA
    protected FAStateList dfa;
    protected CharacterSetList characterSetTable;
    // Characters already read from the source but not yet consumed by the lexer
    protected StringBuilder lookaheadBuffer;
    
    // Productions
    protected ProductionList productionTable;
    
    // LALR
    protected LRStateList lrStates;
    private int currentLALR;          // Index into lrStates of the current LALR state
    protected Stack<Token> stack;     // Parse stack of shifted tokens and reduced heads


    // Fields for Reductions and errors
    private SymbolList expectedSymbols; 
    // True when the token on top of the parse stack carries a Reduction
    protected boolean haveReduction;
    // When true, productions made of a single nonterminal get no Reduction of their own
    private boolean trimReductions;      
    
    // Locally used fields
    private boolean tablesLoaded;     // parse() refuses to run until this is true
    private Stack<Token> inputTokens; // Tokens to be analyzed


    // Input reader for the source code to parse
    protected Reader source;
    
    // Line and column information
    private Position sysPosition;      // Internal only, so user cannot alter values
    private Position currentPosition;  // Location of last read terminal


    // Grammar attributes (created lazily by getAttribute()/setAttribute())
    protected Map<String, String> attributes;
    
    // Lexical groups
    // A non-empty groupStack when END is read is reported by parse() as a runaway group
    private Stack<Token> groupStack;
    protected GroupList groupTable;
    
    public Parser() {
        stack = new Stack<Token>();
        inputTokens = new Stack<Token>();
        groupStack = new Stack<Token>();
    }
    
    /*----------------------------------------------------------------------------*/


    /**
     * Return library name and version information
     * @return version information
     */
    public String about() {
        return PARSER_NAME + PARSER_VERSION;
    }


    /*----------------------------------------------------------------------------*/


    /**
     * Consume/remove characters from the front of the lookahead buffer
     * and adjust the value of the system Position object.
     * @param count the number of characters to consume
     */
    private void consumeBuffer(int count) {
        if (count > 0 && count <= lookaheadBuffer.length()) {
            // Adjust position
            for (int i = 0; i < count; i++) {
                char c = lookaheadBuffer.charAt(i);
                if (c == 0x0A) {
                    if (sysPosition.getColumn() > 1) {
                        // Increment row if Unix EOLN (LF)
                        sysPosition.incrementLine();
                    }
                } else if (c == 0x0D) {
                    sysPosition.incrementLine();
                } else {
                    sysPosition.incrementColumn();
                }
            }
            
            // Remove the characters
            lookaheadBuffer.delete(0, count);
        }
    }


    /*----------------------------------------------------------------------------*/


    public String getAttribute(String name) {
        return getAttribute(name, null);
    }
    
    public String getAttribute(String name, String defaultValue) {
        if (attributes == null) {
            attributes = new TreeMap<String, String>();
        }
        
        String value = attributes.get(name);
        return value == null ? defaultValue : value;
    }
    
    public void setAttribute(String name, String value) {
        if (attributes == null) {
            attributes = new TreeMap<String, String>();
        }
        attributes.put(name, value);
    }
    
    /*----------------------------------------------------------------------------*/


    public Position getCurrentPosition() {
        return currentPosition;
    }
    
    /*----------------------------------------------------------------------------*/
    
    /**
     * Return the last token read by the parser
     * @return the last token read by the parser.
     */
    protected Token getCurrentToken() {
        return inputTokens.peek();
    }


    /*----------------------------------------------------------------------------*/


    /**
     * When parse() returns a REDUCE, the method will return the current reduction.
     * @return the reduction
     */
    public Reduction getCurrentReduction() {
        return haveReduction ? stack.peek().asReduction() : null;
    }
    
    protected void setCurrentReduction(Reduction reduction) {
        if (haveReduction) {
            stack.peek().setData(reduction);
        }
    }
    
    /*----------------------------------------------------------------------------*/
    
    /**
     * Return the list of expected symbols
     * @Token the last token read by the parser.
     */
    public SymbolList getExpectedSymbols() {
        return expectedSymbols;
    }
    
    /*----------------------------------------------------------------------------*/
    
    /**
     * Searches the symbol table for the first occurrence of the the
     * specified symbol type.
     * @param SymbolTypeTest to find
     * @return Symbol, the first symbol of type or null if no symbol found
     */
    private Symbol getFirstSymbolOfType(SymbolType type) {
        for (Symbol symbol : symbolTable) {
            if (symbol.getType().equals(type)) {
                return symbol;
            }
        }
        return null;
    }


    /*----------------------------------------------------------------------------*/
    
    /**
     * Searches the symbol table for a specific symbol specified by the symbol name.
     * @param name of symbol to find
     * @return Symbol, the symbol or null if no symbol found
     */
    protected Symbol getSymbolByName(String name) {
        if (symbolTable != null) {
            for (Symbol symbol : symbolTable) {
                if (symbol.getName().equals(name)) {
                    return symbol;
                }
            }
        }
        return null;
    }
    
    /*----------------------------------------------------------------------------*/


    /**
     * Return the parser's current position (row, column)
     * @return Position
     */
    public Position getPosition() {
        return currentPosition;
    }
    
    /*----------------------------------------------------------------------------*/


    protected Reader getSource() {
        return source;
    }
    
    /*----------------------------------------------------------------------------*/


    protected boolean isVersion1Format() {
        return version1Format;
    }
    
    protected void setVersion1Format(boolean version1) {
        version1Format = version1;
    }
    
    /*----------------------------------------------------------------------------*/


    /**
     * Loads the parse tables from the specified file.
     * NOTE: Only CGT version 5.0 is supported.
     * @param file to open and load.
     * @return true if the file was successfully processed.
     * @throws IOException
     */
    protected boolean loadTables(File file) throws IOException {
        if (file == null) {
            throw new IOException(FormatHelper.formatMessage("messages", "error.cgt_missing"));
        }
        return loadTables(new FileInputStream(file));
    }


    /**
     * Loads the parse tables from the specified input stream. The inputstream will
     * be closed when the method returns.
     * NOTE: Only CGT version 5.0 is supported.
     * NOTE(review): the PARAMETER and COUNTS branches below also handle what the code
     * calls the "version 1" format — confirm what the note above is meant to exclude.
     *
     * The stream is read record by record; each record starts with a type byte that
     * selects one of the cases below. Within a case the cgt.retrieve*() calls must
     * stay in their current order because each call consumes the next entry of the record.
     *
     * @param input stream to read.
     * @return true if the stream was successfully processed.
     * @throws IOException if an unknown record type is read, or propagated from the reader
     */
    protected boolean loadTables(InputStream input) throws IOException {
        // Never set to false in this method: the loop below ends only at EOF or by exception.
        boolean result = true;
        int index;
            
        CGT cgt = new CGT();
        CharacterSet characterSet;
        
        try {
            cgt.open(input);
            
            // Reset parser state; tables are not considered loaded until the whole stream is read
            restart();
            tablesLoaded = false;
            
            while (result) {
                cgt.getNextRecord();
                if (cgt.atEOF()) {
                    break;
                }


                int recordType = cgt.retrieveByte();
                // System.out.println(CGTRecord.getCGTRecord(recordType));
                
                switch (CGTRecord.getCGTRecord(recordType)) {
                    // Version 1 header: fixed sequence of name, version, author, about,
                    // case-sensitivity flag and start symbol index
                    case PARAMETER:
                        version1Format = true;
                        setAttribute(NAME, cgt.retrieveString());
                        setAttribute(VERSION, cgt.retrieveString());
                        setAttribute(AUTHOR, cgt.retrieveString());
                        setAttribute(ABOUT, cgt.retrieveString());
                        setAttribute(CASE_SENSITIVE, Boolean.toString(cgt.retrieveBoolean()));
                        setAttribute(START_SYMBOL, Integer.toString(cgt.retrieveInteger()));                        
                        break;
                
                    case PROPERTY:
                        // Index (not used), name, value
                        version1Format = false;
                        cgt.retrieveInteger();  // Index (not used)
                        // Arguments are evaluated left to right, so name is read before value
                        setAttribute(cgt.retrieveString(), cgt.retrieveString());
                        break;
                      
                    // Counts for Symbols, Rules, DFA, and LALR lists    
                    case COUNTS:
                    case COUNTS5:
                        symbolTable = new SymbolList(cgt.retrieveInteger());
                        characterSetTable = new CharacterSetList(cgt.retrieveInteger());
                        productionTable = new ProductionList(cgt.retrieveInteger());
                        dfa = new FAStateList(cgt.retrieveInteger());
                        lrStates = new LRStateList(cgt.retrieveInteger());
                        if (!isVersion1Format()) {
                            groupTable = new GroupList(cgt.retrieveInteger());
                        } else {
                            // Create an empty table in case GROUP_START/END pairs follow
                            groupTable = new GroupList();
                        }
                        break;


                    // Character set     
                    case CHARSET:
                        index = cgt.retrieveInteger();
                        characterSet = new CharacterSet();
                        characterSetTable.set(index, characterSet);     
                        characterSet.add(new CharacterRange(cgt.retrieveString()));
                        break;
                        
                    // Character range     
                    case CHARRANGES:
                        index = cgt.retrieveInteger();
                        cgt.retrieveInteger(); // codepage
                        cgt.retrieveInteger(); // total sets
                        cgt.retrieveEntry(); // reserved


                        characterSet = new CharacterSet();
                        characterSetTable.set(index, characterSet);
                        // Remaining entries come in pairs; the first integer read is passed
                        // as the first CharacterRange argument, the second as the second
                        while (!cgt.isRecordComplete()) {
                            characterSet.add(new CharacterRange(cgt.retrieveInteger(), cgt.retrieveInteger()));
                        }


                        break;                        
                        
                    // Symbols    
                    case SYMBOL:
                        index = cgt.retrieveInteger();
                        String name = cgt.retrieveString();
                        SymbolType type = SymbolType.getSymbolType(cgt.retrieveInteger());
                        // This local is also assigned and used by the LRSTATE case further down
                        Symbol symbol = new Symbol(name, type, index);
                        symbolTable.set(index, symbol);
                        break;


                    // Rules (productions)    
                    case RULE:
                        index = cgt.retrieveInteger();
                        int headIndex = cgt.retrieveInteger();
                        cgt.retrieveEntry();  // Reserved
                        
                        Production production = new Production(symbolTable.get(headIndex), index);
                        productionTable.set(index, production);
                        // Remaining entries are symbol table indexes forming the rule's handle
                        while (!cgt.isRecordComplete()) {
                            int symIndex = cgt.retrieveInteger();
                            production.getHandle().add(symbolTable.get(symIndex));
                        }
                        
                        break;                        


                    // Initial states for DFA and LALR
                    case INITIALSTATES:
                        dfa.setInitialState(cgt.retrieveInteger());
                        lrStates.setInitialState(cgt.retrieveInteger());
                        break;                        
                        
                    // Groups   
                    case GROUP:
                        index = cgt.retrieveInteger();


                        Group group = new Group();
                        group.setName(cgt.retrieveString());
                        group.setContainer(symbolTable.get(cgt.retrieveInteger()));
                        group.setStart(symbolTable.get(cgt.retrieveInteger()));
                        group.setEnd(symbolTable.get(cgt.retrieveInteger()));
                        group.setAdvanceMode(AdvanceMode.getAdvanceMode(cgt.retrieveInteger()));
                        group.setEndingMode(EndingMode.getEndingMode(cgt.retrieveInteger()));


                        cgt.retrieveEntry(); // Reserved                        
                        
                        // Nesting levels
                        int count = cgt.retrieveInteger();
                        for (int i = 0; i < count; i++) {
                            group.getNesting().add(cgt.retrieveInteger());
                        }
  
                        // Link back
                        group.getContainer().setGroup(group);
                        group.getStart().setGroup(group);
                        group.getEnd().setGroup(group);
                        
                        groupTable.set(index, group);
                        break;
                        
                    // Recognized but carries nothing that is read here
                    case GROUPNESTING:
                        break;
        
                    case DFASTATE:
                        index = cgt.retrieveInteger();
                        boolean accept = cgt.retrieveBoolean();
                        int acceptIndex = cgt.retrieveInteger();
                        cgt.retrieveEntry(); // Reserved


                        // Only accepting states carry the symbol they accept
                        if (accept) {
                            dfa.set(index, new FAState(symbolTable.get(acceptIndex)));
                        } else {
                            dfa.set(index, new FAState());
                        }


                        // Remaining entries are edges: character set index, target state, reserved
                        while (!cgt.isRecordComplete()) {
                            int setIndex = cgt.retrieveInteger();
                            int target = cgt.retrieveInteger();
                            cgt.retrieveEntry();  // Reserved
                            dfa.get(index).getEdges().add(new FAEdge(characterSetTable.get(setIndex), target));
                        }
                        
                        break;
                        
                    case LRSTATE:
                        index = cgt.retrieveInteger();
                        cgt.retrieveEntry();  // Reserved
                        
                        LRState lrState = new LRState();
                        lrStates.set(index, lrState);
                        // Remaining entries are actions: symbol index, action type, value, reserved
                        while (!cgt.isRecordComplete()) {
                            symbol = symbolTable.get(cgt.retrieveInteger());
                            LRActionType actionType = LRActionType.getLRActionType(cgt.retrieveInteger());
                            int value = cgt.retrieveInteger();
                            cgt.retrieveEntry();  // Reserved
                            lrState.add(new LRAction(symbol,actionType, value));
                        }
                        
                        break;
                        
                    case UNDEFINED:
                        throw new IOException("Unknown record type of " + recordType + " was read.");
                }
            }
        } finally {
            // Always release the reader, whether loading succeeded or threw
            cgt.close();
        }
        
        // Reached only when no exception was thrown above
        tablesLoaded = result;
        
        resolveCommentGroupsForVersion1Grammars();
        
        return result;
    }
    
    /*----------------------------------------------------------------------------*/


    /**
     * Return a single character at charIndex. This method will read and fill the
     * buffer as needed from the source stream. 
     * @param charIndex offset of the lookahead buffer.
     * @return char that was read or "" EOF has been reached.
     */
    private String lookahead(int charIndex) {
        if (charIndex >= 0) {
            if (charIndex > lookaheadBuffer.length()) {
                // Requesting data past the end of stream, so perform a read
                int readCount = charIndex - lookaheadBuffer.length();
                for (int i = 0; i < readCount; i++) {
                    int c;
                    try {
                        c = source.read();
                    } catch (IOException ioe) {
                        c = -1;
                    }
                    if (c != -1) {
                        lookaheadBuffer.append((char)c);
                    } else {
                        break; // EOF reached
                    }
                }
            }
            
            // If the buffer is still smaller than charIndex, we have reached
            // the end of the text. In this case, return a null string - the DFA
            // code will understand.
            return (charIndex <= lookaheadBuffer.length()) ? String.valueOf(lookaheadBuffer.charAt(charIndex - 1)) : "";
        }
        
        return "";
    }
    
    /*----------------------------------------------------------------------------*/


    /**
     * Return count characters from the lookahead buffer. 
     * These characters are used to create the text stored in a token. Because of  
     * the design of the DFA algorithm, count should never exceed the buffer length.
     * @param count number of characters to return
     * @return String 
     */
    private String getLookaheadBuffer(int count) {
        if (count > lookaheadBuffer.length()) {
            count = lookaheadBuffer.length();
        }
        
        return count > 0 ? lookaheadBuffer.substring(0, count) : "";
    }
    
    /*----------------------------------------------------------------------------*/


    /**
     * This method implements the DFA for the parser's lexer. A
     * token is generated which is used by the LALR state machine.
     * @return Token
     */
    private Token lookaheadDFA() {
        Token token = new Token();
     
        int currentDFA = dfa.getInitialState();
        
        int curPosition = 1;            // Next byte in the input stream
        int lastAcceptState = -1;       // Nothing has been accepted yet
        int lastAcceptPosition = -1;
        int target = 0;
        
        String str = lookahead(1);
        if (str.length() > 0) {
            boolean found;
            boolean done = false;
            while (!done) {
                // Search all the branches of the current DFA state for the next 
                // character in the input stream. If found, the target state is returned.
                str = lookahead(curPosition);
                if (str.length() == 0) {
                    found = false;
                } else {
                    found = false;
                    for (int i = 0; !found && i < dfa.get(currentDFA).getEdges().size(); i++) {
                        FAEdge edge = dfa.get(currentDFA).getEdges().get(i);
                        if (edge.getChars().contains(str.charAt(0))) {
                            found = true;
                            target = edge.getTarget();
                        }
                    }
                }


                // This block checks whether an edge was found from the current state. If so, the 
                // state and current position advance. Otherwise it is time to exit the main loop 
                // and report the token found (if there was one). If the LastAcceptState is -1, then 
                // we never found a match and the Error Token is created. Otherwise, a new token is 
                // created using the Symbol in the Accept State and all the characters that comprise it.
                if (found) {
                    // This code checks whether the target state accepts a token. If so, it sets
                    // the appropriate variables so when the algorithm is done, it can return the
                    // proper token and number of characters.
                    if (dfa.get(target).getAccept() != null) {
                        lastAcceptState = target;
                        lastAcceptPosition = curPosition;
                    }
                    currentDFA = target;
                    curPosition++;
                } else {
                    // No edge found
                    done = true;
                    if (lastAcceptState == -1) {
                        // Lexer doesn't recognize the symbol
                        token.setSymbol(getFirstSymbolOfType(SymbolType.ERROR));
                        token.setData(getLookaheadBuffer(1));
                    } else {
                        // Create Token and read characters
                        // Data contains the total number of accept characters
                        token.setSymbol(dfa.get(lastAcceptState).getAccept());
                        token.setData(getLookaheadBuffer(lastAcceptPosition));
                    }
                }
            }
        } else {
            token.setData("");
            token.setSymbol(getFirstSymbolOfType(SymbolType.END));
        }


        token.setPosition(new Position(sysPosition));
        
        return token;
    }


    /*----------------------------------------------------------------------------*/


    /**
     * Open a file to be parsed
     * @param sourceFile to be parsed
     * @return true if the file is ready to be parsed
     * @throws IOException if file is not available.
     */
    protected boolean open(File sourceFile) throws IOException {
        return open(new FileReader(sourceFile));
    }
    
    /**
     * Prepare the parser to process the source contained in the String.
     * @param sourceStatements, the code to parse
     * @return true if the source is ready to be parsed.
     */
    protected boolean open(String sourceStatements) {
        return open(new StringReader(sourceStatements));
    }
    
    /**
     * Open the reader to be parsed
     * @param reader that will be used to read the source code to parse.
     * @return true if the reader is ready to be parsed.
     */
    protected boolean open(Reader reader) {
        restart();
        source = reader;
        stack.push(new Token());
        return true;
    }


    /*----------------------------------------------------------------------------*/


    /**
     * Returns the next token in the stream -- This method can be overridden to support
     * virtual terminals (indentation sensitive grammars, etc.)
     */
    protected Token nextToken() {
        return produceToken();
    }
    
    /**
     * Performs a parse action on the input stream. This method is typically used in a loop until 
     * either the grammar is accepted or an error occurs.
     * @return ParseMessage
     */
    protected ParseMessage parse() {
        if (!tablesLoaded) {
            return ParseMessage.NOT_LOADED_ERROR;
        }


        Token read;
        ParseMessage parseMessage = ParseMessage.UNDEFINED;
        
        // Loop until a breakable event
        boolean done = false;
        while (!done) {
            if (inputTokens.size() == 0) {
                read = nextToken();
                inputTokens.push(read);
                
                // Handle the case where an unterminated comment block consumes the entire program
                if (SymbolType.END.equals(read.getType()) && groupStack.size() > 0) {
                    // Runaway group
                    parseMessage = ParseMessage.GROUP_ERROR;                    
                } else {
                    // A good token was read
                    parseMessage = ParseMessage.TOKEN_READ;
                }
                
                done = true;
            } else {
                read = inputTokens.peek();
                currentPosition.set(read.getPosition());  // Update current position


                if (SymbolType.NOISE.equals(read.getType())) {
                    // Discard token - these tokens were already reported to the user
                    inputTokens.pop();
                } else if (SymbolType.ERROR.equals(read.getType())) {
                    parseMessage = ParseMessage.LEXICAL_ERROR;
                    done = true;
                } else if (SymbolType.END.equals(read.getType()) && groupStack.size() > 0) {
                    // Runaway group
                    parseMessage = ParseMessage.GROUP_ERROR;
                    done = true;                    
                } else {
                    ParseResult parseResult = parseLALR(read);  // Same method as v1
                    switch (parseResult) {
                        case ACCEPT:
                            parseMessage = ParseMessage.ACCEPT;
                            done = true;
                            break;
                            
                        case INTERNAL_ERROR:
                            parseMessage = ParseMessage.INTERNAL_ERROR;
                            done = true;
                            break;


                        case REDUCE_NORMAL:
                            parseMessage = ParseMessage.REDUCTION;
                            done = true;
                            break;


                        case SHIFT:
                            // ParseToken() shifted the token on the front of the Token-Queue. It 
                            // now exists on the Token-Stack and must be eliminated from the queue.
                            inputTokens.remove(0);
                            break;
                            
                        case SYNTAX_ERROR:
                            parseMessage = ParseMessage.SYNTAX_ERROR;
                            done = true;
                            break;
                            
                        case REDUCE_ELIMINATED:  // fall through intended
                        case UNDEFINED:
                            // do nothing
                            break;
                    }
                }
            }
        }
        
        return parseMessage;
    }
        
    /*----------------------------------------------------------------------------*/


    /**
     * This method analyzes a token and either:
     *   1. Makes a SINGLE reduction and pushes a complete Reduction object on the stack
     *   2. Accepts the token and shifts
     *   3. Errors and places the expected symbol indexes in the Tokens list.
     *   
     * @param nextToken to be analyzed
     * @return ParseResult  
     */
    private ParseResult parseLALR(Token nextToken) {
        ParseResult parseResult = null;


        haveReduction = false;
        
        LRAction parseAction = lrStates.get(currentLALR).find(nextToken);
        //System.out.println("Action: " + parseAction.toString());
        
        switch (parseAction.getType()) {
            case ACCEPT:
                haveReduction = true;
                parseResult = ParseResult.ACCEPT;
                break;
                
            case REDUCE:
                // Produce a reduction - remove as many tokens as members in the rule and push a nonterminal token
                Production production = productionTable.get(parseAction.getValue());
                
                Token head;
                if (trimReductions && production.containsOneNonTerminal()) {
                    // The current rule only consists of a single nonterminal and can be trimmed from the
                    // parse tree. Usually we create a new Reduction, assign it to the Data property
                    // of Head and push it on the m_Stack. However, in this case, the Data property of the
                    // Head will be assigned the Data property of the reduced token (i.e. the only one
                    // on the m_Stack). To save code, the value popped of the m_Stack is changed into the head.
                    head = stack.pop();
                    head.setSymbol(production.getHead());
                    parseResult = ParseResult.REDUCE_ELIMINATED;
                } else {
                    haveReduction = true;
                    Reduction newReduction = new Reduction(production.getHandle().size());
                    newReduction.setParent(production);
                    for (int i = production.getHandle().size() - 1; i >= 0; i--) {
                        newReduction.set(i, stack.pop());
                    }
                    head = new Token(production.getHead(), newReduction);
                    parseResult = ParseResult.REDUCE_NORMAL;
                }
                
                // goto value
                int index = stack.peek().getState();


                LRAction lrAction = lrStates.get(index).find(production.getHead());
                if (!lrAction.equals(LRState.LRACTION_UNDEFINED)) {
                    currentLALR = lrAction.getValue();
                    head.setState(currentLALR);
                    stack.push(head);
                } else {
                    parseResult = ParseResult.INTERNAL_ERROR;
                }
                
                break;
                
            case SHIFT:
                currentLALR = parseAction.getValue();
                nextToken.setState(currentLALR);
                stack.push(nextToken);
                parseResult = ParseResult.SHIFT;
                break;
                
            case ERROR:      // fall-through intended
            case GOTO:       // fall-through intended         
            case UNDEFINED:
                // Syntax error - produce a list of expected symbols to report
                expectedSymbols.clear();
                for (LRAction action : lrStates.get(currentLALR)) {
                    SymbolType type = action.getSymbol().getType();
                    switch (type) {
                        case CONTENT:       // fall-through intended
                        case END:           // fall-through intended
                            expectedSymbols.add(action.getSymbol());
                            break;


                        case GROUP_START:   // fall-through intended
                        case GROUP_END:     // fall-through intended
                        case COMMENT_LINE:
                            expectedSymbols.add(action.getSymbol());
                            break;
                            
                        case ERROR:
                        case NOISE:
                        case NON_TERMINAL:
                        case UNDEFINED:
                            // do nothing
                            break;
                    }
                }
                
                parseResult = ParseResult.SYNTAX_ERROR;
                break;
        }
        
        return parseResult;
    }
    
    /*----------------------------------------------------------------------------*/


    /**
     * This method creates a token and also takes into account the current
     * lexing mode of the parser. In particular, it contains the group logic. 
     *
     * A stack is used to track the current "group". This replaces the comment
     * level counter. Text is appended to the token on the top of the stack. This 
     * allows the group text to returned in one chunk.
     * @return Token
     */
    protected Token produceToken() {
        Token token = null;


        boolean nestGroup = false;
        
        boolean done = false;
        while (!done) {
            Token read = lookaheadDFA();
            
            // Groups (comments, etc.)
            // The logic - to determine if a group should be nested - requires that the top 
            // of the stack and the symbol's linked group need to be looked at. Both of these 
            // can be unset. So, this section sets a boolean and avoids errors. We will use 
            // this boolean in the logic chain below. 
            if (read.getType().equals(SymbolType.GROUP_START) || read.getType().equals(SymbolType.COMMENT_LINE)) {
                if (groupStack.size() == 0) {
                    nestGroup = true;
                } else {
                    nestGroup = groupStack.peek().getGroup().getNesting().contains(read.getGroup().getIndex());
                }
            } else {
                nestGroup = false;
            }


            // Logic chain
            if (nestGroup) {
                consumeBuffer(read.asString().length());
                
                // fix up the comment block
                if (read.getData() != null) {
                    read.appendData(read.getData().toString());
                    read.setData(null);
                }
                
                groupStack.push(read);                
            } else if (groupStack.size() == 0) {
                // The token is ready to be analyzed
                consumeBuffer(read.asString().length());
                token = read;
                done = true;
            } else if (groupStack.peek().getGroup().getEnd().getTableIndex() == read.getTableIndex()) {
                // End the current group
                Token pop = groupStack.pop();


                // Ending logic
                if (pop.getGroup().getEndingMode() == EndingMode.CLOSED) {
                    pop.appendData(read.asString());
                    consumeBuffer(read.asString().length());
                }
                
                if (groupStack.size() == 0) {
                    // We are out of the group. Return pop'd token which contains all the group text    
                    pop.setSymbol(pop.getGroup().getContainer());
                    token = pop;
                    done = true;
                } else {
                    // Append group text to parent
                    groupStack.peek().appendData(pop.asString());
                }
            } else if (read.getType().equals(SymbolType.END)) {
                // EOF always stops the loop. The caller method (parse) can flag a runaway group error.
                token = read;
                done = true;
            } else {
                // We are in a group, Append to the Token on the top of the stack.
                // Take into account the Token group mode 
                Token top = groupStack.peek();
                if (top.getGroup().getAdvanceMode() == AdvanceMode.TOKEN) {
                    // Append all text
                    top.appendData(read.asString());
                    consumeBuffer(read.asString().length());
                } else {
                    // Append one character
                    top.appendData(read.asString().substring(0, 1));
                    consumeBuffer(1);
                }
            }                
        }
        
        return token;
    }
    
    /*----------------------------------------------------------------------------*/


    /**
     * Inserts Group objects into the group table so comments can be processed in a 
     * grammar.  It is assumed that version 1.0 files have a maximum of 1 closed
     * comment block and one comment line symbol.
     */
    private void resolveCommentGroupsForVersion1Grammars() {
        if (isVersion1Format()) {
            Group group;
            Symbol symbolStart = null;
            Symbol symbolEnd = null;
            
            // Create a new COMMENT_LINE group
            for (Symbol currentStartSymbol : symbolTable) {
                if (currentStartSymbol.getType().equals(SymbolType.COMMENT_LINE)) {
                    symbolStart = currentStartSymbol;
                    group = new Group();
                    group.setName("Comment Line");
                    group.setContainer(symbolTable.findByName(SymbolList.SYMBOL_COMMENT));
                    group.setStart(symbolStart);
                    group.setEnd(symbolTable.findByName("NewLine"));
                    group.setAdvanceMode(AdvanceMode.TOKEN);
                    group.setEndingMode(EndingMode.OPEN);
                    groupTable.add(group);
                    symbolStart.setGroup(group);
                    break;
                }
            }


            // Create a new COMMENT_BLOCK group
            for (Symbol currentStartSymbol : symbolTable) {
                if (currentStartSymbol.getType().equals(SymbolType.GROUP_START)) {
                    symbolStart = symbolEnd = currentStartSymbol;
                    for (Symbol currentEndSymbol : symbolTable) {
                        if (currentEndSymbol.getType().equals(SymbolType.GROUP_END)) { 
                            symbolEnd = currentEndSymbol;
                            break;
                        }
                    }    
                    group = new Group();
                    group.setName("Comment Block");
                    group.setContainer(symbolTable.findByName(SymbolList.SYMBOL_COMMENT));
                    group.setStart(symbolStart);
                    group.setEnd(symbolEnd);
                    group.setAdvanceMode(AdvanceMode.TOKEN);
                    group.setEndingMode(EndingMode.CLOSED);
                    groupTable.add(group);
                    
                    symbolStart.setGroup(group);                         
                    symbolEnd.setGroup(group);                         
                    
                    break;
                }
            }
        }
    }
    
    /*----------------------------------------------------------------------------*/
    
    /**
     * Restarts the parser. The loaded tables are retained
     */
    protected void restart() {
        currentLALR = LRState.INITIAL_STATE;
        
        sysPosition = new Position(1, 1);
        currentPosition = new Position(1, 1);
        
        lookaheadBuffer = new StringBuilder();
        
        haveReduction = false;
        
        if (expectedSymbols == null) {
            expectedSymbols = new SymbolList();
        }
        expectedSymbols.clear();


        if (groupStack == null) {
            groupStack = new Stack<Token>();
        }
        groupStack.clear();


        if (inputTokens == null) {
            inputTokens = new Stack<Token>();
        }
        inputTokens.clear();


        if (stack == null) {
            stack = new Stack<Token>();
        }
        stack.clear();
    }
    
    /*----------------------------------------------------------------------------*/


    public void setTrimReductions(boolean value) {
        trimReductions = value;
    }
}
Source Code of com.creativewidgetworks.goldparser.engine.Parser

Related Classes of com.creativewidgetworks.goldparser.engine.Parser