Package beaver.spec

Source Code of beaver.spec.GrammarBuilder$DeclarationWalker

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* This file is part of Beaver Parser Generator.                       *
* Copyright (C) 2003-2009 Alexander Demenchuk <alder@softanvil.com>.  *
* All rights reserved.                                                *
* See the file "LICENSE" for the terms and conditions for copying,    *
* distribution and modification of Beaver.                            *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

package beaver.spec;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.Iterator;

import beaver.Symbol;
import beaver.comp.util.Log;
import beaver.spec.Production.RHS;
import beaver.spec.ast.Declaration;
import beaver.spec.ast.GrammarTreeRoot;
import beaver.spec.ast.Rule;
import beaver.spec.ast.TreeWalker;

/**
*
*/
public class GrammarBuilder extends TreeWalker
{
  /**
   * Specialization that visits only Rules
   */
  static class RuleWalker extends TreeWalker
  {
    public void visit(GrammarTreeRoot node)
    {
      for (int i = 0; i < node.rules.length; i++)
      {
        node.rules[i].accept(this);
      }
    }
  }
 
  /**
   * Specialization that visits only Declarations
   */
  static class DeclarationWalker extends TreeWalker
  {
    public void visit(GrammarTreeRoot node)
    {
      for (int i = 0; i < node.declarations.length; i++)
      {
        node.declarations[i].accept(this);
      }
    }
  }
 
  /**
   * Checks whether braces in Java code are balanced, to make sure that a
   * generated parser will be compilable.
   *
   * @param code Java code where braces are checked
   * @return true if braces are balanced
   */
  static private boolean checkBraces(String code)
  {
    boolean ovr = false;
    int n = 0, len = code.length();
    for (int i = 0; i < len; i++)
    {
      char c = code.charAt(i);
      if (c == '{')
        n++;
      else if (c == '}')
        n--;
      if (n < 0)
      {
        ovr = true;
      }
    }
    return !ovr && n == 0;
  }
 
  static private String trimCode(String code)
  {
    if (code != null)
    {
          int i = code.length();
      do { --i; } while (i >= 0 && Character.isWhitespace(code.charAt(i)));
      code = code.substring(0, i + 1);
    }
    return code;
  }

  /** "Destination" to report our progress */
  private Log log;
 
  /** The "thing" we build here. */
  private Grammar grammar;

  /** Maps symbol names to grammar symbols */
  private HashMap symbols;
 
  /** Number of non-terminal symbols defined by the grammar */
  private int n_nonterms;
 
  /** Number of terminals in the grammar */
  private int n_terms;
 
  /** Number of production rules in the grammar */
  private int n_rules;
 
  public GrammarBuilder(Log log)
  {
    this.log = log;
  }
 
  public Grammar getGrammar()
  {
    return grammar;
  }
 
  public void visit(GrammarTreeRoot root)
  {
    grammar = new Grammar();

    symbols = new HashMap(89);
    symbols.put(grammar.error.name, grammar.error);
   
    n_nonterms = 1; // error (above)
    n_terms = 1; // implicit EOF
    n_rules = 0;

    final HashMap tokens = new HashMap(89);
        final ArrayList goals = new ArrayList();
   
    root.accept(new TreeWalker() /* collect terminals, nonterminals and "virtual" symbols */
    {
      public void visit(Declaration.Terminals decl)
      {
        for (int i = 0; i < decl.symbols.length; i++)
        {
          String sym_name = (String) decl.symbols[i].value;
          if (!symbols.containsKey(sym_name))
          {
            symbols.put(sym_name, new Terminal(sym_name));
            n_terms++;
            tokens.put(sym_name, decl.symbols[i]);
          }
        }
      }
      public void visit(Rule rule)
      {
        String lhs_sym_name = rule.getLHSSymbolName();
        if (!symbols.containsKey(lhs_sym_name))
        {
          symbols.put(lhs_sym_name, new NonTerminal(lhs_sym_name));
          n_nonterms++;
        }
        else if (tokens.containsKey(lhs_sym_name))
        {
          log.error(rule.lhs_sym, "nonterminal was declared as a terminal");
          symbols.put(lhs_sym_name, new NonTerminal(lhs_sym_name));
          n_nonterms++;
          tokens.remove(lhs_sym_name);
          n_terms--;
        }
        super.visit(rule);
      }
      public void visit(Rule.Definition rhs)
      {
        String prec_sym_name = rhs.getPrecedenceSymbolName();
        if (prec_sym_name != null && !symbols.containsKey(prec_sym_name))
        {
          GrammarSymbol sym = new Terminal(prec_sym_name);
          sym.id = -1; // "virtual"
          symbols.put(prec_sym_name, sym);
        }
        n_rules++;
      }
    });
    root.accept(new RuleWalker() /* "collect" undefined symbols */
    {
      public void visit(Rule.Definition.Element rhs_item)
      {
        String rhs_sym_name = rhs_item.getName();
        GrammarSymbol rhs_sym = (GrammarSymbol) symbols.get(rhs_sym_name);
        if (rhs_sym == null)
        {
          log.error(rhs_item.sym_name, "symbol is neither a terminal nor a nonterminal of the grammar");
          symbols.put(rhs_sym_name, rhs_sym = new Terminal(rhs_sym_name));
          rhs_sym.id = -1; // make it a "virtual" terminal
        }
        else if (rhs_sym instanceof Terminal)
        {
          if (rhs_sym.id < 0)
            log.error(rhs_item.sym_name, "symbol is not declared as a grammar terminal");
          else
            tokens.remove(rhs_sym_name);
        }
      }
    });
    for (Iterator i = tokens.values().iterator(); i.hasNext();)
    {
      Symbol token = (Symbol) i.next();
      log.warning(token, "declared terminal is not used by the grammar");
      symbols.remove(token.value);
      n_terms--;
    }
    root.accept(new DeclarationWalker()
    {
      /** Next unused highest precedence */
      private int precedence = Integer.MAX_VALUE;
      private Set imports = new HashSet();
      private Set impls = new HashSet();
     
      public void visit(GrammarTreeRoot root)
      {
        imports.add(Grammar.EBNF_LIST_TYPE);
        imports.add("beaver.*");
        super.visit(root);
        grammar.imports = (String[]) imports.toArray(new String[imports.size()]);
        grammar.impls = (String[]) impls.toArray(new String[impls.size()]);
      }
      public void visit(Declaration.Header decl)
      {
        if (grammar.prolog == null)
        {
          String text = (String) decl.code.value;
          int i = 0;
          char c;
          while (Character.isWhitespace(c = text.charAt(i)) || c == '\n') i++;
         
          grammar.prolog = i > 0 ? text.substring(i) : text;
        }
        else
        {
          grammar.prolog += (String) decl.code.value;
        }
      }
      public void visit(Declaration.PackageName decl)
      {
        if (grammar.package_name != null)
        {
          log.warning(decl.name, "Parser package has been already defined as \"" + grammar.package_name + "\", new name ignored.");
        }
        else
        {
          grammar.package_name = decl.getName();
        }
      }
      public void visit(Declaration.Implements decl)
      {
        for (int i = 0; i < decl.symbols.length; i++)
        {
          impls.add(decl.symbols[i].value);
        }
      }
      public void visit(Declaration.Imports decl)
      {
        for (int i = 0; i < decl.symbols.length; i++)
        {
          imports.add(decl.symbols[i].value);
        }
      }
      public void visit(Declaration.ClassName decl)
      {
        if (grammar.class_name != null)
        {
          log.warning(decl.name, "Parser class name has been already defined as \"" + grammar.class_name + "\", new name ignored.");
        }
        else
        {
          grammar.class_name = decl.getName();
        }
      }
      public void visit(Declaration.ClassCode decl)
      {
        if (grammar.class_code != null)
        {
          log.warning(decl.code, "Embedded parser class code has been already defined, new code ignored.");
        }
        else
        {
          grammar.class_code = trimCode(getCode(decl));
        }
      }
      public void visit(Declaration.ConstructorCode decl)
      {
        if (grammar.init_code != null)
        {
          log.warning(decl.code, "Parser initialization code has been already defined, new code ignored.");
        }
        else
        {
          grammar.init_code = trimCode(getCode(decl));
        }
      }
      public void visit(Declaration.Goal decl)
      {
        String sym_name = decl.getName();
        GrammarSymbol sym = (GrammarSymbol) symbols.get(sym_name);
        if (sym == null)
        {
          log.error(decl.name, "Symbol is undefined");
        }
        else if (sym instanceof Terminal)
        {
          log.error(decl.name, "Symbol is a terminal");
        }
        else
        {
          goals.add(sym);
        }
      }
      public void visit(Declaration.TypeOf decl)
      {
        String type = decl.getTypeName();
        for (int i = 0; i < decl.symbols.length; i++)
        {
          GrammarSymbol sym = (GrammarSymbol) symbols.get(decl.symbols[i].value);
          if (sym == null)
          {
            log.error(decl.symbols[i], "Symbol is undefined");
          }
          else if (sym.type != null)
          {
            log.error(decl.symbols[i], "Symbol's Java type is already set to \"" + sym.type + "\"");
          }
          else
          {
            sym.type = type;
          }
        }
      }
      public void visit(Declaration.LeftAssoc decl)
      {
        setPrecedence(decl, Terminal.Associativity.LEFT);
      }
      public void visit(Declaration.RightAssoc decl)
      {
        setPrecedence(decl, Terminal.Associativity.RIGHT);
      }
      public void visit(Declaration.NonAssoc decl)
      {
        setPrecedence(decl, Terminal.Associativity.NONE);
      }
     
      private void setPrecedence(Declaration.SymbolsContainer decl, Terminal.Associativity type)
      {
        for (int i = 0; i < decl.symbols.length; i++)
        {
          String sym_name = (String) decl.symbols[i].value;
          GrammarSymbol sym = (GrammarSymbol) symbols.get(sym_name);
          if (sym == null)
          {
            log.warning(decl.symbols[i], "Symbol is not used by the grammar");
          }
          else if (sym instanceof NonTerminal)
          {
            log.error(decl.symbols[i], "Symbol is a non-terminal.");
          }
          else
          {
            ((Terminal) sym).setPrecedence(precedence, type);
          }
        }
        precedence--;
      }
     
      private String getCode(Declaration.CodeContainer decl)
      {
        String code = decl.getCode();
        if (!checkBraces(code))
        {
          log.warning(decl, "Java code has unbalanced braces");
        }
        return code;
      }
    });

        final ArrayList rules = new ArrayList(n_rules * 2); // twice the size to provide space for synthetic rules
       
        if (goals.isEmpty())
        {
            log.warning(root.rules[0].lhs_sym, "Grammar has not declared any goals, will use first declared nonterminal");
            grammar.goal_symbol = (NonTerminal) symbols.get(root.rules[0].getLHSSymbolName());
        }
        else if (goals.size() == 1) // conventional path
        {
            grammar.goal_symbol = (NonTerminal) goals.get(0);
        }
        else // parser needs a synthetic goal
        {
            NonTerminal[] alts = (NonTerminal[]) goals.toArray(new NonTerminal[goals.size()]);
           
            grammar.goal_symbol = new NonTerminal("$goal");
            symbols.put(grammar.goal_symbol.name, grammar.goal_symbol);
            n_nonterms++;

            rules.add(new Production(rules.size(), grammar.goal_symbol, new Production.RHS(alts[0])));
           
            for (int i = 1; i < alts.length; i++)
            {
                Terminal term = new Terminal("$" + alts[i].name);
                symbols.put(term.name, term);
                n_terms++;
                rules.add(new Production(rules.size(), grammar.goal_symbol, new Production.RHS(term, alts[i])));
            }
        }

    root.accept(new RuleWalker() /* grammar's goal cannot be used in any RHS */
    {
      boolean found = false;
     
      public void visit(GrammarTreeRoot root)
      {
        super.visit(root);
        if (found)
        {
          NonTerminal new_goal_sym = new NonTerminal("$goal");         
          new_goal_sym.type = grammar.goal_symbol.type;
          symbols.put(new_goal_sym.name, new_goal_sym);
          n_nonterms++;

          rules.add(new Production(rules.size(), new_goal_sym, new Production.RHS(grammar.goal_symbol)));

          grammar.goal_symbol = new_goal_sym;
        }
      }
     
      public void visit(Rule.Definition.Element rhs_item)
      {
        if (!found)
        {
          found = grammar.goal_symbol.name.equals(rhs_item.getName());
        }
      }
    });
    root.accept(new RuleWalker() /* build production rules */
    {
      private NonTerminal lhs_sym;
      private ArrayList rhs_elements = new ArrayList();
     
      public void visit(Rule rule)
      {
        lhs_sym = (NonTerminal) symbols.get(rule.getLHSSymbolName());
        super.visit(rule);
      }
     
      public void visit(Rule.Definition rhs)
      {
        rhs_elements.clear();
       
        super.visit(rhs);

        Production rule = new Production(rules.size(),
                                         lhs_sym,
                         new Production.RHS((Production.RHS.Item[]) rhs_elements.toArray(new Production.RHS.Item[rhs_elements.size()])),
                         rhs.getPrecedenceSymbolName() == null ? null : (Terminal) symbols.get(rhs.getPrecedenceSymbolName()));
        String code = rhs.getReduceActionCode();
        if (code != null)
        {
          if (!checkBraces(code))
          {
            log.warning(rhs.code, "Java code has unbalanced braces");
          }
          rule.code = trimCode(code);
        }
        if (rhs.elements.length > 0)
        {
          rule.start_pos = rhs.elements[0].getStart();
          rule.end_pos = rhs.elements[rhs.elements.length - 1].getEnd();
        }
        rules.add(rule);
      }
     
      public void visit(Rule.Definition.Element rhs_item)
      {
        GrammarSymbol rhs_sym = rhs_item.ebnf_sym.value == null ? (GrammarSymbol) symbols.get(rhs_item.getName()) : getExtendedSymbol(rhs_item);
         
        rhs_elements.add(new Production.RHS.Item(rhs_sym, rhs_item.getAlias()));
      }
     
      private GrammarSymbol getExtendedSymbol(Rule.Definition.Element rhs_item)
      {
        switch (rhs_item.getExtUseMark())
        {
          case '?': return getOpt(rhs_item.getName());
          case '+': return getLst(rhs_item.getName());
          case '*': return getOpt(getLst(rhs_item.getName()).name);
        }
        throw new IllegalArgumentException("unrecognized extended symbol notation");
      }
     
      private NonTerminal getOpt(String sym_name)
      {
        String opt_sym_name = "opt$" + sym_name;
        NonTerminal opt_sym = (NonTerminal) symbols.get(opt_sym_name);
        if (opt_sym == null)
        {
          GrammarSymbol item_sym = (GrammarSymbol) symbols.get(sym_name);
          symbols.put(opt_sym_name, opt_sym = new NonTerminal(opt_sym_name, item_sym.type));
          n_nonterms++;
          rules.add(new Production(rules.size(), opt_sym, new Production.RHS()));
          rules.add(new Production(rules.size(), opt_sym, new Production.RHS(item_sym)));
        }
        return opt_sym;
      }
     
      private NonTerminal getLst(String sym_name)
      {
        String lst_sym_name = "lst$" + sym_name;
        NonTerminal lst_sym = (NonTerminal) symbols.get(lst_sym_name);
        if (lst_sym == null)
        {
          GrammarSymbol item_sym = (GrammarSymbol) symbols.get(sym_name);
          symbols.put(lst_sym_name, lst_sym = new NonTerminal(lst_sym_name, item_sym.type != null ? "+" + item_sym.type : null));
          n_nonterms++;

          rules.add(new Production(rules.size(), lst_sym, new Production.RHS(item_sym)));
          rules.add(new Production(rules.size(), lst_sym, new Production.RHS(lst_sym, item_sym)));
        }
        return lst_sym;
      }
    });
   
    grammar.rules = (Production[]) rules.toArray(new Production[rules.size()]);
    grammar.nonterminals = getNonTerminals();
    grammar.terminals = getTerminals();

        propagateTypes(grammar.nonterminals);
    writeListsCode(grammar.nonterminals);
  }
 
  private Terminal[] getTerminals()
  {
    Production[] rules = new Production[grammar.rules.length];
    System.arraycopy(grammar.rules, 0, rules, 0, rules.length);
    Arrays.sort(rules, Production.NUM_TERM_CMP);
   
    Terminal[] terms = new Terminal[n_terms];
    terms[0] = grammar.eof;
    int n = 1;
    for (int i = 0; i < rules.length; i++)
    {
      RHS rhs = rules[i].rhs;
      if (rhs.n_term > 0)
      {
        for (int j = 0; j < rhs.items.length; j++)
        {
          GrammarSymbol sym = rhs.items[j].symbol;
          if (sym instanceof Terminal && sym.id == 0)
          {
            Terminal term = (Terminal) sym;
            term.id = (short) n;
            terms[n++] = term;
          }
        }
      }
    }
    if (n < n_terms)
      throw new IllegalStateException("found less terminals than previously counted");
   
    return terms;
  }
 
  private NonTerminal[] getNonTerminals()
  {
    Production[] rules = new Production[grammar.rules.length];
    System.arraycopy(grammar.rules, 0, rules, 0, rules.length);
    Arrays.sort(rules, Production.NUM_NONTERM_CMP);
   
    NonTerminal[] nts = new NonTerminal[n_nonterms];
    int n = 0;
    for (int i = 0; i < rules.length; i++)
    {
      RHS rhs = rules[i].rhs;
      if (rhs.n_nonterm > 0)
      {
        for (int j = 0; j < rhs.items.length; j++)
        {
          GrammarSymbol sym = rhs.items[j].symbol;
          if (sym instanceof NonTerminal && sym.id == 0)
          {
            NonTerminal nt = (NonTerminal) sym;
            nt.id = (short) (n + n_terms);
            nts[n++] = nt;
          }
        }
      }
    }
    grammar.goal_symbol.id = (short) (n + n_terms);
    nts[n++] = grammar.goal_symbol;
    if (grammar.error.id == 0)
    {
      grammar.error.id = (short) (n + n_terms);
      nts[n++] = grammar.error;
    }
    if (n < n_nonterms)
      throw new IllegalStateException("found less nonterminals than previously counted");

    return nts;
  }
   
    private void propagateTypes(NonTerminal[] nts)
    {
        boolean more_found;
        do {
            more_found = false;
           
            for (int i = 0; i < nts.length; i++)
            {
                if (nts[i].type != null)
                    continue;
               
                if (nts[i].definitions.size() != 2)
                {
                    if (nts[i].definitions.size() == 1)
                    {
                        Production rule = nts[i].definitions.start();
                        if (rule.code == null && rule.rhs.size() == 1)
                        {
                            GrammarSymbol item = rule.rhs.start().symbol;
                            if (item.type != null)
                            {
                                nts[i].type = item.type;
                                more_found = true;
                            }
                        }
                    }
                    continue;
                }
               
                Production elem_rule = nts[i].definitions.start();
                if (elem_rule.rhs.size() != 1)
                {
                    if ((elem_rule = elem_rule.next_definition).rhs.size() != 1)
                        continue;
                }
                if (elem_rule.code != null)
                    continue;

                GrammarSymbol elem = elem_rule.rhs.start().symbol;
                if (elem.type == null)
                    continue;
               
                Production next_rule = elem_rule.next_definition != null ? elem_rule.next_definition : nts[i].definitions.start();
                if (next_rule.code != null)
                    continue;

                if (next_rule.rhs.size() == 0)
                {
                    nts[i].type = elem.type;
                    more_found = true;
                }
                else if (next_rule.rhs.size() >= 2 &&  next_rule.rhs.start().symbol == nts[i] && next_rule.rhs.end().symbol == elem)
                {
                    nts[i].type = "+" + elem.type;
                    more_found = true;
                }
            }           
        }
        while (more_found);
    }
 
  private void writeListsCode(NonTerminal[] nts)
  {
    for (int i = 0; i < nts.length; i++)
    {
      if (nts[i].definitions.size() != 2)
        continue;
     
      Production new_list_rule = nts[i].definitions.start();
      if (new_list_rule.rhs.size() != 1)
      {
        if ((new_list_rule = new_list_rule.next_definition).rhs.size() != 1)
          continue;
      }
      if (new_list_rule.code != null)
        continue;
      GrammarSymbol elem = new_list_rule.rhs.start().symbol;
     
      Production add_elem_rule = nts[i].definitions.start();
      if (add_elem_rule.rhs.size() < 2)
      {
        if ((add_elem_rule = add_elem_rule.next_definition).rhs.size() < 2)
          continue;
      }
      if (add_elem_rule.code != null)
        continue;
      if (add_elem_rule.rhs.start().symbol != nts[i] || add_elem_rule.rhs.end().symbol != elem)
        continue;
     
      if (nts[i].type == null)
      {
        nts[i].type = "+" + (elem.type != null ? elem.type : "beaver.Symbol");
      }
      new_list_rule.code = new StringBuffer(Grammar.EBNF_LIST_TYPE_NAME.length() * 2 + 77)
        .append(Grammar.EBNF_LIST_TYPE_NAME).append(" lst = new ").append(Grammar.EBNF_LIST_TYPE_NAME).append("(); ")
        .append("lst.add(_symbols[offset + 1]").append(elem.type != null ? ".value" : "").append("); ")
        .append("return new Symbol(lst);")
        .toString();
      add_elem_rule.code = new StringBuffer(Grammar.EBNF_LIST_TYPE_NAME.length() + 88)
      .append("((").append(Grammar.EBNF_LIST_TYPE_NAME).append(") _symbols[offset + 1].value).add(_symbols[offset + ").append(add_elem_rule.rhs.size()).append("]").append(elem.type != null ? ".value" : "").append("); ")
      .append("return _symbols[offset + 1];")
      .toString();
    }
  }
}
TOP

Related Classes of beaver.spec.GrammarBuilder$DeclarationWalker

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.