Source Code of org.antlr.v4.parse.TokenVocabParser

/*
 * [The "BSD license"]
 *  Copyright (c) 2012 Terence Parr
 *  Copyright (c) 2012 Sam Harwell
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


package org.antlr.v4.parse;


import org.antlr.runtime.Token;
import org.antlr.v4.Tool;
import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.tool.ErrorType;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.ast.GrammarAST;


import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/** */
public class TokenVocabParser {
  protected final Grammar g;


  public TokenVocabParser(Grammar g) {
    this.g = g;
  }


  /** Load a vocab file {@code <vocabName>.tokens} and return mapping. */
  public Map<String,Integer> load() {
    Map<String,Integer> tokens = new LinkedHashMap<String,Integer>();
    int maxTokenType = -1;
    File fullFile = getImportedVocabFile();
    FileInputStream fis = null;
    BufferedReader br = null;
    Tool tool = g.tool;
    String vocabName = g.getOptionString("tokenVocab");
    try {
      Pattern tokenDefPattern = Pattern.compile("([^\n]+?)[ \\t]*?=[ \\t]*?([0-9]+)");
      fis = new FileInputStream(fullFile);
      InputStreamReader isr;
      if (tool.grammarEncoding != null) {
        isr = new InputStreamReader(fis, tool.grammarEncoding);
      }
      else {
        isr = new InputStreamReader(fis);
      }


      br = new BufferedReader(isr);
      String tokenDef = br.readLine();
      int lineNum = 1;
      while ( tokenDef!=null ) {
        Matcher matcher = tokenDefPattern.matcher(tokenDef);
        if ( matcher.find() ) {
          String tokenID = matcher.group(1);
          String tokenTypeS = matcher.group(2);
          int tokenType;
          try {
            tokenType = Integer.valueOf(tokenTypeS);
          }
          catch (NumberFormatException nfe) {
            tool.errMgr.toolError(ErrorType.TOKENS_FILE_SYNTAX_ERROR,
                        vocabName + CodeGenerator.VOCAB_FILE_EXTENSION,
                        " bad token type: "+tokenTypeS,
                        lineNum);
            tokenType = Token.INVALID_TOKEN_TYPE;
          }
          tool.log("grammar", "import "+tokenID+"="+tokenType);
          tokens.put(tokenID, tokenType);
          maxTokenType = Math.max(maxTokenType,tokenType);
          lineNum++;
        }
        else {
          if ( tokenDef.length()>0 ) { // ignore blank lines
            tool.errMgr.toolError(ErrorType.TOKENS_FILE_SYNTAX_ERROR,
                        vocabName + CodeGenerator.VOCAB_FILE_EXTENSION,
                        " bad token def: " + tokenDef,
                        lineNum);
          }
        }
        tokenDef = br.readLine();
      }
    }
    catch (FileNotFoundException fnfe) {
      GrammarAST inTree = g.ast.getOptionAST("tokenVocab");
      String inTreeValue = inTree.getToken().getText();
      if ( vocabName.equals(inTreeValue) ) {
        tool.errMgr.grammarError(ErrorType.CANNOT_FIND_TOKENS_FILE_REFD_IN_GRAMMAR,
                     g.fileName,
                     inTree.getToken(),
                     fullFile);
      }
      else { // must be from -D option on cmd-line not token in tree
        tool.errMgr.toolError(ErrorType.CANNOT_FIND_TOKENS_FILE_GIVEN_ON_CMDLINE,
                    fullFile,
                    g.name);
      }
    }
    catch (Exception e) {
      tool.errMgr.toolError(ErrorType.ERROR_READING_TOKENS_FILE,
                  e,
                  fullFile,
                  e.getMessage());
    }
    finally {
      try {
        if ( br!=null ) br.close();
      }
      catch (IOException ioe) {
        tool.errMgr.toolError(ErrorType.ERROR_READING_TOKENS_FILE,
                    ioe,
                    fullFile,
                    ioe.getMessage());
      }
    }
    return tokens;
  }


  /** Return a File descriptor for vocab file.  Look in library or
   *  in -o output path.  antlr -o foo T.g4 U.g4 where U needs T.tokens
   *  won't work unless we look in foo too. If we do not find the
   *  file in the lib directory then must assume that the .tokens file
   *  is going to be generated as part of this build and we have defined
   *  .tokens files so that they ALWAYS are generated in the base output
   *  directory, which means the current directory for the command line tool if there
   *  was no output directory specified.
   */
  public File getImportedVocabFile() {
    String vocabName = g.getOptionString("tokenVocab");
    File f = new File(g.tool.libDirectory,
              File.separator +
              vocabName +
              CodeGenerator.VOCAB_FILE_EXTENSION);
    if (f.exists()) {
      return f;
    }


    // We did not find the vocab file in the lib directory, so we need
    // to look for it in the output directory which is where .tokens
    // files are generated (in the base, not relative to the input
    // location.)
    f = new File(g.tool.outputDirectory, vocabName + CodeGenerator.VOCAB_FILE_EXTENSION);
    return f;
  }
}
Source Code of org.antlr.v4.parse.TokenVocabParser

Related Classes of org.antlr.v4.parse.TokenVocabParser