/*
[The "BSD license"]
Copyright (c) 2010 Kyle Yetter
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.codegen;
import org.antlr.Tool;
import org.antlr.tool.Grammar;
import org.stringtemplate.v4.AttributeRenderer;
import org.stringtemplate.v4.ST;
import org.stringtemplate.v4.STGroup;
import java.io.IOException;
import java.util.*;
public class RubyTarget extends Target
{
/**
 * Reserved words of the Ruby language.
 * <p>
 * A grammar label or method name equal to one of these words would cause a
 * parse error in the generated Ruby source, so the renderer's
 * <code>label</code> format escapes such names.
 * <p>
 * Built from a plain list rather than with "double brace" initialisation,
 * which would define an extra anonymous subclass of HashSet just to fill
 * the set.
 */
public static final Set<String> rubyKeywords =
    new HashSet<String>( Arrays.asList(
        "alias",    "END",    "retry",
        "and",      "ensure", "return",
        "BEGIN",    "false",  "self",
        "begin",    "for",    "super",
        "break",    "if",     "then",
        "case",     "in",     "true",
        "class",    "module", "undef",
        "def",      "next",   "unless",
        "defined?", "nil",    "until",
        "do",       "not",    "when",
        "else",     "or",     "while",
        "elsif",    "redo",   "yield",
        "end",      "rescue"
    ) );
/**
 * Action maps for the "all" named scope, keyed by grammar name.
 * <p>
 * genRecognizerFile stores the "all" action map of a combined grammar here
 * and later copies it into the action map of the implicit lexer that has
 * the same grammar name.
 * <p>
 * NOTE(review): this is a plain static HashMap shared by every RubyTarget
 * instance; nothing in this file synchronises access or removes entries.
 */
public static Map<String, Map<String, Object>> sharedActionBlocks = new HashMap<String, Map<String, Object>>();
public class RubyRenderer implements AttributeRenderer
{
/**
 * Escape text for each character value 0..255, indexed by character code.
 * Consulted by rubyString when it builds a double-quoted Ruby literal.
 */
protected String[] rubyCharValueEscape = new String[256];

/**
 * Fills the escape table: characters 32..126 map to themselves, every other
 * value maps to a two-digit <code>\xNN</code> escape, and a handful of
 * characters get their conventional backslash form.
 */
public RubyRenderer() {
    for ( int code = 0; code < rubyCharValueEscape.length; code++ ) {
        if ( code >= 32 && code < 127 ) {
            rubyCharValueEscape[ code ] = Character.toString( (char) code );
        } else {
            // values below 16 need a leading zero to keep two hex digits
            String prefix = ( code < 16 ) ? "\\x0" : "\\x";
            rubyCharValueEscape[ code ] = prefix + Integer.toHexString( code );
        }
    }

    // conventional escapes replace the generic entries written above
    rubyCharValueEscape[ '\\' ] = "\\\\";
    rubyCharValueEscape[ '"' ]  = "\\\"";
    rubyCharValueEscape[ '\b' ] = "\\b";
    rubyCharValueEscape[ '\t' ] = "\\t";
    rubyCharValueEscape[ '\n' ] = "\\n";
    rubyCharValueEscape[ '\f' ] = "\\f";
    rubyCharValueEscape[ '\r' ] = "\\r";
}
/**
 * Renders an attribute according to the requested format name.
 * <p>
 * With no format name, or when the attribute's string form is empty, the
 * string form is returned untouched. Recognised format names are
 * snakecase, camelcase, subcamelcase, constant, platform, lexerRule,
 * constantPath, rubyString, label and symbol.
 *
 * @param o          the attribute to render
 * @param formatName the format to apply, or null for none
 * @param locale     not used by this renderer
 * @return the rendered text
 * @throws IllegalArgumentException if the format name is not one of the
 *         names listed above (and the text is not empty)
 */
@Override
public String toString( Object o, String formatName, Locale locale ) {
    final String text = o.toString();
    if ( formatName == null || text.length() == 0 ) {
        return text;
    }

    if ( "snakecase".equals( formatName ) )    return snakecase( text );
    if ( "camelcase".equals( formatName ) )    return camelcase( text );
    if ( "subcamelcase".equals( formatName ) ) return subcamelcase( text );
    if ( "constant".equals( formatName ) )     return constantcase( text );
    if ( "platform".equals( formatName ) )     return platform( text );
    if ( "lexerRule".equals( formatName ) )    return lexerRule( text );
    if ( "constantPath".equals( formatName ) ) return constantPath( text );
    if ( "rubyString".equals( formatName ) )   return rubyString( text );
    if ( "label".equals( formatName ) )        return label( text );
    if ( "symbol".equals( formatName ) )       return symbol( text );

    throw new IllegalArgumentException( "Unsupported format name" );
}
/**
 * Converts a word, which may be camelCased, to snake_case underscore style.
 * <p>
 * Every character except the last is examined together with its successor
 * and, inside a run of capitals, the character after that:
 * <ul>
 * <li>a letter is appended lower-cased, followed by an underscore when the
 *     next character is a digit or whitespace, when the letter is lower
 *     case and the next is upper case (a camelCase word edge), or when the
 *     letter and the next are upper case and the one after is lower case
 *     (the end of an acronym);</li>
 * <li>a digit is appended, followed by an underscore when a letter comes
 *     next;</li>
 * <li>whitespace is dropped;</li>
 * <li>any other character is appended unchanged.</li>
 * </ul>
 * The last character is appended lower-cased unless it is whitespace.
 * <pre>
 * example -- aGUIWhatNot becomes a_gui_what_not
 *   cur  next  peek   appended
 *   a    G            a _      (lower-upper word edge)
 *   G    U     I      g
 *   U    I     W      u
 *   I    W     h      i _      (last character of an acronym)
 *   W    h            w
 *   ... and so on
 * </pre>
 *
 * @param value the text to convert
 * @return the snake_case form of the text
 */
private String snakecase( String value ) {
    int l = value.length();
    if ( l == 0 ) return value;
    // Locale.ROOT keeps this path consistent with the locale-independent
    // Character.toLowerCase used below; the default-locale overload would
    // turn "I" into a dotless i under a Turkish default locale.
    if ( l == 1 ) return value.toLowerCase( Locale.ROOT );

    StringBuilder output_buffer = new StringBuilder();
    int cliff = l - 1;   // index of the last character, handled after the loop
    char cur;
    char next;
    char peek;

    for ( int i = 0; i < cliff; i++ ) {
        cur  = value.charAt( i );
        next = value.charAt( i + 1 );

        if ( Character.isLetter( cur ) ) {
            output_buffer.append( Character.toLowerCase( cur ) );

            if ( Character.isDigit( next ) || Character.isWhitespace( next ) ) {
                output_buffer.append( '_' );
            } else if ( Character.isLowerCase( cur ) && Character.isUpperCase( next ) ) {
                // at camelcase word edge
                output_buffer.append( '_' );
            } else if ( ( i < cliff - 1 ) && Character.isUpperCase( cur ) && Character.isUpperCase( next ) ) {
                // cur is part of an acronym
                peek = value.charAt( i + 2 );
                if ( Character.isLowerCase( peek ) ) {
                    /* next starts a new word (peek is lowercase), so the
                       acronym is closed off with an underscore */
                    output_buffer.append( '_' );
                }
            }
        } else if ( Character.isDigit( cur ) ) {
            output_buffer.append( cur );
            if ( Character.isLetter( next ) ) {
                output_buffer.append( '_' );
            }
        } else if ( Character.isWhitespace( cur ) ) {
            // whitespace is dropped
        } else {
            output_buffer.append( cur );
        }
    }

    cur = value.charAt( cliff );
    if ( ! Character.isWhitespace( cur ) ) {
        output_buffer.append( Character.toLowerCase( cur ) );
    }

    return output_buffer.toString();
}
/**
 * Converts a word to upper-case snake style, e.g. for a Ruby constant name.
 * <p>
 * Upper-casing uses Locale.ROOT so the generated identifier does not depend
 * on the default locale of the machine running the tool (a Turkish default
 * locale would otherwise turn "i" into a dotted capital I).
 *
 * @param value the text to convert
 * @return the snake_case form of the text, upper-cased
 */
private String constantcase( String value ) {
    return snakecase( value ).toUpperCase( Locale.ROOT );
}
/**
 * Wraps the text in double underscores, so that "name" becomes "__name__".
 *
 * @param value the text to wrap
 * @return the text with "__" added before and after it
 */
private String platform( String value ) {
    StringBuilder wrapped = new StringBuilder( "__" );
    wrapped.append( value );
    wrapped.append( "__" );
    return wrapped.toString();
}
/**
 * Renders the text as a Ruby symbol literal.
 * <p>
 * Text that looks like a plain identifier, optionally ending in one of
 * '?', '!' or '=', is written in the short ":name" form; anything else is
 * written in the "%s(...)" form.
 *
 * @param value the symbol's text
 * @return the symbol literal
 */
private String symbol( String value ) {
    boolean plainIdentifier = value.matches( "[a-zA-Z_]\\w*[\\?\\!\\=]?" );
    if ( plainIdentifier ) {
        return ":" + value;
    }
    return "%s(" + value + ")";
}
/**
 * Produces the method name used for a lexer rule: the rule name in
 * snake_case followed by '!'. The rule named "Tokens" is special-cased to
 * "token!".
 *
 * @param value the lexer rule name
 * @return the method name for that rule
 */
private String lexerRule( String value ) {
    final String stem = "Tokens".equals( value ) ? "token" : snakecase( value );
    return stem + "!";
}
/**
 * Replaces every '.' in the text with "::", so that "A.B.C" becomes
 * "A::B::C".
 *
 * @param value the dot-separated path
 * @return the path with "::" as separator
 */
private String constantPath( String value ) {
    // literal replacement; no regular expression is needed for a fixed dot
    return value.replace( ".", "::" );
}
/**
 * Renders the text as a double-quoted Ruby string literal.
 * <p>
 * Code points below 256 are escaped through the rubyCharValueEscape table.
 * The table has only 256 entries, so indexing it with any larger character
 * used to throw ArrayIndexOutOfBoundsException. Such code points are now
 * written as a <code>\\u{hex}</code> escape instead.
 * NOTE(review): that escape form is understood by Ruby 1.9 and later;
 * confirm whether older interpreters still need to be supported.
 *
 * @param value the raw text
 * @return the text wrapped in double quotes with special characters escaped
 */
private String rubyString( String value ) {
    int len = value.length();
    StringBuilder output_buffer = new StringBuilder( len + 2 );
    output_buffer.append( '"' );
    // walk by code point so a surrogate pair yields a single escape
    for ( int i = 0; i < len; ) {
        int codePoint = value.codePointAt( i );
        if ( codePoint < rubyCharValueEscape.length ) {
            output_buffer.append( rubyCharValueEscape[ codePoint ] );
        } else {
            output_buffer.append( "\\u{" )
                         .append( Integer.toHexString( codePoint ) )
                         .append( '}' );
        }
        i += Character.charCount( codePoint );
    }
    output_buffer.append( '"' );
    return output_buffer.toString();
}
/**
 * Converts underscore- or whitespace-separated text to CamelCase.
 * <p>
 * Whitespace and underscores are dropped and mark a word edge. Digits are
 * kept and also mark a word edge. The first letter after a word edge is
 * upper-cased; all other characters are copied unchanged. Text of length
 * one is simply upper-cased.
 *
 * @param value the text to convert
 * @return the CamelCase form of the text
 */
private String camelcase( String value ) {
    int length = value.length();
    if ( length == 0 ) return value;
    // Locale.ROOT keeps the result independent of the default locale, in
    // line with the locale-independent Character.toUpperCase used below.
    if ( length == 1 ) return value.toUpperCase( Locale.ROOT );

    StringBuilder output_buffer = new StringBuilder( length );
    boolean at_edge = true;

    for ( int i = 0; i < length; i++ ) {
        char cur = value.charAt( i );

        if ( Character.isWhitespace( cur ) || cur == '_' ) {
            // separators are dropped and start a new word
            at_edge = true;
        } else if ( Character.isDigit( cur ) ) {
            output_buffer.append( cur );
            at_edge = true;
        } else if ( at_edge ) {
            output_buffer.append( Character.toUpperCase( cur ) );
            // only a letter ends the edge; other characters leave it pending
            if ( Character.isLetter( cur ) ) at_edge = false;
        } else {
            output_buffer.append( cur );
        }
    }

    return output_buffer.toString();
}
/**
 * Escapes a label so it can be used as an identifier in generated Ruby.
 * <ul>
 * <li>a Ruby keyword is wrapped in double underscores;</li>
 * <li>"FILE" and "LINE" become "_FILE_" and "_LINE_", since the
 *     double-underscore wrapping would produce Ruby's own __FILE__ and
 *     __LINE__;</li>
 * <li>any other name starting with an upper-case character is wrapped in
 *     double underscores;</li>
 * <li>everything else is returned unchanged.</li>
 * </ul>
 *
 * @param value the label text; must not be empty
 * @return the escaped label
 */
private String label( String value ) {
    if ( rubyKeywords.contains( value ) ) {
        return platform( value );
    }
    if ( "FILE".equals( value ) ) {
        return "_FILE_";
    }
    if ( "LINE".equals( value ) ) {
        return "_LINE_";
    }
    boolean startsUpper = Character.isUpperCase( value.charAt( 0 ) );
    return startsUpper ? platform( value ) : value;
}
/**
 * Converts the text to camelCase with a lower-case first character, i.e.
 * the camelcase form with its first character lower-cased.
 *
 * @param value the text to convert
 * @return the lower-camel-case form of the text
 */
private String subcamelcase( String value ) {
    final String camel = camelcase( value );
    if ( camel.length() == 0 ) {
        return camel;
    }
    char first = Character.toLowerCase( camel.charAt( 0 ) );
    return first + camel.substring( 1 );
}
}
/**
 * Registers the Ruby string renderer on the generator's template group and
 * writes the recognizer file, after sharing the "all" action block between
 * a combined grammar and its implicit lexer.
 *
 * @param tool         the ANTLR tool instance (not used here)
 * @param generator    the code generator supplying the templates and
 *                     performing the write
 * @param grammar      the grammar being generated
 * @param outputFileST the template holding the recognizer file contents
 * @throws IOException declared by the overridden method; thrown by nothing
 *                     here other than possibly the calls into generator
 */
@Override
protected void genRecognizerFile(
    Tool tool,
    CodeGenerator generator,
    Grammar grammar,
    ST outputFileST
) throws IOException
{
    /*
      Below is an experimental attempt at providing a few named action blocks
      that are printed in both lexer and parser files from combined grammars.
      ANTLR appears to first generate a parser, then generate an independent lexer,
      and then generate code from that. It keeps the combo/parser grammar object
      and the lexer grammar object, as well as their respective code generator and
      target instances, completely independent. So, while a bit hack-ish, this is
      a solution that should work without having to modify Terence Parr's
      core tool code.

      - sharedActionBlocks is a class variable containing a hash map
      - if this method is called with a combo grammar, and the action map
        in the grammar contains an entry for the named scope "all",
        add an entry to sharedActionBlocks mapping the grammar name to
        the "all" action map.
      - if this method is called with an `implicit lexer'
        (one that's extracted from a combo grammar), check to see if
        there's an entry in sharedActionBlocks for the lexer's grammar name.
      - if there is an action map entry, place it in the lexer's action map
      - the recognizerFile template has code to place the
        "all" actions appropriately

      problems:
        - This solution assumes that the parser will be generated
          before the lexer. If that changes at some point, this will
          not work.
        - I have not investigated how this works with delegation yet

      Kyle Yetter - March 25, 2010
    */
    if ( grammar.type == Grammar.COMBINED ) {
        Map<String, Map<String, Object>> actions = grammar.getActions();
        if ( actions.containsKey( "all" ) ) {
            sharedActionBlocks.put( grammar.name, actions.get( "all" ) );
        }
    } else if ( grammar.implicitLexer ) {
        if ( sharedActionBlocks.containsKey( grammar.name ) ) {
            Map<String, Map<String, Object>> actions = grammar.getActions();
            actions.put( "all", sharedActionBlocks.get( grammar.name ) );
        }
    }

    // A class literal replaces the reflective Class.forName( "java.lang.String" )
    // lookup, which could never fail and only forced a dead catch block.
    STGroup group = generator.getTemplates();
    RubyRenderer renderer = new RubyRenderer();
    group.registerRenderer( String.class, renderer );

    String fileName =
        generator.getRecognizerFileName( grammar.name, grammar.type );
    generator.write( outputFileST, fileName );
}
/**
 * Converts a quoted ANTLR character literal into a hexadecimal integer
 * literal ("0x" followed by the character's code in hex).
 * <p>
 * The first and last characters of the literal (its quotes) are stripped.
 * The remainder is either a single character, or a backslash escape: one of
 * \\ \" \' \n \r \t \b \f, or \\u followed by hex digits. For any other
 * form a diagnostic line is printed to standard output and the result is
 * "0x0".
 *
 * @param generator the code generator (not used here)
 * @param literal   the character literal including its surrounding quotes
 * @return the character code as a hexadecimal literal
 */
@Override
public String getTargetCharLiteralFromANTLRCharLiteral(
    CodeGenerator generator,
    String literal
)
{
    final String body = literal.substring( 1, literal.length() - 1 );
    int codePoint = 0;

    if ( body.charAt( 0 ) != '\\' ) {
        // plain, unescaped character
        if ( body.length() == 1 ) {
            codePoint = body.codePointAt( 0 );
        } else {
            System.out.println( "2: hey you didn't account for this: \"" + body + "\"" );
        }
        return "0x" + Integer.toHexString( codePoint );
    }

    final char escaped = body.charAt( 1 );
    switch ( escaped ) {
    case 'n':
        codePoint = '\n';
        break;
    case 'r':
        codePoint = '\r';
        break;
    case 't':
        codePoint = '\t';
        break;
    case 'b':
        codePoint = '\b';
        break;
    case 'f':
        codePoint = '\f';
        break;
    case '\\':
    case '"':
    case '\'':
        // the escaped character stands for itself
        codePoint = escaped;
        break;
    case 'u':
        // everything after "\\u" is taken as the hex digits
        codePoint = Integer.parseInt( body.substring( 2 ), 16 );
        break;
    default:
        System.out.println( "1: hey you didn't account for this: \"" + body + "\"" );
        break;
    }

    return "0x" + Integer.toHexString( codePoint );
}
/**
 * Reports the largest character value this target accepts.
 *
 * @param generator the code generator (not used here)
 * @return 0xFF
 */
@Override
public int getMaxCharValue( CodeGenerator generator )
{
    // Ruby versions before 1.9 do not support unicode, so stay within one byte
    final int largestSingleByteValue = 0xFF;
    return largestSingleByteValue;
}
/**
 * Returns the label used for a token type in generated code.
 * <p>
 * The grammar's display name for the token is used as is, unless it starts
 * with a single quote (a literal); in that case the grammar is asked to
 * compute a token name from the literal.
 *
 * @param generator the code generator whose grammar is consulted
 * @param ttype     the token type
 * @return the label for the token type
 */
@Override
public String getTokenTypeAsTargetLabel( CodeGenerator generator, int ttype )
{
    final String displayName = generator.grammar.getTokenDisplayName( ttype );
    final boolean isLiteral = displayName.charAt( 0 ) == '\'';
    return isLiteral
        ? generator.grammar.computeTokenNameFromLiteral( ttype, displayName )
        : displayName;
}
/**
 * Tells whether a named action scope is acceptable for a grammar type.
 * <p>
 * The scopes "all", "token", "module" and "overrides" are accepted for
 * every grammar type. Beyond those, a lexer grammar accepts "lexer", a
 * parser grammar accepts "parser", a combined grammar accepts both
 * "parser" and "lexer", and a tree parser grammar accepts "treeparser".
 *
 * @param grammarType one of the Grammar type constants
 * @param scope       the action scope name
 * @return true if the scope is valid for the grammar type
 */
@Override
public boolean isValidActionScope( int grammarType, String scope ) {
    boolean universal = scope.equals( "all" )
        || scope.equals( "token" )
        || scope.equals( "module" )
        || scope.equals( "overrides" );
    if ( universal ) {
        return true;
    }

    switch ( grammarType ) {
    case Grammar.LEXER:
        return scope.equals( "lexer" );
    case Grammar.PARSER:
        return scope.equals( "parser" );
    case Grammar.COMBINED:
        return scope.equals( "parser" ) || scope.equals( "lexer" );
    case Grammar.TREE_PARSER:
        return scope.equals( "treeparser" );
    default:
        return false;
    }
}
/**
 * Renders an integer character value as decimal text. The value 65535
 * (0xFFFF) is rendered as "-1".
 * NOTE(review): 0xFFFF is presumably the 16-bit form of an end-of-file
 * marker; confirm against the callers.
 *
 * @param v the character value
 * @return the decimal text for the value, or "-1" for 65535
 */
@Override
public String encodeIntAsCharEscape( final int v ) {
    return String.valueOf( v == 0xFFFF ? -1 : v );
}
}