package org.typeexit.kettle.plugin.steps.ruby;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.commons.lang.ArrayUtils;
import org.eclipse.swt.SWT;
import org.eclipse.swt.custom.StyleRange;
import org.eclipse.swt.custom.StyledText;
import org.eclipse.swt.graphics.Color;
import org.eclipse.swt.graphics.RGB;
import org.eclipse.swt.widgets.Display;
import org.jruby.CompatVersion;
import org.jruby.common.RubyWarnings;
import org.jruby.lexer.yacc.ByteArrayLexerSource;
import org.jruby.lexer.yacc.LexerSource;
import org.jruby.lexer.yacc.RubyYaccLexer;
import org.jruby.lexer.yacc.SyntaxException;
import org.jruby.lexer.yacc.Token;
import org.jruby.parser.ParserConfiguration;
import org.jruby.parser.ParserSupport;
import org.jruby.parser.RubyParserResult;
import org.jruby.parser.Tokens;
import org.jruby.Ruby;
import org.pentaho.di.ui.core.widget.StyledTextComp;
import org.typeexit.kettle.plugin.steps.ruby.RubyStepMeta.RubyVersion;
public class RubyStepSyntaxHighlighter {
RubyYaccLexer lexer;
ParserSupport parserSupport;
Color[] colors;
final int TOKEN_COMMENT = -100;
String[] STANDARD_GLOBAL_FUNCTIONS = {"abort", "autoload", "autoload?", "binding", "block_given?", "callcc", "caller", "chomp", "chomp!", "chop",
"chop!", "evel", "exec", "exit", "exit!", "fail", "fork", "format", "getc", "gets", "gsub", "gsub!", "iterator?", "load", "open", "p", "print", "printf", "putc", "puts", "rand",
"readline", "readlines", "scan", "select", "sleep", "split", "sprintf", "srand", "sub", "sub!", "syscall", "system", "test", "trap", "warn"
};
String[] STANDARD_METHODS = { "allocate", "clone", "display", "dup", "enum_for", "eql?", "equal?", "extend", "freeze", "frozen?", "hash", "id", "inherited", "inspect", "instance_of?", "is_a?",
"kind_of?", "method", "methods", "new", "nil?", "object_id", "respond_to?", "send", "superclass", "taint", "tainted?", "to_a", "to_enum", "to_s", "untaint"
};
String[] PSEUDO_KEYWORDS = { "at_exit", "attr", "attr_accessor", "attr_reader", "attr_writer", "include", "lambda", "load", "proc", "loop", "private", "protected", "public", "raise", "catch",
"java_import", "require", "import", "include_package"
};
SortedSet<String> GLOBAL_FUNCTIONS_SET = new TreeSet<String>(Arrays.asList(STANDARD_GLOBAL_FUNCTIONS));
SortedSet<String> STANDARD_METHODS_SET = new TreeSet<String>(Arrays.asList(STANDARD_METHODS));
SortedSet<String> PSEUDO_KEYWORDS_SET = new TreeSet<String>(Arrays.asList(PSEUDO_KEYWORDS));
final int COLOR_BLACK = 0;
final int COLOR_GREENISH = 1;
final int COLOR_BLUE = 2;
final int COLOR_ORANGE = 4;
final int COLOR_RED = 5;
final int COLOR_GREEN = 6;
final int COLOR_GRAY = 7;
final int STYLE_DEFAULT = 0;
final int STYLE_STRING = 1;
final int STYLE_SYMBOL = 2;
final int STYLE_KEYWORD = 3;
final int STYLE_GLOBAL_FUNCTION = 4;
final int STYLE_STANDARD_METHOD = 5;
final int STYLE_LITERAL_BOUNDARY = 6;
final int STYLE_COMMENT = 7;
final int STYLE_CONSTANT = 8;
final int STYLE_VARIABLE = 9;
StyleRange[] styles;
public RubyStepSyntaxHighlighter() {
// -- the colors to use --
Display display = Display.getDefault();
colors = new Color[] {
new Color(display, new RGB(0, 0, 0)), // black
new Color(display, new RGB(63, 127, 95)), // Greenish
new Color(display, new RGB(0, 0, 192)), // Blue
new Color(display, new RGB(127, 0, 85)), // -- not used --
new Color(display, new RGB(255, 102, 0)), // Orange
new Color(display, new RGB(225, 0, 0)), // Red
new Color(display, new RGB(0, 128, 0)), // Green
new Color(display, new RGB(128, 128, 128)) // Gray
};
styles = new StyleRange[] {
new StyleRange(0, 0, null, null, SWT.NORMAL),
new StyleRange(0, 0, colors[COLOR_RED], null, SWT.NORMAL),
new StyleRange(0, 0, colors[COLOR_ORANGE], null, SWT.NORMAL),
new StyleRange(0, 0, colors[COLOR_BLUE], null, SWT.BOLD),
new StyleRange(0, 0, colors[COLOR_GREEN], null, SWT.NORMAL),
new StyleRange(0, 0, colors[COLOR_GREEN], null, SWT.NORMAL),
new StyleRange(0, 0, colors[COLOR_GRAY], null, SWT.BOLD),
new StyleRange(0, 0, colors[COLOR_GREENISH], null, SWT.ITALIC),
new StyleRange(0, 0, colors[COLOR_GRAY], null, SWT.BOLD),
new StyleRange(0, 0, colors[COLOR_GRAY], null, SWT.NORMAL)
};
// -- lexer for finding language parts --
lexer = new RubyYaccLexer();
ParserSupport parserSupport = new ParserSupport();
RubyStepMeta meta = new RubyStepMeta();
Ruby runtime = RubyStepFactory.createScriptingContainer(true,meta.getRubyVersion()).getProvider().getRuntime();
ParserConfiguration parserConfig = new ParserConfiguration(runtime, 0, true, CompatVersion.BOTH);
parserSupport.setConfiguration(parserConfig);
parserSupport.setResult(new RubyParserResult());
parserSupport.setWarnings(new RubyWarnings(null));
parserSupport.initTopLocalVariables();
lexer.setEncoding(RubyYaccLexer.UTF8_ENCODING);
lexer.setParserSupport(parserSupport);
lexer.setState(RubyYaccLexer.LexState.EXPR_BEG);
}
private StyleRange tokenToStyleRange(int t, Object value, int prevt) {
// determine keyword style up front
if (t >= Tokens.kCLASS && t <= Tokens.kDO_LAMBDA) {
return styles[STYLE_KEYWORD];
}
switch (t) {
case TOKEN_COMMENT:
return styles[STYLE_COMMENT];
case Tokens.tSTRING_BEG:
case Tokens.tSTRING_CONTENT:
case Tokens.tSTRING_END:
case Tokens.tSTRING_DBEG:
case Tokens.tSTRING_DVAR:
return styles[STYLE_STRING];
case Tokens.tCONSTANT:
return styles[STYLE_CONSTANT];
case Tokens.tGVAR:
case Tokens.tIVAR:
return styles[STYLE_VARIABLE];
case Tokens.tREGEXP_BEG:
case Tokens.tREGEXP_END:
case Tokens.tPIPE:
return styles[STYLE_LITERAL_BOUNDARY];
case Tokens.tSYMBEG:
return styles[STYLE_SYMBOL];
case Tokens.tIDENTIFIER:
if (prevt == Tokens.tSYMBEG) {
return styles[STYLE_SYMBOL];
}
// fall through
case Tokens.tFID:
if (value instanceof Token && PSEUDO_KEYWORDS_SET.contains(((Token) value).getValue().toString())) {
return styles[STYLE_KEYWORD];
}
if (value instanceof Token && STANDARD_METHODS_SET.contains(((Token) value).getValue().toString())) {
return styles[STYLE_STANDARD_METHOD];
}
if (value instanceof Token && GLOBAL_FUNCTIONS_SET.contains(((Token) value).getValue().toString())) {
return styles[STYLE_GLOBAL_FUNCTION];
}
default:
return styles[STYLE_DEFAULT];
}
}
public void highlight(String title, StyledTextComp wText) {
// set up lexer process
String script = wText.getText();
StyledText canvas = wText.getStyledText();
byte[] utf8Script = null;
int[] encodingBytes = null;
try {
utf8Script = script.getBytes("UTF-8");
encodingBytes = new int[utf8Script.length+1];
int runner = 0;
for (int i = 0; i < utf8Script.length; i++) {
runner += (utf8Script[i] < 0 && -((int)utf8Script[i])+128 > 192)?1:0;
encodingBytes[i] = runner;
}
encodingBytes[encodingBytes.length-1] = runner;
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
return;
}
List<String> lines = new ArrayList<String>(canvas.getLineCount());
LexerSource lexerSource = new ByteArrayLexerSource(title, utf8Script, lines, 0, true);
lexer.reset();
lexer.setSource(lexerSource);
lexer.setState(RubyYaccLexer.LexState.EXPR_BEG);
// remember bounds of current token
int leftTokenBorder = 0;
int rightTokenBorder = 0;
int t = 0;
int prevt = 0;
int lastCommentEnd = 0;
ArrayList<StyleRange> ranges = new ArrayList<StyleRange>(200);
ArrayList<Integer> intRanges = new ArrayList<Integer>(400);
try {
boolean keepParsing = true;
while (keepParsing) {
/* take care of comments, which are stripped out by the lexer */
int[] upcomingComment = null;
while ((rightTokenBorder >= lastCommentEnd || rightTokenBorder == 0 ) && (upcomingComment = getUpcomingCommentPos(utf8Script, rightTokenBorder)) != null){
leftTokenBorder = upcomingComment[0];
rightTokenBorder = leftTokenBorder + upcomingComment[1];
lastCommentEnd = rightTokenBorder;
//System.out.println("Found comment -> [" + leftTokenBorder + "," + rightTokenBorder + "]");
ranges.add(tokenToStyleRange(TOKEN_COMMENT, null, prevt));
int left = leftTokenBorder - encodingBytes[leftTokenBorder];
int right = rightTokenBorder-encodingBytes[rightTokenBorder]- left;
intRanges.add(left);
intRanges.add(right);
}
/* read language syntax */
int oldOffset = lexerSource.getOffset();
keepParsing = lexer.advance();
prevt = t;
t = lexer.token();
Object v = lexer.value();
leftTokenBorder = oldOffset;
if (leftTokenBorder < lastCommentEnd && lexerSource.getOffset() > lastCommentEnd){
leftTokenBorder = lastCommentEnd;
}
rightTokenBorder = lexerSource.getOffset();
//System.out.println("Found token " + t + " -> " + lexer.value() + " [" + leftTokenBorder + "," + rightTokenBorder + "]");
// skip whitespace and error formatting
if (t != '\n' && t != -1){
ranges.add(tokenToStyleRange(t, v, prevt));
int left = leftTokenBorder - encodingBytes[leftTokenBorder];
int right = rightTokenBorder-encodingBytes[rightTokenBorder]- (leftTokenBorder - encodingBytes[leftTokenBorder]);
intRanges.add(left);
intRanges.add(right);
}
}
// don't mind anything that might go wrong during parsing
} catch (SyntaxException e) {
// apply the latest style to the rest of the file in case there is a syntax error
if (ranges.size() > 0) {
ranges.remove(ranges.size() - 1);
intRanges.remove(intRanges.size()-1);
intRanges.remove(intRanges.size()-1);
}
ranges.add(tokenToStyleRange(t, null, prevt));
int left = leftTokenBorder - encodingBytes[leftTokenBorder];
intRanges.add(left);
intRanges.add(wText.getText().length() - left);
} catch (Exception e) {
// the lexer will sometimes throw a non-syntax exception when confronted with malformed input
//e.printStackTrace();
}
// don't mind swt errors in case some unforseen input brought the style ranges out of order
try {
canvas.setStyleRanges(ArrayUtils.toPrimitive(intRanges.toArray(new Integer[0])), ranges.toArray(new StyleRange[0]));
}
catch (Exception e){
//e.printStackTrace();
}
}
// returns position and length pair of a comment that starts at this position (forwarding through whitespace)
// return null if there's no comment coming up
private int[] getUpcomingCommentPos(byte[] utf8Script, int pos) {
// if we're in the middle of a string or regex, there's no comments
if (lexer.getStrTerm() != null)
return null;
// looking for next comment while ignoring whitespace
boolean searchingComment = true;
boolean isComment = false;
int idx = pos;
do {
if (idx >= utf8Script.length) {
searchingComment = false;
break;
}
switch (utf8Script[idx]) {
case '\t':
case ' ':
case '\n':
case '\r':
idx++;
break;
case '#':
isComment = true;
searchingComment = false;
break;
default:
searchingComment = false;
}
} while (searchingComment);
if (isComment) {
// now to determine it's length, just scan up to \n or EOF
int end = idx;
boolean foundEnd = false;
do{
end += 1;
if (end >= utf8Script.length){
foundEnd = true;
break;
}
switch(utf8Script[end]){
case '\n':
foundEnd = true;
}
}while(!foundEnd);
return new int[] {idx, end-idx};
} else {
return null;
}
}
public void newRubyVersionSelected(RubyVersion rubyVersion) {
// consider setting explicit compat level on parser configuration
}
}