// Source code of org.typeexit.kettle.plugin.steps.ruby.RubyStepSyntaxHighlighter

package org.typeexit.kettle.plugin.steps.ruby;


import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;


import org.apache.commons.lang.ArrayUtils;
import org.eclipse.swt.SWT;
import org.eclipse.swt.custom.StyleRange;
import org.eclipse.swt.custom.StyledText;
import org.eclipse.swt.graphics.Color;
import org.eclipse.swt.graphics.RGB;
import org.eclipse.swt.widgets.Display;
import org.jruby.CompatVersion;
import org.jruby.common.RubyWarnings;
import org.jruby.lexer.yacc.ByteArrayLexerSource;
import org.jruby.lexer.yacc.LexerSource;
import org.jruby.lexer.yacc.RubyYaccLexer;
import org.jruby.lexer.yacc.SyntaxException;
import org.jruby.lexer.yacc.Token;
import org.jruby.parser.ParserConfiguration;
import org.jruby.parser.ParserSupport;
import org.jruby.parser.RubyParserResult;
import org.jruby.parser.Tokens;
import org.jruby.Ruby;
import org.pentaho.di.ui.core.widget.StyledTextComp;
import org.typeexit.kettle.plugin.steps.ruby.RubyStepMeta.RubyVersion;


public class RubyStepSyntaxHighlighter {


  RubyYaccLexer lexer;
  ParserSupport parserSupport;
  Color[] colors;
  final int TOKEN_COMMENT = -100;


  String[] STANDARD_GLOBAL_FUNCTIONS = {"abort", "autoload", "autoload?", "binding", "block_given?", "callcc", "caller", "chomp", "chomp!", "chop",
      "chop!", "evel", "exec", "exit", "exit!", "fail", "fork", "format", "getc", "gets", "gsub", "gsub!", "iterator?", "load", "open", "p", "print", "printf", "putc", "puts", "rand",
      "readline", "readlines", "scan", "select", "sleep", "split", "sprintf", "srand", "sub", "sub!", "syscall", "system", "test", "trap", "warn"


  };


  String[] STANDARD_METHODS = { "allocate", "clone", "display", "dup", "enum_for", "eql?", "equal?", "extend", "freeze", "frozen?", "hash", "id", "inherited", "inspect", "instance_of?", "is_a?",
      "kind_of?", "method", "methods", "new", "nil?", "object_id", "respond_to?", "send", "superclass", "taint", "tainted?", "to_a", "to_enum", "to_s", "untaint"


  };


  String[] PSEUDO_KEYWORDS = { "at_exit", "attr", "attr_accessor", "attr_reader", "attr_writer", "include", "lambda", "load", "proc", "loop", "private", "protected", "public", "raise", "catch",
      "java_import", "require", "import", "include_package"


  };


  SortedSet<String> GLOBAL_FUNCTIONS_SET = new TreeSet<String>(Arrays.asList(STANDARD_GLOBAL_FUNCTIONS));
  SortedSet<String> STANDARD_METHODS_SET = new TreeSet<String>(Arrays.asList(STANDARD_METHODS));
  SortedSet<String> PSEUDO_KEYWORDS_SET = new TreeSet<String>(Arrays.asList(PSEUDO_KEYWORDS));
  
  final int COLOR_BLACK = 0;
  final int COLOR_GREENISH = 1;
  final int COLOR_BLUE = 2;
  final int COLOR_ORANGE = 4;
  final int COLOR_RED = 5;
  final int COLOR_GREEN = 6;
  final int COLOR_GRAY = 7;
  
  final int STYLE_DEFAULT = 0;
  final int STYLE_STRING = 1;
  final int STYLE_SYMBOL = 2;
  final int STYLE_KEYWORD = 3;
  final int STYLE_GLOBAL_FUNCTION = 4;
  final int STYLE_STANDARD_METHOD = 5;
  final int STYLE_LITERAL_BOUNDARY = 6;
  final int STYLE_COMMENT = 7;
  final int STYLE_CONSTANT = 8;
  final int STYLE_VARIABLE = 9;
  
  
  StyleRange[] styles;
  


  public RubyStepSyntaxHighlighter() {


    // -- the colors to use --
    Display display = Display.getDefault();
    colors = new Color[] {
        new Color(display, new RGB(0, 0, 0)),     // black
        new Color(display, new RGB(63, 127, 95)),   // Greenish 
        new Color(display, new RGB(0, 0, 192)),   // Blue
        new Color(display, new RGB(127, 0, 85)),   // -- not used --
        new Color(display, new RGB(255, 102, 0)),   // Orange  
        new Color(display, new RGB(225, 0, 0)),   // Red
        new Color(display, new RGB(0, 128, 0)),   // Green
        new Color(display, new RGB(128, 128, 128))   // Gray
    };
    
    styles = new StyleRange[] {
      new StyleRange(0, 0, null, null, SWT.NORMAL),
      new StyleRange(0, 0, colors[COLOR_RED], null, SWT.NORMAL),
      new StyleRange(0, 0, colors[COLOR_ORANGE], null, SWT.NORMAL),
      new StyleRange(0, 0, colors[COLOR_BLUE], null, SWT.BOLD),
      new StyleRange(0, 0, colors[COLOR_GREEN], null, SWT.NORMAL),
      new StyleRange(0, 0, colors[COLOR_GREEN], null, SWT.NORMAL),
      new StyleRange(0, 0, colors[COLOR_GRAY], null, SWT.BOLD),
      new StyleRange(0, 0, colors[COLOR_GREENISH], null, SWT.ITALIC),
      new StyleRange(0, 0, colors[COLOR_GRAY], null, SWT.BOLD),
      new StyleRange(0, 0, colors[COLOR_GRAY], null, SWT.NORMAL)
    };


    // -- lexer for finding language parts --
    lexer = new RubyYaccLexer();


    ParserSupport parserSupport = new ParserSupport();
    RubyStepMeta meta = new RubyStepMeta();
    Ruby runtime = RubyStepFactory.createScriptingContainer(true,meta.getRubyVersion()).getProvider().getRuntime();
    ParserConfiguration parserConfig = new ParserConfiguration(runtime, 0, true, CompatVersion.BOTH);
    parserSupport.setConfiguration(parserConfig);
    parserSupport.setResult(new RubyParserResult());
    parserSupport.setWarnings(new RubyWarnings(null));
    parserSupport.initTopLocalVariables();


    lexer.setEncoding(RubyYaccLexer.UTF8_ENCODING);
    lexer.setParserSupport(parserSupport);
    lexer.setState(RubyYaccLexer.LexState.EXPR_BEG);


  }


  private StyleRange tokenToStyleRange(int t, Object value, int prevt) {


    // determine keyword style up front
    if (t >= Tokens.kCLASS && t <= Tokens.kDO_LAMBDA) {
      return styles[STYLE_KEYWORD];
    }


    switch (t) {
    case TOKEN_COMMENT:
      return styles[STYLE_COMMENT];
    case Tokens.tSTRING_BEG:
    case Tokens.tSTRING_CONTENT:
    case Tokens.tSTRING_END:
    case Tokens.tSTRING_DBEG:
    case Tokens.tSTRING_DVAR:
      return styles[STYLE_STRING];
    case Tokens.tCONSTANT:
      return styles[STYLE_CONSTANT];
    case Tokens.tGVAR:
    case Tokens.tIVAR:
      return styles[STYLE_VARIABLE];
    case Tokens.tREGEXP_BEG:
    case Tokens.tREGEXP_END:
    case Tokens.tPIPE:
      return styles[STYLE_LITERAL_BOUNDARY];
    case Tokens.tSYMBEG:
      return styles[STYLE_SYMBOL];
    case Tokens.tIDENTIFIER:
      if (prevt == Tokens.tSYMBEG) {
        return styles[STYLE_SYMBOL];
      }
      // fall through
    case Tokens.tFID:


      if (value instanceof Token && PSEUDO_KEYWORDS_SET.contains(((Token) value).getValue().toString())) {
        return styles[STYLE_KEYWORD];
      }


      if (value instanceof Token && STANDARD_METHODS_SET.contains(((Token) value).getValue().toString())) {
        return styles[STYLE_STANDARD_METHOD];
      }


      if (value instanceof Token && GLOBAL_FUNCTIONS_SET.contains(((Token) value).getValue().toString())) {
        return styles[STYLE_GLOBAL_FUNCTION];
      }


    default:
      return styles[STYLE_DEFAULT];
    }


  }


  public void highlight(String title, StyledTextComp wText) {


    // set up lexer process
    String script = wText.getText();
    StyledText canvas = wText.getStyledText();
    byte[] utf8Script = null;
    int[] encodingBytes = null;


    try {
      utf8Script = script.getBytes("UTF-8");
      encodingBytes = new int[utf8Script.length+1];
      int runner = 0;
      for (int i = 0; i < utf8Script.length; i++) {
        runner += (utf8Script[i] < 0 && -((int)utf8Script[i])+128 > 192)?1:0;
        encodingBytes[i] = runner;
      }
      encodingBytes[encodingBytes.length-1] = runner;
    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
      return;
    }


    List<String> lines = new ArrayList<String>(canvas.getLineCount());


    LexerSource lexerSource = new ByteArrayLexerSource(title, utf8Script, lines, 0, true);


    lexer.reset();
    lexer.setSource(lexerSource);
    lexer.setState(RubyYaccLexer.LexState.EXPR_BEG);


    // remember bounds of current token
    int leftTokenBorder = 0;
    int rightTokenBorder = 0;
    int t = 0;
    int prevt = 0;
    int lastCommentEnd = 0;


    ArrayList<StyleRange> ranges = new ArrayList<StyleRange>(200);
    ArrayList<Integer> intRanges = new ArrayList<Integer>(400);


    try {
      
      boolean keepParsing = true;
      
      while (keepParsing) {
        
        /* take care of comments, which are stripped out by the lexer */
        int[] upcomingComment = null;
        while ((rightTokenBorder >= lastCommentEnd || rightTokenBorder == 0 ) && (upcomingComment = getUpcomingCommentPos(utf8Script, rightTokenBorder)) != null){
          leftTokenBorder = upcomingComment[0];
          rightTokenBorder = leftTokenBorder + upcomingComment[1];
          lastCommentEnd = rightTokenBorder;
          //System.out.println("Found comment -> [" + leftTokenBorder + "," + rightTokenBorder + "]");
          ranges.add(tokenToStyleRange(TOKEN_COMMENT, null, prevt));


          int left = leftTokenBorder - encodingBytes[leftTokenBorder];
          int right = rightTokenBorder-encodingBytes[rightTokenBorder]- left;
          
          intRanges.add(left);
          intRanges.add(right);
        }
        
        /* read language syntax */
        int oldOffset = lexerSource.getOffset();
        keepParsing = lexer.advance();
        prevt = t;
        t = lexer.token();
        Object v = lexer.value();


        leftTokenBorder = oldOffset;
        if (leftTokenBorder < lastCommentEnd && lexerSource.getOffset() > lastCommentEnd){
          leftTokenBorder = lastCommentEnd;
        }
        rightTokenBorder = lexerSource.getOffset();        
        
        //System.out.println("Found token " + t + " -> " + lexer.value() + " [" + leftTokenBorder + "," + rightTokenBorder + "]");


        // skip whitespace and error formatting
        if (t != '\n' && t != -1){ 
          ranges.add(tokenToStyleRange(t, v, prevt));
          int left = leftTokenBorder - encodingBytes[leftTokenBorder];
          int right = rightTokenBorder-encodingBytes[rightTokenBorder]- (leftTokenBorder - encodingBytes[leftTokenBorder]);
          intRanges.add(left);
          intRanges.add(right); 
        }
      
      }


      // don't mind anything that might go wrong during parsing
    } catch (SyntaxException e) {
      // apply the latest style to the rest of the file in case there is a syntax error
      if (ranges.size() > 0) {
        ranges.remove(ranges.size() - 1);
        intRanges.remove(intRanges.size()-1);
        intRanges.remove(intRanges.size()-1);
      }
      ranges.add(tokenToStyleRange(t, null, prevt));
      int left = leftTokenBorder - encodingBytes[leftTokenBorder];
      intRanges.add(left);
      intRanges.add(wText.getText().length() - left);


    } catch (Exception e) {
      // the lexer will sometimes throw a non-syntax exception when confronted with malformed input
      //e.printStackTrace();
    }
    
    // don't mind swt errors in case some unforseen input brought the style ranges out of order
    try {
      canvas.setStyleRanges(ArrayUtils.toPrimitive(intRanges.toArray(new Integer[0])), ranges.toArray(new StyleRange[0]));
    }
    catch (Exception e){
      //e.printStackTrace();
    }
    


  }


  // returns position and length pair of a comment that starts at this position (forwarding through whitespace)
  // return null if there's no comment coming up
  private int[] getUpcomingCommentPos(byte[] utf8Script, int pos) {


    // if we're in the middle of a string or regex, there's no comments 
    if (lexer.getStrTerm() != null)
      return null;


    // looking for next comment while ignoring whitespace
    boolean searchingComment = true;
    boolean isComment = false;
    int idx = pos;
    do {
      if (idx >= utf8Script.length) {
        searchingComment = false;
        break;
      }
      switch (utf8Script[idx]) {
      case '\t':
      case ' ':
      case '\n':
      case '\r':
        idx++;
        break;
      case '#':
        isComment = true;
        searchingComment = false;
        break;
      default:
        searchingComment = false;
      }
    } while (searchingComment);


    if (isComment) {
      // now to determine it's length, just scan up to \n or EOF
      int end = idx;
      boolean foundEnd = false;
      do{
        end += 1;
        if (end >= utf8Script.length){
          foundEnd = true;
          break;
        }
        switch(utf8Script[end]){
        case '\n':
          foundEnd = true;
        }
      }while(!foundEnd);


      return new int[] {idx, end-idx};
    } else {
      return null;
    }


  }


  public void newRubyVersionSelected(RubyVersion rubyVersion) {
    // consider setting explicit compat level on parser configuration
  }


}
// Source code of org.typeexit.kettle.plugin.steps.ruby.RubyStepSyntaxHighlighter

// Related classes of org.typeexit.kettle.plugin.steps.ruby.RubyStepSyntaxHighlighter