Package lipstone.joshua.parser

Source Code of lipstone.joshua.parser.Tokenizer

package lipstone.joshua.parser;

import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import lipstone.joshua.parser.exceptions.SyntaxException;
import lipstone.joshua.parser.exceptions.UnbalancedParenthesesException;
import lipstone.joshua.parser.types.BigDec;
import lipstone.joshua.parser.util.ConsCell;
import lipstone.joshua.parser.util.ConsType;

public class Tokenizer {
  private static final Pattern whitespace = Pattern.compile("[\t ]"),
      numbers = Pattern.compile("(([0-9]+(\\\\.[0-9]*)?|\\\\.[0-9]+)(\\\\*?i)?|infinity)"),
      identifiers = Pattern.compile("(['a-zA-Z_$%#&:;<=>?~\\|][a-zA-Z_\\.$%&:;<=>?~_\\|]*|(!)+)"),
      strings = Pattern.compile("\"([^\"]|\\\\\")*\""),
      separators = Pattern.compile("(,|\n)"), operators = Pattern.compile("[\\Q+-*/%^\\E\\\\]");
 
  public static ConsCell tokenizeString(String input) throws SyntaxException, UnbalancedParenthesesException {
    ConsCell head = new ConsCell(), result = head;
    Stack<ConsCell> parenStack = new Stack<>();
    Stack<Integer> parens = new Stack<>();
    Matcher m;
    int current = 0;
    while (current < input.length()) {
      boolean found = false;
      //Parentheses
      if (!found && input.charAt(current) == '(') {
        parens.push(current);
        ConsCell descent = new ConsCell();
        parenStack.push(head.append(new ConsCell(descent, ConsType.CONS_CELL)));
        head = descent;
        current++;
        found = true;
      }
     
      if (!found && input.charAt(current) == ')') {
        if (parenStack.isEmpty())
          throw new UnbalancedParenthesesException(current, input, null);
        head = parenStack.pop();
        parens.pop();
        current++;
        found = true;
      }
     
      //Object
      if (!found && input.charAt(0) == '{') {
        int end = Parser.getEndIndex(input, current, "{", "}") + 1;
        head = head.append(new ConsCell(input.substring(current, end), ConsType.OBJECT));
        current += end;
        found = true;
      }
     
      //Operators
      m = operators.matcher(input);
      if (!found && m.find(current) && m.start() == current) { //An operator is a special identifier
        head = head.append(new ConsCell((input.charAt(current) == '\\') ? '/' : input.charAt(current), ConsType.OPERATOR));
        current++;
        found = true;
      }
     
      //Separator
      m = separators.matcher(input);
      if (!found && m.find(current) && m.start() == current) {
        head = head.append(new ConsCell(m.group(), ConsType.SEPARATOR));
        current += m.group().length();
        found = true;
      }
     
      //Whitespace
      m = whitespace.matcher(input);
      if (!found && m.find(current) && m.start() == current) {
        current += m.group().length();
        found = true;
      }
     
      //Numbers
      m = numbers.matcher(input);
      if (!found && m.find(current) && m.start() == current) {
        head = head.append(new ConsCell(new BigDec(m.group()), ConsType.NUMBER));
        current += m.group().length();
        found = true;
      }
     
      //Identifiers
      m = identifiers.matcher(input);
      if (!found && m.find(current) && m.start() == current) {
        head = head.append(new ConsCell(m.group(), ConsType.IDENTIFIER));
        current += m.group().length();
        found = true;
      }
     
      //Strings
      m = strings.matcher(input);
      if (!found && m.find(current) && m.start() == current) {
        head = head.append(new ConsCell(m.group().substring(1, m.group().length() - 1), ConsType.STRING));
        current += m.group().length();
        found = true;
      }
     
      //If there aren't any matches
      if (!found)
        throw new SyntaxException(input, null);
    }
    if (!parenStack.isEmpty())
      throw new UnbalancedParenthesesException(parens.peek(), input, null);
    return result;
  }
}
TOP

Related Classes of lipstone.joshua.parser.Tokenizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.