Package ket.math.convert

Source Code of ket.math.convert.Tokenization

/*
* Copyright (C) 2011  Alasdair C. Hamilton
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>
*/

package ket.math.convert;

import java.util.regex.*;

import ket.math.*;
import ket.math.purpose.Text;
import ket.math.purpose.VariableToken;
import ket.math.purpose.Word;
import ketUI.Ket;

/**
* Process a given string representation of an equation and convert it into a
* series of tokens given a collection of the symbols that constitute a list of
* functions and variables.
*/
public class Tokenization {

  private static final String WORD_REGEX = "[a-zA-Z][a-zA-Z\\d]*";
  private static final Pattern WORD = Pattern.compile(WORD_REGEX);
  private static final Pattern QUOTES  = Pattern.compile("^\"[^\"]*\"");
  private static final Pattern DOUBLE = Pattern.compile("\\d+\\.(?:\\d*)?(?:[eE][+-]?\\d+)?");
  private static final Pattern INTEGER = Pattern.compile("\\d+(?:[eE][+-]?\\d+)?");
  private static final Pattern WHITE_SPACE = Pattern.compile("^\\s+");


  private Tokenization(String equationString, KnownArguments knownArguments) {
    // Not used.
  }

  /**
   * Convert an equation into a series of tokens which are stored inside a
   * branch instance.  Words, numbers, text and ... are converted to tokens
   * while whitespace is discarded.  
   * @param equationString A string representation of the equation to be parsed.
   * @param knownArguments The repository of all functions that have been defined at runtime or by the user.
   * @return A branch object, the arguments of which is a token.
   */
  public static Branch tokenize(String equationString, KnownArguments knownArguments) {
    StringBuffer equation = new StringBuffer(equationString + " ")
    Branch tokenList = new Branch();
    while (equation.length()>0) {
      char nextCharacter = equation.charAt(0);
      if (Character.isDigit(nextCharacter)) {
        tokenList.append(readNextNumber(equation)); // Note: Generalization: may return a branch!
      } else if (nextCharacter=='"') {
        String quotedString = readNext(QUOTES, equation);
        if (quotedString!=null) {
          String string = quotedString.substring(1, quotedString.length()-1);
          Text text = new Text(string);
          Token token = new Token(text);
          tokenList.append(token);
        } else {
          Token unknown = readNextSymbol(equation, knownArguments);
          tokenList.append(unknown);
        }
      } else if (Character.isWhitespace(nextCharacter)) {
        readNext(WHITE_SPACE, equation);
      } else if (Character.isLetter(nextCharacter)) {
        String string = readNext(WORD, equation);
        if (string==null) {
          Token unknown = readNextSymbol(equation, knownArguments);
          tokenList.append(unknown);
          continue;
        }
        Word word = new Word(string);
        Token variable = new VariableToken(word);
        tokenList.append(variable);
      } else {
        Token unknown = readNextSymbol(equation, knownArguments);
        tokenList.append(unknown);
      }
    }
    return tokenList;
  }
 
  /**
   * Read the next regular-expression-defined symbol in equation
   * and encapsulate it in a token object.
   * @param equation A string representation of an equation which starts with a symbol.
   * @return The token to be returned.
   */
  private static Token readNextSymbol(StringBuffer equation, KnownArguments knownArguments) {
    for (Symbol symbol : knownArguments.getOperandSymbolSet()) {
      // Compare the each known symbol against the given string.
      Token token = symbol.matchNextToken(equation);
      if (token!=null) {
        return token;
      }
    }

    // Add unknown symbol as a new token.
    Ket.out.println(" --- unknown character --- ");
    Ket.out.println("An unknown character was found while parsing and has been added as a separate block of text. ");
    String firstLetter = equation.substring(0, 1);
    Ket.out.println("first letter = '" + firstLetter + "'.");
    equation.deleteCharAt(0);
    Ket.out.println("           tokenize: #2 /"+firstLetter+"/");
    return new Token(new Text(firstLetter));
  }

  /**
   * Read the next regular expression.
   */
  private static String readNext(Pattern pattern, StringBuffer equation) {
    Matcher matcher = pattern.matcher(equation);
    if (matcher.find() && matcher.start()==0) {
      String content;
      if (matcher.groupCount()>0) {
        content = matcher.group(matcher.groupCount());
        equation.delete(0, matcher.end(matcher.groupCount()));
      } else {
        content = matcher.group();
        equation.delete(0, matcher.end());
      }
      return content;
    } else {
      return null;
    }
  }

  /**
   * Read a double or integer pattern from equation.
   * @param equation The string representation of an equation to be parsed which begins with a number.
   * @return the next number from the left of equation encapsulated as an integer token or double token.
   */
  private static Argument readNextNumber(StringBuffer equation) {
    String doubleString = readNext(DOUBLE, equation);
    if (doubleString!=null) {
      //- double doubleValue = Double.parseDouble(doubleString);
      String[] parts = doubleString.split("[eE]");
      switch (parts.length) {
        case 1: // mantissa
          double doubleValue = Double.parseDouble(parts[0]);
          return new Token(doubleValue);

        case 2: // mantissa E exponent
          double mantissa = Double.parseDouble(parts[0]);
          int exponent = Integer.parseInt(parts[1]);
          Branch power = new Branch(Function.POWER, new Token(10), new Token(exponent));
          return new Branch(Function.TIMES, new Token(mantissa), power);

        default:
          Ket.out.println(" !!! Bug: cannot interpret number format: !!! ");
          Ket.out.println(doubleString);
          return new VariableToken(new Text(doubleString));
      }
    } else {
      String integerString = readNext(INTEGER, equation);
      String[] parts = integerString.split("[eE]");
      switch (parts.length) {
        case 1: // mantissa
          int intValue = Integer.parseInt(parts[0]);
          return new Token(intValue);

        case 2: // mantissa E exponent
          int mantissa = Integer.parseInt(parts[0]);
          int exponent = Integer.parseInt(parts[1]);
          Branch power = new Branch(Function.POWER, new Token(10), new Token(exponent));
          return new Branch(Function.TIMES, new Token(mantissa), power);

        default:
          Ket.out.println(" !!! Bug: cannot interpret number format: !!! ");
          Ket.out.println(integerString);
          return new VariableToken(new Text(integerString));
      }
    }
  }
}
TOP

Related Classes of ket.math.convert.Tokenization

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.