Package naturalLanguage

Source Code of naturalLanguage.NaturalLanguage

package naturalLanguage;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import lipstone.joshua.parser.Tokenizer;
import lipstone.joshua.parser.exceptions.ParserException;
import lipstone.joshua.parser.exceptions.PluginConflictException;
import lipstone.joshua.parser.exceptions.SyntaxException;
import lipstone.joshua.parser.exceptions.UnbalancedParenthesesException;
import lipstone.joshua.parser.exceptions.UndefinedResultException;
import lipstone.joshua.parser.plugin.ParserPlugin;
import lipstone.joshua.parser.plugin.helpdata.Command;
import lipstone.joshua.parser.plugin.types.CommandPlugin;
import lipstone.joshua.parser.plugin.types.InputFilterPlugin;
import lipstone.joshua.parser.types.BigDec;
import lipstone.joshua.parser.util.ConsCell;
import lipstone.joshua.parser.util.ConsType;

public class NaturalLanguage extends ParserPlugin implements InputFilterPlugin, CommandPlugin {
  // Natural-language token sequences (e.g. "plus", "divided by") mapped to the operator cells that replace them.
  // Populated by loadInputFilter() and applied by convertMaths().
  private HashMap<ConsCell, ConsCell> naturalMath = null;
  // abbreviations: regex (wrapped in leading/trailing delimiter groups) -> replacement text.
  // abbreviationPrefixes: prefix abbreviation (e.g. "k") -> full prefix name (e.g. "kilo").
  // Both are populated by loadInputFilter().
  private HashMap<String, String> abbreviations, abbreviationPrefixes = null;
  // Spelled-out number words -> their integer value; the decimal-point words map to -1. Populated by loadInputFilter().
  private HashMap<String, Integer> naturalNumbers = null;
  // Unit registry used for abbreviation expansion and unit conversion; created by loadInputFilter().
  private Units units = null;
  // Factor that plusTax() multiplies into the expression.
  private BigDec tax = new BigDec(0.0825);
  // Alternation of all prefix abbreviations, compiled by loadInputFilter().
  private Pattern prefixPattern;
 
  public ConsCell convertMaths(ConsCell input) {
    for (ConsCell key : naturalMath.keySet())
      input.replaceAll(key, naturalMath.get(key));
    return input;
  }
 
  /**
   * make sure to run convertMaths prior to running this function as it relies on there being +-\*\/ signs in between
   * numbers
   *
   * @param input
   *            the equation as a string with typed in numbers i.e. one hundred twenty three thousand ninety nine
   * @param parser
   *            the parser that precipitated this method call
   * @return the equation with the typed out numbers replaced with the appropriate numerals
   * @throws UndefinedResultException
   */
  public ConsCell convertNumbers(ConsCell input) throws NumberFormatException, UndefinedResultException {
    ConsCell current = input;
    int numeric = -2;
    do {
      if (current.getCarType() == ConsType.IDENTIFIER && (numeric = numberValue((String) current.getCar())) >= -1) {
        ConsCell head = current;
        int number = 0, working = 0, decimal = 0, step = 0;
        boolean dec = false, first = current.isFirstConsCell();
        do {
          if (numeric > 100) {
            number += working * numeric;
            working = 0;
            step = 0;
          }
          else if (numeric == 100) {
            working *= numeric;
            step = 0;
          }
          else if (numeric >= 10) {
            if (step > 0) {
              working *= 100;
              number *= 100;
            }
            working += numeric;
            step = 2;
          }
          else if (numeric >= 0) {
            if (step == 1) {
              working *= 10;
              number *= 10;
            }
            working += numeric;
            step = 1;
          }
          else
            break;
        } while ((current = current.getNextConsCell()).getCarType() == ConsType.IDENTIFIER && (numeric = numberValue((String) current.getCar())) > -1);
        if (numeric == -1)
          dec = true;
        number += working;
        working = 0;
        step = 0;
        if (dec) {
          while ((current = current.getNextConsCell()).getCarType() == ConsType.IDENTIFIER && (numeric = numberValue((String) current.getCar())) > -1) {
            if (numeric > 100) {
              decimal += working * numeric;
              working = 0;
              step = 0;
            }
            else if (numeric == 100) {
              working *= numeric;
              step = 0;
            }
            else if (numeric >= 10) {
              if (step > 0) {
                working *= 100;
                decimal *= 100;
              }
              working += numeric;
              step = 2;
            }
            else if (numeric >= 0) {
              if (step == 1) {
                working *= 10;
                decimal *= 10;
              }
              working += numeric;
              step = 1;
            }
            else
              break;
          }
          decimal += working;
        }
        if (first)
          input = new ConsCell(new BigDec(number + (dec ? ((double) decimal) / Math.pow(10, ((int) Math.log10(decimal) + 1)) : 0)), ConsType.NUMBER, current);
        else {
          head = head.remove().getPreviousConsCell();
          head.append(new ConsCell(new BigDec(number + (dec ? ((double) decimal) / Math.pow(10, ((int) Math.log10(decimal) + 1)) : 0)), ConsType.NUMBER, current));
        }
        if (current.isNull())
          return input;
      }
    } while (!(current = current.getNextConsCell()).isNull());
    return input;
  }
 
  /**
   * Convert things like 10 tickets at $75 a ticket into something that this calculator can handle.
   *
   * @param input
   *            the equation, which can contain in-line variable syntax
   * @param parser
   *            the parser that precipitated this method call
   * @return the converted input
   * @throws ParserException
   */
  public String convertInlineVariables(String input) throws NumberFormatException, ParserException {
    while (input.contains(" at ")) {
      int mid = input.indexOf(" at "), start = 0, end = 0;
      String number = "0", multiplier = "1", divisor = "1", var = "";
      for (int i = mid; i >= 0; i--) {
        if (parser.isNumber(input.charAt(i))) {
          var = input.substring(i + 1, mid).trim();
          for (start = i; start >= 0; start--) {
            if (input.charAt(start) == '(')
              start = parser.getEndIndex(input, start);
            else if (!parser.isNumber(input.charAt(start)) && !parser.isOperator(input.charAt(start)))
              break;
          }
          if (start > 0)
            number = input.substring(start + 1, i + 1);
          else
            number = input.substring(0, i + 1);
          break;
        }
      }
      for (int i = mid + 5; i < input.length(); i++) {
        if (parser.isNumber(input.charAt(i))) {
          for (end = i; end < input.length(); end++) {
            if (input.charAt(end) == '(')
              end = parser.getEndIndex(input, end);
            else if (!parser.isNumber(input.charAt(end)) && !parser.isOperator(input.charAt(end)))
              break;
          }
          if (end < input.length() - 1)
            multiplier = input.substring(i - 1, end);
          else
            multiplier = input.substring(i - 1);
          break;
        }
      }
      int finalEnd = input.length();
      for (finalEnd = input.indexOf(var, mid) + var.length(); finalEnd < input.length(); finalEnd++) {
        if (parser.isNumber(input.charAt(finalEnd)) || input.charAt(finalEnd) == ' ')
          break;
      }
      int beginDiv = input.indexOf(" for ", mid);
      if (beginDiv > mid && beginDiv < finalEnd) {
        for (int i = beginDiv + 5; i < finalEnd; i++) {
          if (parser.isNumber(input.charAt(i))) {
            int a = 0;
            for (a = i + 1; a < finalEnd; a++) {
              if (input.charAt(a) == '(')
                a = parser.getEndIndex(input, a);
              else if (!parser.isNumber(input.charAt(a)) && !parser.isOperator(input.charAt(a)))
                break;
            }
            divisor = input.substring(i, a);
            break;
          }
        }
      }
    }
    return input;
  }
 
  public ConsCell plusTax(ConsCell input) {
    if (input.length() < 3)
      return input;
    ConsCell current = input;
    int i = 0;
    do {
      if (current.getCar().equals("plus") && current.getNextConsCell().getCar().equals("tax")) {
        current.getNextConsCell().remove();
        ConsCell insert = input.clone();
        insert.getNextConsCell(i).removeAll();
        current.replaceCar(insert);
        current.append(new ConsCell('*', ConsType.OPERATOR, new ConsCell(tax, ConsType.NUMBER, current.getNextConsCell())));
      }
      i++;
    } while (!(current = current.getNextConsCell()).isNull() && !current.getNextConsCell().isNull()); //This and the one after it are not null
    return input;
  }
 
  /**
   * Finds unit conversion sets ("&lt;quantity&gt; &lt;units&gt; in &lt;units&gt;") in the input and replaces them.
   * <p>
   * For every IDENTIFIER cell equal to "in", the cells to its left and right that look like units, operators,
   * powers or nested expressions are moved into two separate lists, the "in" cell becomes a '*' operator and a
   * NUMBER cell holding {@code units.convertUnits(leftUnits, rightUnits)} is inserted at it. Nested CONS_CELL cars
   * are processed recursively. The parser's variable list is read before and written back after the scan.
   * <p>
   * NOTE(review): the variables are not restored if an exception is thrown part-way through (there is no
   * try/finally) - confirm whether callers rely on that.
   *
   * @param input
   *            the input
   * @return the input with the unit conversions converted into a format that this parser can handle
   * @throws ParserException
   *             presumably propagated from units.convertUnits or the recursive call
   */
  public ConsCell findUnitConversion(ConsCell input) throws ParserException {
    ArrayList<String> oldVars = parser.getVars(); //Save the variables for later
    ConsCell current = input;
    do {
      //Recurse into nested expressions first
      if (current.getCarType() == ConsType.CONS_CELL)
        current.replaceCar(findUnitConversion((ConsCell) current.getCar()));
      if (current.getCarType() == ConsType.IDENTIFIER && ((String) current.getCar()).equals("in")) {
        ConsCell leftUnits = new ConsCell(), rightUnits = new ConsCell();
        ConsCell head = current.getPreviousConsCell();
        boolean power = true;
        //Collect the left-hand side by walking backwards from the cell before "in"
        do {
          //Presumably prepends a detached copy of head to leftUnits and then steps back onto it - verify against ConsCell
          head.singular().append(leftUnits);
          leftUnits = leftUnits.getPreviousConsCell();
          ConsCell previous = head.getPreviousConsCell();
          //Keep walking past a NUMBER only when it is an exponent (preceded by '^', optionally signed).
          //NOTE(review): && binds tighter than ||, so a preceding '+' alone satisfies this while '-' additionally
          //requires a '^' before it - confirm this asymmetry is intended.
          power = head.getCarType() != ConsType.NUMBER || (!previous.isNull() && (previous.getCarType() == ConsType.OPERATOR && ((char) previous.getCar() == '^' ||
              ((char) previous.getCar() == '+' || (char) previous.getCar() == '-' && previous.getPreviousConsCell().getCarType() == ConsType.OPERATOR &&
                  (char) previous.getPreviousConsCell().getCar() == '^'))));
        } while (power && !(head = head.remove().getPreviousConsCell()).isNull() &&
            (head.getCarType() == ConsType.OPERATOR || (head.getCarType() == ConsType.IDENTIFIER && units.getUnitNames().contains((String) head.getCar())) || head.getCarType() == ConsType.CONS_CELL));
        //Collect the right-hand side by walking forwards from the cell after "in"
        head = current.getNextConsCell();
        ConsCell temp = rightUnits;
        power = false;
        do {
          temp = temp.append(head);
          //power is true while inside an exponent: directly after '^', or after a sign that follows '^'
          power = head.getCarType() == ConsType.OPERATOR && ((char) head.getCar() == '^' || (power && ((char) head.getCar() == '-' || (char) head.getCar() == '+')));
        } while (!(head = head.remove()).isNull() &&
            ((head.getCarType() == ConsType.OPERATOR && ((char) head.getCar() == '^' || (power && ((char) head.getCar() == '-' || (char) head.getCar() == '+')))) ||
                (head.getCarType() == ConsType.IDENTIFIER && units.getUnitNames().contains((String) head.getCar())) || (power && (head.getCarType() == ConsType.CONS_CELL ||
            head.getCarType() == ConsType.NUMBER))));
        //In theory, leftUnits and rightUnits are composed of units, the signs between them and powers
        //Turn "in" into a multiplication by the conversion result
        current.replaceCar(new ConsCell('*', ConsType.OPERATOR));
        current.insert(new ConsCell(units.convertUnits(leftUnits, rightUnits), ConsType.NUMBER));
      }
    } while (!(current = current.getNextConsCell()).isNull());
    parser.setVars(oldVars); //Restore the variables
    return input;
  }
 
  /*public String findUnitConversion(String input, Parser parser) throws NumberFormatException, ParserException {
    Matcher m = Pattern.compile("\\bin\\b").matcher(input);
    while (m.find()) {
      int start = 0, end = input.length(), split = m.start();
      //Determine the start point of the quantity, factoring in numeric exponents on the units
      for (int i = m.start(); i >= 0; i--) {
        boolean isStart = false;
        if (input.charAt(i) == ')')
          i = parser.getStartIndex(input, i);
        if (parser.isNumber(input.charAt(i))) {
          for (int a = i; a >= 0; a--) {
            if ((parser.isOperator(input.charAt(a)) || input.charAt(a) == ' ') && input.charAt(a) != '^') {
              isStart = true;
              start = a;
              break;
            }
            if (parser.isNumber(input.charAt(a))) {
              i = a;
              break;
            }
          }
        }
        if (isStart)
          break;
      }
      //Determine the end point of the units, factoring in numeric exponents on the the units
      for (int i = m.end(); i < input.length(); i++) {
        if (parser.isOperator(input.charAt(i)) && input.charAt(i) != '^') {
          end = i;
          break;
        }
        boolean isEnd = false;
        if (input.charAt(i) == '(')
          i = parser.getEndIndex(input, i);
        int a = i + 1;
        if (input.charAt(i) == '^') {
          for (a = i + 1; a < input.length(); a++) {
            if ((parser.isOperator(input.charAt(a)) || input.charAt(a) == ' ') && input.charAt(a) != '^') {
              isEnd = true;
              start = a;
              break;
            }
            if (parser.isNumber(input.charAt(a))) {
              i = a;
              break;
            }
          }
        }
        if (isEnd) {
          end = a;
          break;
        }
      }
      int unitsStart = 0;
      String unitConversion = input.substring(start, end);
      for (int i = 0; i < unitConversion.length(); i++) {
        if (input.charAt(i) == '(')
          i = parser.getEndIndex(unitConversion, i);
        if (input.charAt(i) == ' ') {
          unitsStart = i;
          break;
        }
      }
      unitConversion = units.convertUnits(unitConversion.substring(start, unitsStart) + ", " + unitConversion.substring(unitsStart, split) + ", " +
          unitConversion.substring(split + 2, end), parser);
      input = input.substring(0, start) + unitConversion + input.substring(end);
      //Resets the pattern to compensate for the changed string
      m = Pattern.compile("in").matcher(input);
    }
    return input;
  }*/
 
  public ConsCell convertAbbrevations(ConsCell input) throws SyntaxException, UnbalancedParenthesesException {
    String inp = input.toString();
    for (String abbrev : abbreviations.keySet())
      inp = inp.replaceAll(abbrev, "$1" + abbreviations.get(abbrev) + "$2");
    for (Unit unit : units.getUnits().values()) {
      String name = unit.getName();
      for (String abbreviation : unit.getAbbreviations()) {
        Pattern pattern = Pattern.compile("(^|\\A|\\s)(" + prefixPattern + ")?" + abbreviation + "($|\\z|\\s)");
        Matcher m;
        while ((m = pattern.matcher(inp)).find())
          inp = m.replaceFirst(m.group(1) + (m.group(2) != null ? abbreviationPrefixes.get(m.group(2)) : "") + name + m.group(3));
      }
    }
    return Tokenizer.tokenizeString(inp);
  }
 
  private int numberValue(String number) {
    number = number.toLowerCase();
    if (number.equals(".") || number.equals("point") || number.equals("dot"))
      return -1;
    return naturalNumbers.containsKey(number) ? naturalNumbers.get(number) : -2;
  }
 
  @Override
  public void loadInputFilter() {
    if (naturalMath == null) {
      naturalMath = new HashMap<ConsCell, ConsCell>();
      naturalNumbers = new HashMap<String, Integer>();
      abbreviations = new HashMap<String, String>();
      abbreviationPrefixes = new HashMap<String, String>();
      units = new Units(parser, this);
      naturalMath.put(new ConsCell("plus", ConsType.IDENTIFIER), new ConsCell('+', ConsType.OPERATOR));
      naturalMath.put(new ConsCell("minus", ConsType.IDENTIFIER), new ConsCell('-', ConsType.OPERATOR));
      naturalMath.put(new ConsCell("negative", ConsType.IDENTIFIER), new ConsCell('-', ConsType.OPERATOR));
      naturalMath.put(new ConsCell("times", ConsType.IDENTIFIER), new ConsCell('*', ConsType.OPERATOR));
      naturalMath.put(new ConsCell("divided", ConsType.IDENTIFIER, new ConsCell("by", ConsType.IDENTIFIER)), new ConsCell('/', ConsType.OPERATOR));
      naturalMath.put(new ConsCell("over", ConsType.IDENTIFIER), new ConsCell('/', ConsType.OPERATOR));
      naturalMath.put(new ConsCell("%", ConsType.OPERATOR, new ConsCell("of", ConsType.IDENTIFIER)), new ConsCell('/', ConsType.OPERATOR,
          new ConsCell(new BigDec(100), ConsType.NUMBER, new ConsCell('*', ConsType.OPERATOR))));
      naturalMath.put(new ConsCell("to", ConsType.IDENTIFIER, new ConsCell("the", ConsType.IDENTIFIER)), new ConsCell('^', ConsType.OPERATOR));
      naturalMath.put(new ConsCell("squared", ConsType.IDENTIFIER), new ConsCell('^', ConsType.OPERATOR, new ConsCell(new BigDec(2), ConsType.NUMBER)));
      naturalMath.put(new ConsCell("cubed", ConsType.IDENTIFIER), new ConsCell('^', ConsType.OPERATOR, new ConsCell(new BigDec(3), ConsType.NUMBER)));
      Object[] nNumbers = {"zero", 0, "one", 1, "two", 2, "three", 3, "four", 4, "five", 5, "six", 6, "seven", 7, "eight", 8, "nine", 9,
          "ten", 10, "eleven", 11, "twelve", 12, "thirteen", 13, "fourteen", 14, "fifteen", 15, "sixteen", 16, "seventeen", 17, "eightteen", 18, "nineteen", 19,
          "twenty", 20, "thirty", 30, "forty", 40, "fifty", 50, "sixty", 60, "seventy", 70, "eighty", 80, "ninety", 90};
      String[] kys = {"thousand", "million", "billion", "trillion", "quadrillion", "sextillion", "septillion", "octillion", "nonillion", "decillion", "undecillion",
          "duodecillion", "tredecillion", "quattuordecillion", "quindecillion", "sedecillion", "septendecillion", "octodecillion", "novemdecillion", "viginillion", "centillion"};
      String[] abbriv = {"ft\\.", "feet", "in\\.", "inch", "mph", "miles per (hour)", "cc", "centimeter^3"};
      String[] abbrivPre = {"y", "yocto", "z", "zepto", "a", "atto", "f", "femto", "p", "pico", "n", "nano", "m", "milli", "m", "mili", "c", "centi", "d", "deci",
          "da", "deca", "h", "hecto", "k", "kilo", "M", "mega", "G", "giga", "T", "tera", "P", "peta", "E", "exa", "Z", "zetta", "Y", "yotta"};
      for (int i = 0; i < nNumbers.length - 1; i += 2)
        naturalNumbers.put((String) nNumbers[i], (Integer) nNumbers[i + 1]);
      naturalNumbers.put("hundred", 100);
      naturalNumbers.put(".", -1);
      naturalNumbers.put("point", -1);
      naturalNumbers.put("dot", -1);
      int keyValue = 1;
      for (int i = 0; i < kys.length; i++)
        naturalNumbers.put(kys[i], (keyValue = keyValue * 1000));
      naturalNumbers.put("novendecillion", naturalNumbers.get("novemdecillion"));
      for (int i = 0; i < abbriv.length - 1; i++)
        abbreviations.put("(^|\\A|\\s)" + abbriv[i] + "($|\\z|\\s)", abbriv[++i]);
      StringBuilder b = new StringBuilder(abbrivPre.length + 1);
      b.append('(');
      for (int i = 0; i < abbrivPre.length - 1; i++) {
        b.append(abbrivPre[i] + "|");
        abbreviationPrefixes.put(abbrivPre[i], abbrivPre[++i]);
      }
      prefixPattern = Pattern.compile(b.toString() + ")");
    }
  }
 
  @Override
  public ConsCell preProcess(ConsCell input) throws ParserException {
    ConsCell backup = input.clone();
    try {
      input = convertMaths(input);
      input = convertNumbers(input);
      //input = convertInlineVariables(input);
      //input = findUnitConversion(input);
      input = convertAbbrevations(input);
      input = plusTax(input);
    }
    catch (NumberFormatException e) {
      return backup;
    }
    return input;
  }
 
  @Override
  public void unloadInputFilter() {
    naturalMath = null;
    naturalNumbers = null;
    abbreviations = null;
    abbreviationPrefixes = null;
    units = null;
  }
 
  @Override
  public void loadCommands() throws PluginConflictException {
    addCommand(new Command("addUnit", "add a unit to this unit converter.  Use: addUnit name conversionFactor SI-equivalent abbreviation abbreviation is optional", this));
    addCommand(new Command("removeUnit", "remove a unit from this converter.  Use: removeUnit name", this));
    for (String key : getDataMap()) {
      ArrayList<ConsCell> data = getData(key).splitOnSeparator();
      String[] abbreviations = {};
      if (data.size() > 3) {
        abbreviations = new String[data.size() - 3];
        for (int i = 3; i < data.size(); i++)
          abbreviations[i] = data.get(i).toString();
        units.addUnit(data.get(0).toString(), data.get(1).toString(), (BigDec) data.get(2).getCar(), abbreviations);
      }
    }
  }
 
  @Override
  public ConsCell runCommand(String command, ArrayList<ConsCell> arguments) throws ParserException {
    if (command.equals("addUnit") && arguments.size() >= 3) {
      arguments.set(2, parser.run(arguments.get(2)));
      if (arguments.get(2).getCarType() != ConsType.NUMBER && arguments.get(2).length() == 1)
        throw new InvalidUnitException("The conversion factor must evalulate to a number", this);
      String[] abbreviations = {};
      if (arguments.size() > 3) {
        abbreviations = new String[arguments.size() - 3];
        for (int i = 3; i < arguments.size(); i++)
          abbreviations[i] = arguments.get(i).toString();
        String unit = arguments.get(1).toString();
        units.addUnit(arguments.get(0).toString(), unit, (BigDec) arguments.get(2).getCar(), abbreviations);
        //Add the remaining information to unit
        for (int i = 2; i < arguments.size(); i++)
          unit = unit + ", " + arguments.get(i);
        setData(arguments.get(0).toString(), Tokenizer.tokenizeString(unit));
        return new ConsCell("Successfully saved " + arguments.get(0) + " to the unit list.", ConsType.IDENTIFIER);
      }
    }
    if (command.equals("removeUnit") && arguments.size() >= 1) {
      String arg = arguments.get(0).toString();
      if (getData(arg) != null) {
        removeData(arg);
        units.removeUnit(arg);
        return new ConsCell("Successfully removed " + arg + " from the unit list.", ConsType.IDENTIFIER);
      }
      else
        return new ConsCell(arg + " cannot be removed.", ConsType.IDENTIFIER);
    }
    return null;
  }
 
  /**
   * Called when this plugin's commands are unloaded. This implementation intentionally does nothing.
   */
  @Override
  public void unloadCommands() throws PluginConflictException {/*Nothing to do here*/}
 
  /**
   * @return the live (not copied) map of abbreviation regexes to their replacement text; null until
   *         loadInputFilter() has run and again after unloadInputFilter()
   */
  public HashMap<String, String> getAbbreviations() {
    return abbreviations;
  }
 
  /**
   * @return the live (not copied) map of prefix abbreviations to full prefix names; null until
   *         loadInputFilter() has run and again after unloadInputFilter()
   */
  public HashMap<String, String> getAbbreviationPrefixes() {
    return abbreviationPrefixes;
  }
 
  /**
   * @return the pattern compiled by loadInputFilter() from all prefix abbreviations; null before the first load
   */
  public Pattern getPrefixPattern() {
    return prefixPattern;
  }
}
TOP

Related Classes of naturalLanguage.NaturalLanguage

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.