package naturalLanguage;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import lipstone.joshua.parser.Tokenizer;
import lipstone.joshua.parser.exceptions.ParserException;
import lipstone.joshua.parser.exceptions.PluginConflictException;
import lipstone.joshua.parser.exceptions.SyntaxException;
import lipstone.joshua.parser.exceptions.UnbalancedParenthesesException;
import lipstone.joshua.parser.exceptions.UndefinedResultException;
import lipstone.joshua.parser.plugin.ParserPlugin;
import lipstone.joshua.parser.plugin.helpdata.Command;
import lipstone.joshua.parser.plugin.types.CommandPlugin;
import lipstone.joshua.parser.plugin.types.InputFilterPlugin;
import lipstone.joshua.parser.types.BigDec;
import lipstone.joshua.parser.util.ConsCell;
import lipstone.joshua.parser.util.ConsType;
public class NaturalLanguage extends ParserPlugin implements InputFilterPlugin, CommandPlugin {
// Phrase cells (keys) and the operator cells that convertMaths() substitutes for them; populated by loadInputFilter().
private HashMap<ConsCell, ConsCell> naturalMath = null;
// abbreviations: regular expression for a written abbreviation -> its expansion.
// abbreviationPrefixes: prefix abbreviation -> full prefix name. Both are populated by loadInputFilter().
private HashMap<String, String> abbreviations, abbreviationPrefixes = null;
// Number words and their integer values; the decimal-point words are stored as -1 (see loadInputFilter()).
private HashMap<String, Integer> naturalNumbers = null;
// Unit registry used by the abbreviation expansion, unit conversion and the addUnit/removeUnit commands.
private Units units = null;
// Multiplier that plusTax() appends when it rewrites "plus tax".
private BigDec tax = new BigDec(0.0825);
// Alternation of every prefix abbreviation, compiled in loadInputFilter().
private Pattern prefixPattern;
/**
 * Substitutes every registered natural-language math phrase in the input with its operator form.
 *
 * @param input
 *            the tokenized equation; it is modified in place
 * @return the same <code>input</code> instance after the substitutions
 */
public ConsCell convertMaths(ConsCell input) {
    for (ConsCell phrase : naturalMath.keySet()) {
        ConsCell replacement = naturalMath.get(phrase);
        input.replaceAll(phrase, replacement);
    }
    return input;
}
/**
 * Collapses each run of written-out number words (i.e. one hundred twenty three thousand ninety nine) into a single NUMBER
 * cell. A decimal-point word ("point", "dot" or ".") switches to the fractional part, whose words are read the same way as
 * the whole part.<br>
 * Make sure to run convertMaths prior to running this function as it relies on there being +, -, * and / signs in between
 * numbers.
 *
 * @param input
 *            the equation, which may contain typed out numbers
 * @return the equation with the typed out numbers replaced with the appropriate numerals
 * @throws NumberFormatException
 * @throws UndefinedResultException
 */
public ConsCell convertNumbers(ConsCell input) throws NumberFormatException, UndefinedResultException {
    ConsCell current = input;
    int numeric = -2;
    do {
        if (current.getCarType() == ConsType.IDENTIFIER && (numeric = numberValue((String) current.getCar())) >= -1) {
            ConsCell head = current;
            // number: completed groups; working: the group currently being read.
            // step: 0 = nothing pending, 1 = the last word was a single digit, 2 = the last word was worth 10..90.
            // NOTE(review): all accumulation is done in int, so values beyond Integer.MAX_VALUE wrap around.
            int number = 0, working = 0, decimal = 0, step = 0, leadingZeros = 0;
            boolean dec = false, first = current.isFirstConsCell();
            do {
                if (numeric > 100) {
                    number += working * numeric;
                    working = 0;
                    step = 0;
                }
                else if (numeric == 100) {
                    working *= numeric;
                    step = 0;
                }
                else if (numeric >= 10) {
                    if (step > 0) {
                        working *= 100;
                        number *= 100;
                    }
                    working += numeric;
                    step = 2;
                }
                else if (numeric >= 0) {
                    if (step == 1) {
                        working *= 10;
                        number *= 10;
                    }
                    working += numeric;
                    step = 1;
                }
                else
                    break; // only reachable when the run starts with a decimal-point word
            } while ((current = current.getNextConsCell()).getCarType() == ConsType.IDENTIFIER && (numeric = numberValue((String) current.getCar())) > -1);
            if (numeric == -1)
                dec = true;
            number += working;
            working = 0;
            step = 0;
            if (dec) {
                while ((current = current.getNextConsCell()).getCarType() == ConsType.IDENTIFIER && (numeric = numberValue((String) current.getCar())) > -1) {
                    // "zero" words before the first non-zero digit do not change the int accumulator, so they are counted
                    // separately; otherwise "point zero five" would be read as 0.5 instead of 0.05.
                    if (numeric == 0 && working == 0 && decimal == 0)
                        leadingZeros++;
                    if (numeric > 100) {
                        decimal += working * numeric;
                        working = 0;
                        step = 0;
                    }
                    else if (numeric == 100) {
                        working *= numeric;
                        step = 0;
                    }
                    else if (numeric >= 10) {
                        if (step > 0) {
                            working *= 100;
                            decimal *= 100;
                        }
                        working += numeric;
                        step = 2;
                    }
                    else {
                        if (step == 1) {
                            working *= 10;
                            decimal *= 10;
                        }
                        working += numeric;
                        step = 1;
                    }
                }
                decimal += working;
            }
            // A fractional part of zero contributes nothing; skipping it also avoids log10(0), which previously turned the
            // whole value into NaN.
            double fraction = 0;
            if (dec && decimal > 0) {
                int places = leadingZeros + (int) Math.log10(decimal) + 1;
                fraction = decimal / Math.pow(10, places);
            }
            // Build the value before touching the list so that a failure here leaves the cells unmodified
            BigDec value = new BigDec(number + fraction);
            if (first)
                input = new ConsCell(value, ConsType.NUMBER, current);
            else {
                head = head.remove().getPreviousConsCell();
                head.append(new ConsCell(value, ConsType.NUMBER, current));
            }
            if (current.isNull())
                return input;
        }
    } while (!(current = current.getNextConsCell()).isNull());
    return input;
}
/**
 * Convert things like 10 tickets at $75 a ticket into something that this calculator can handle.<br>
 * For every " at " in the input this locates the quantity before it, the price after it and an optional " for " divisor.
 * The located pieces are not yet substituted back into the input, so the input is currently returned unchanged.
 *
 * @param input
 *            the equation, which can contain in-line variable syntax
 * @return the converted input
 * @throws NumberFormatException
 * @throws ParserException
 */
public String convertInlineVariables(String input) throws NumberFormatException, ParserException {
    // Each occurrence is visited exactly once. Re-testing input.contains(" at ") never terminated, because nothing in the
    // loop body rewrites input.
    int searchFrom = 0, mid;
    while ((mid = input.indexOf(" at ", searchFrom)) >= 0) {
        int start = 0, end = 0;
        String number = "0", multiplier = "1", divisor = "1", noun = "";
        // Walk left to the nearest numeric character: the text between it and " at " is the counted noun
        for (int i = mid; i >= 0; i--) {
            if (parser.isNumber(input.charAt(i))) {
                noun = input.substring(i + 1, mid).trim();
                // NOTE(review): this scan moves right-to-left but reacts to '(' with getEndIndex - confirm that it should
                // not be looking for ')' and the matching start index instead.
                for (start = i; start >= 0; start--) {
                    if (input.charAt(start) == '(')
                        start = parser.getEndIndex(input, start);
                    else if (!parser.isNumber(input.charAt(start)) && !parser.isOperator(input.charAt(start)))
                        break;
                }
                if (start > 0)
                    number = input.substring(start + 1, i + 1);
                else
                    number = input.substring(0, i + 1);
                break;
            }
        }
        // Walk right to the first numeric character after " at ": that expression is the multiplier
        for (int i = mid + 5; i < input.length(); i++) {
            if (parser.isNumber(input.charAt(i))) {
                for (end = i; end < input.length(); end++) {
                    if (input.charAt(end) == '(')
                        end = parser.getEndIndex(input, end);
                    else if (!parser.isNumber(input.charAt(end)) && !parser.isOperator(input.charAt(end)))
                        break;
                }
                if (end < input.length() - 1)
                    multiplier = input.substring(i - 1, end);
                else
                    multiplier = input.substring(i - 1);
                break;
            }
        }
        // The phrase ends after the noun is repeated on the right-hand side
        int finalEnd = input.length();
        for (finalEnd = input.indexOf(noun, mid) + noun.length(); finalEnd < input.length(); finalEnd++) {
            if (parser.isNumber(input.charAt(finalEnd)) || input.charAt(finalEnd) == ' ')
                break;
        }
        // An optional " for <n>" inside the phrase supplies the divisor
        int beginDiv = input.indexOf(" for ", mid);
        if (beginDiv > mid && beginDiv < finalEnd) {
            for (int i = beginDiv + 5; i < finalEnd; i++) {
                if (parser.isNumber(input.charAt(i))) {
                    int a = 0;
                    for (a = i + 1; a < finalEnd; a++) {
                        if (input.charAt(a) == '(')
                            a = parser.getEndIndex(input, a);
                        else if (!parser.isNumber(input.charAt(a)) && !parser.isOperator(input.charAt(a)))
                            break;
                    }
                    divisor = input.substring(i, a);
                    break;
                }
            }
        }
        // TODO: build the replacement from number, multiplier and divisor and splice it into input
        searchFrom = mid + 1;
    }
    return input;
}
/**
 * Rewrites every "plus tax" pair in the input. The "tax" cell is removed, the "plus" cell's car is replaced with a copy of
 * everything that precedes it, and a multiplication by the tax rate is appended after it.
 *
 * @param input
 *            the tokenized equation; inputs shorter than three cells are returned untouched
 * @return the same <code>input</code> instance after the rewrite
 */
public ConsCell plusTax(ConsCell input) {
    if (input.length() < 3) {
        return input;
    }
    int index = 0;
    ConsCell cell = input;
    while (true) {
        if (cell.getCar().equals("plus") && cell.getNextConsCell().getCar().equals("tax")) {
            cell.getNextConsCell().remove();
            // Copy of the cells that come before "plus"
            ConsCell preceding = input.clone();
            preceding.getNextConsCell(index).removeAll();
            cell.replaceCar(preceding);
            ConsCell rate = new ConsCell(tax, ConsType.NUMBER, cell.getNextConsCell());
            cell.append(new ConsCell('*', ConsType.OPERATOR, rate));
        }
        index++;
        cell = cell.getNextConsCell();
        // Continue only while both this cell and the one after it are not null
        if (cell.isNull() || cell.getNextConsCell().isNull()) {
            break;
        }
    }
    return input;
}
/**
 * Finds unit conversion sets in the input. A conversion is recognised by the identifier <code>in</code>: the cells
 * collected to its left and to its right are handed to <code>units.convertUnits</code>, the <code>in</code> cell becomes a
 * <code>*</code> operator and the conversion result is inserted as a NUMBER cell. Nested cons cells are processed
 * recursively.
 *
 * @param input
 *            the input
 * @return the input with the unit conversions converted into a format that this parser can handle
 * @throws ParserException
 */
public ConsCell findUnitConversion(ConsCell input) throws ParserException {
ArrayList<String> oldVars = parser.getVars(); //Save the variables for later
ConsCell current = input;
do {
//Convert inside nested expressions first
if (current.getCarType() == ConsType.CONS_CELL)
current.replaceCar(findUnitConversion((ConsCell) current.getCar()));
if (current.getCarType() == ConsType.IDENTIFIER && ((String) current.getCar()).equals("in")) {
ConsCell leftUnits = new ConsCell(), rightUnits = new ConsCell();
ConsCell head = current.getPreviousConsCell();
boolean power = true;
//Walk left from "in", moving cells out of the input and onto the front of leftUnits
do {
head.singular().append(leftUnits);
leftUnits = leftUnits.getPreviousConsCell();
ConsCell previous = head.getPreviousConsCell();
//Keep collecting unless head is a NUMBER that is not acting as an exponent.
//NOTE(review): && binds tighter than ||, so a preceding '+' counts on its own while '-' additionally requires a '^'
//before it - confirm whether '+' was meant to require the '^' as well.
power = head.getCarType() != ConsType.NUMBER || (!previous.isNull() && (previous.getCarType() == ConsType.OPERATOR && ((char) previous.getCar() == '^' ||
((char) previous.getCar() == '+' || (char) previous.getCar() == '-' && previous.getPreviousConsCell().getCarType() == ConsType.OPERATOR &&
(char) previous.getPreviousConsCell().getCar() == '^'))));
} while (power && !(head = head.remove().getPreviousConsCell()).isNull() &&
(head.getCarType() == ConsType.OPERATOR || (head.getCarType() == ConsType.IDENTIFIER && units.getUnitNames().contains((String) head.getCar())) || head.getCarType() == ConsType.CONS_CELL));
//Walk right from "in", moving cells out of the input and onto the end of rightUnits
head = current.getNextConsCell();
ConsCell temp = rightUnits;
power = false;
do {
temp = temp.append(head);
//power is true while reading an exponent: directly after '^', or after a sign that itself follows '^'
power = head.getCarType() == ConsType.OPERATOR && ((char) head.getCar() == '^' || (power && ((char) head.getCar() == '-' || (char) head.getCar() == '+')));
} while (!(head = head.remove()).isNull() &&
((head.getCarType() == ConsType.OPERATOR && ((char) head.getCar() == '^' || (power && ((char) head.getCar() == '-' || (char) head.getCar() == '+')))) ||
(head.getCarType() == ConsType.IDENTIFIER && units.getUnitNames().contains((String) head.getCar())) || (power && (head.getCarType() == ConsType.CONS_CELL ||
head.getCarType() == ConsType.NUMBER))));
//In theory, leftUnits and rightUnits are composed of units, the signs between them and powers
current.replaceCar(new ConsCell('*', ConsType.OPERATOR));
current.insert(new ConsCell(units.convertUnits(leftUnits, rightUnits), ConsType.NUMBER));
}
} while (!(current = current.getNextConsCell()).isNull());
//NOTE(review): the variables are not restored when an exception leaves this method early
parser.setVars(oldVars); //Restore the variables
return input;
}
/*public String findUnitConversion(String input, Parser parser) throws NumberFormatException, ParserException {
Matcher m = Pattern.compile("\\bin\\b").matcher(input);
while (m.find()) {
int start = 0, end = input.length(), split = m.start();
//Determine the start point of the quantity, factoring in numeric exponents on the units
for (int i = m.start(); i >= 0; i--) {
boolean isStart = false;
if (input.charAt(i) == ')')
i = parser.getStartIndex(input, i);
if (parser.isNumber(input.charAt(i))) {
for (int a = i; a >= 0; a--) {
if ((parser.isOperator(input.charAt(a)) || input.charAt(a) == ' ') && input.charAt(a) != '^') {
isStart = true;
start = a;
break;
}
if (parser.isNumber(input.charAt(a))) {
i = a;
break;
}
}
}
if (isStart)
break;
}
//Determine the end point of the units, factoring in numeric exponents on the the units
for (int i = m.end(); i < input.length(); i++) {
if (parser.isOperator(input.charAt(i)) && input.charAt(i) != '^') {
end = i;
break;
}
boolean isEnd = false;
if (input.charAt(i) == '(')
i = parser.getEndIndex(input, i);
int a = i + 1;
if (input.charAt(i) == '^') {
for (a = i + 1; a < input.length(); a++) {
if ((parser.isOperator(input.charAt(a)) || input.charAt(a) == ' ') && input.charAt(a) != '^') {
isEnd = true;
start = a;
break;
}
if (parser.isNumber(input.charAt(a))) {
i = a;
break;
}
}
}
if (isEnd) {
end = a;
break;
}
}
int unitsStart = 0;
String unitConversion = input.substring(start, end);
for (int i = 0; i < unitConversion.length(); i++) {
if (input.charAt(i) == '(')
i = parser.getEndIndex(unitConversion, i);
if (input.charAt(i) == ' ') {
unitsStart = i;
break;
}
}
unitConversion = units.convertUnits(unitConversion.substring(start, unitsStart) + ", " + unitConversion.substring(unitsStart, split) + ", " +
unitConversion.substring(split + 2, end), parser);
input = input.substring(0, start) + unitConversion + input.substring(end);
//Resets the pattern to compensate for the changed string
m = Pattern.compile("in").matcher(input);
}
return input;
}*/
/**
 * Expands abbreviations in the input. The fixed abbreviation table is applied first; after that every abbreviation of
 * every registered unit, optionally preceded by a prefix abbreviation, is replaced with the full prefix name followed by
 * the unit name. An abbreviation only matches when it is delimited by whitespace or the ends of the input.
 *
 * @param input
 *            the tokenized equation; it is converted to a string for the replacement
 * @return a freshly tokenized equation with the abbreviations expanded
 * @throws SyntaxException
 * @throws UnbalancedParenthesesException
 */
public ConsCell convertAbbrevations(ConsCell input) throws SyntaxException, UnbalancedParenthesesException {
    String inp = input.toString();
    for (String abbrev : abbreviations.keySet())
        inp = inp.replaceAll(abbrev, "$1" + abbreviations.get(abbrev) + "$2");
    for (Unit unit : units.getUnits().values()) {
        String name = unit.getName();
        for (String abbreviation : unit.getAbbreviations()) {
            // Named groups are used because prefixPattern brings its own capturing group, which shifts the numeric index
            // of the trailing delimiter.
            Pattern pattern = Pattern.compile("(?<lead>^|\\A|\\s)(?<prefix>" + prefixPattern + ")?" + abbreviation + "(?<trail>$|\\z|\\s)");
            Matcher m = pattern.matcher(inp);
            int from = 0;
            while (m.find(from)) {
                // The prefix group is null or empty when no prefix is present; neither has an entry in the prefix map
                String prefix = m.group("prefix");
                String prefixName = prefix == null ? null : abbreviationPrefixes.get(prefix);
                String expanded = m.group("lead") + (prefixName == null ? "" : prefixName) + name;
                inp = inp.substring(0, m.start()) + expanded + m.group("trail") + inp.substring(m.end());
                // Resume at the trailing delimiter so that it can also open the next match, and so that text which has
                // already been expanded is never scanned again.
                from = m.start() + expanded.length();
                m = pattern.matcher(inp);
            }
        }
    }
    return Tokenizer.tokenizeString(inp);
}
/**
 * Looks up the numeric value of a number word, ignoring case.
 *
 * @param number
 *            the word to look up
 * @return the value of the word, -1 if it is a decimal-point word ("point", "dot" or "."), or -2 if it is not a number word
 */
private int numberValue(String number) {
    // Locale.ROOT keeps the lookup independent of the default locale's case rules
    String word = number.toLowerCase(Locale.ROOT);
    if (word.equals(".") || word.equals("point") || word.equals("dot"))
        return -1;
    Integer value = naturalNumbers.get(word);
    return value != null ? value : -2;
}
/**
 * Builds the lookup tables used by the input filter: the natural-language operator phrases, the number words, the fixed
 * abbreviations and the prefix abbreviations together with their compiled pattern. Does nothing when the tables already
 * exist.
 */
@Override
public void loadInputFilter() {
    if (naturalMath != null)
        return;
    naturalMath = new HashMap<ConsCell, ConsCell>();
    naturalNumbers = new HashMap<String, Integer>();
    abbreviations = new HashMap<String, String>();
    abbreviationPrefixes = new HashMap<String, String>();
    units = new Units(parser, this);

    naturalMath.put(new ConsCell("plus", ConsType.IDENTIFIER), new ConsCell('+', ConsType.OPERATOR));
    naturalMath.put(new ConsCell("minus", ConsType.IDENTIFIER), new ConsCell('-', ConsType.OPERATOR));
    naturalMath.put(new ConsCell("negative", ConsType.IDENTIFIER), new ConsCell('-', ConsType.OPERATOR));
    naturalMath.put(new ConsCell("times", ConsType.IDENTIFIER), new ConsCell('*', ConsType.OPERATOR));
    naturalMath.put(new ConsCell("divided", ConsType.IDENTIFIER, new ConsCell("by", ConsType.IDENTIFIER)), new ConsCell('/', ConsType.OPERATOR));
    naturalMath.put(new ConsCell("over", ConsType.IDENTIFIER), new ConsCell('/', ConsType.OPERATOR));
    // NOTE(review): every other operator in this table is a char, this key uses the String "%" - confirm that it matches
    // what the tokenizer produces.
    naturalMath.put(new ConsCell("%", ConsType.OPERATOR, new ConsCell("of", ConsType.IDENTIFIER)), new ConsCell('/', ConsType.OPERATOR,
            new ConsCell(new BigDec(100), ConsType.NUMBER, new ConsCell('*', ConsType.OPERATOR))));
    naturalMath.put(new ConsCell("to", ConsType.IDENTIFIER, new ConsCell("the", ConsType.IDENTIFIER)), new ConsCell('^', ConsType.OPERATOR));
    naturalMath.put(new ConsCell("squared", ConsType.IDENTIFIER), new ConsCell('^', ConsType.OPERATOR, new ConsCell(new BigDec(2), ConsType.NUMBER)));
    naturalMath.put(new ConsCell("cubed", ConsType.IDENTIFIER), new ConsCell('^', ConsType.OPERATOR, new ConsCell(new BigDec(3), ConsType.NUMBER)));

    // The index of each word is its value
    String[] zeroToNineteen = {"zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "eleven", "twelve", "thirteen",
            "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"};
    for (int i = 0; i < zeroToNineteen.length; i++)
        naturalNumbers.put(zeroToNineteen[i], i);
    // Spelling used by earlier versions of this table; kept so that input which used to be recognised still is
    naturalNumbers.put("eightteen", 18);
    String[] tens = {"twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"};
    for (int i = 0; i < tens.length; i++)
        naturalNumbers.put(tens[i], (i + 2) * 10);
    naturalNumbers.put("hundred", 100);
    naturalNumbers.put(".", -1);
    naturalNumbers.put("point", -1);
    naturalNumbers.put("dot", -1);

    // Successive powers of one thousand. The table stores Integer values, so only the magnitudes that fit in an int are
    // registered; multiplying past that point in int arithmetic silently wrapped to meaningless values.
    String[] kys = {"thousand", "million", "billion", "trillion", "quadrillion", "quintillion", "sextillion", "septillion", "octillion", "nonillion",
            "decillion", "undecillion", "duodecillion", "tredecillion", "quattuordecillion", "quindecillion", "sedecillion", "septendecillion",
            "octodecillion", "novemdecillion", "vigintillion"};
    long keyValue = 1;
    for (String magnitude : kys) {
        keyValue *= 1000;
        if (keyValue > Integer.MAX_VALUE)
            break;
        naturalNumbers.put(magnitude, (int) keyValue);
    }
    // Alternative spelling; only registered when the canonical spelling is, so that the table never holds a null value
    Integer novemdecillion = naturalNumbers.get("novemdecillion");
    if (novemdecillion != null)
        naturalNumbers.put("novendecillion", novemdecillion);

    // Pairs of (regular expression for the abbreviation, expansion)
    String[] abbriv = {"ft\\.", "feet", "in\\.", "inch", "mph", "miles per (hour)", "cc", "centimeter^3"};
    for (int i = 0; i < abbriv.length - 1; i += 2)
        abbreviations.put("(^|\\A|\\s)" + abbriv[i] + "($|\\z|\\s)", abbriv[i + 1]);

    // Pairs of (prefix abbreviation, prefix name).
    // NOTE(review): "m" is listed twice, so the later entry ("mili") is the one that ends up in the map - confirm which
    // spelling Units expects before removing either entry.
    String[] abbrivPre = {"y", "yocto", "z", "zepto", "a", "atto", "f", "femto", "p", "pico", "n", "nano", "m", "milli", "m", "mili", "c", "centi", "d", "deci",
            "da", "deca", "h", "hecto", "k", "kilo", "M", "mega", "G", "giga", "T", "tera", "P", "peta", "E", "exa", "Z", "zetta", "Y", "yotta"};
    StringBuilder b = new StringBuilder(abbrivPre.length + 1);
    b.append('(');
    for (int i = 0; i < abbrivPre.length - 1; i += 2) {
        b.append(abbrivPre[i]).append('|');
        abbreviationPrefixes.put(abbrivPre[i], abbrivPre[i + 1]);
    }
    // The '|' left after the last prefix gives the group an empty final alternative; it is kept as-is because the pattern
    // is exposed through getPrefixPattern().
    b.append(')');
    prefixPattern = Pattern.compile(b.toString());
}
/**
 * Runs the natural-language conversions over the input in order: math phrases, number words, abbreviations and finally
 * "plus tax". The inline-variable and unit-conversion passes (convertInlineVariables, findUnitConversion) are currently
 * disabled.
 *
 * @param input
 *            the tokenized equation
 * @return the converted equation, or a copy of the original input if a NumberFormatException occurs along the way
 * @throws ParserException
 */
@Override
public ConsCell preProcess(ConsCell input) throws ParserException {
    final ConsCell untouched = input.clone();
    try {
        ConsCell converted = convertMaths(input);
        converted = convertNumbers(converted);
        converted = convertAbbrevations(converted);
        return plusTax(converted);
    }
    catch (NumberFormatException e) {
        // Fall back to the copy taken before any conversion ran
        return untouched;
    }
}
/**
 * Drops the lookup tables and the unit registry that loadInputFilter() created.
 */
@Override
public void unloadInputFilter() {
    units = null;
    abbreviations = abbreviationPrefixes = null;
    naturalNumbers = null;
    naturalMath = null;
}
/**
 * Registers the addUnit and removeUnit commands and re-registers every unit that was saved by an earlier addUnit call.
 * <br>
 * A saved record is keyed by the unit's name and holds, in order, the equivalent unit, the conversion factor and any
 * abbreviations; that is the layout runCommand() writes.
 *
 * @throws PluginConflictException
 */
@Override
public void loadCommands() throws PluginConflictException {
    // The usage text follows the order runCommand() reads the arguments in: the conversion factor is the third one
    addCommand(new Command("addUnit", "add a unit to this unit converter. Use: addUnit name SI-equivalent conversionFactor abbreviation. abbreviation is optional", this));
    addCommand(new Command("removeUnit", "remove a unit from this converter. Use: removeUnit name", this));
    // The unit registry is created by the input filter; make sure that it exists before units are added to it
    if (units == null)
        loadInputFilter();
    for (String key : getDataMap()) {
        ArrayList<ConsCell> data = getData(key).splitOnSeparator();
        // Skip records that cannot describe a unit instead of failing the whole load
        if (data.size() < 2 || data.get(1).getCarType() != ConsType.NUMBER)
            continue;
        // Everything after the conversion factor is an abbreviation; a unit without abbreviations is still registered
        String[] abbreviations = new String[data.size() - 2];
        for (int i = 2; i < data.size(); i++)
            abbreviations[i - 2] = data.get(i).toString();
        units.addUnit(key, data.get(0).toString(), (BigDec) data.get(1).getCar(), abbreviations);
    }
}
/**
 * Executes the addUnit and removeUnit commands.
 * <ul>
 * <li>addUnit: arguments are the name, the equivalent unit, the conversion factor (evaluated with the parser) and any
 * number of abbreviations. The unit is registered and saved under its name.</li>
 * <li>removeUnit: the single argument is the name of a previously saved unit.</li>
 * </ul>
 *
 * @param command
 *            the name of the command
 * @param arguments
 *            the arguments of the command; for addUnit the third argument is replaced with its evaluated value
 * @return a message describing the outcome, or <code>null</code> if the command or its argument count is not handled here
 * @throws ParserException
 *             if the conversion factor does not evaluate to a number or the parser fails while evaluating it
 */
@Override
public ConsCell runCommand(String command, ArrayList<ConsCell> arguments) throws ParserException {
    if (command.equals("addUnit") && arguments.size() >= 3) {
        arguments.set(2, parser.run(arguments.get(2)));
        // Anything that is not a NUMBER cannot be cast to BigDec below
        if (arguments.get(2).getCarType() != ConsType.NUMBER)
            throw new InvalidUnitException("The conversion factor must evalulate to a number", this);
        // Abbreviations are optional: with exactly three arguments this is an empty array
        String[] abbreviations = new String[arguments.size() - 3];
        for (int i = 3; i < arguments.size(); i++)
            abbreviations[i - 3] = arguments.get(i).toString();
        String unit = arguments.get(1).toString();
        units.addUnit(arguments.get(0).toString(), unit, (BigDec) arguments.get(2).getCar(), abbreviations);
        // Saved record: equivalent unit, conversion factor, then the abbreviations
        StringBuilder stored = new StringBuilder(unit);
        for (int i = 2; i < arguments.size(); i++)
            stored.append(", ").append(arguments.get(i));
        setData(arguments.get(0).toString(), Tokenizer.tokenizeString(stored.toString()));
        return new ConsCell("Successfully saved " + arguments.get(0) + " to the unit list.", ConsType.IDENTIFIER);
    }
    if (command.equals("removeUnit") && arguments.size() >= 1) {
        String arg = arguments.get(0).toString();
        if (getData(arg) != null) {
            removeData(arg);
            units.removeUnit(arg);
            return new ConsCell("Successfully removed " + arg + " from the unit list.", ConsType.IDENTIFIER);
        }
        else
            return new ConsCell(arg + " cannot be removed.", ConsType.IDENTIFIER);
    }
    return null;
}
/**
 * Intentionally empty.
 *
 * @throws PluginConflictException
 */
@Override
public void unloadCommands() throws PluginConflictException {
    // Nothing to do here
}
/**
 * @return the map from abbreviation pattern to expansion; this is the internal instance, not a copy, and it is
 *         <code>null</code> while the input filter is not loaded
 */
public HashMap<String, String> getAbbreviations() {
    return this.abbreviations;
}
/**
 * @return the map from prefix abbreviation to prefix name; this is the internal instance, not a copy, and it is
 *         <code>null</code> while the input filter is not loaded
 */
public HashMap<String, String> getAbbreviationPrefixes() {
    return this.abbreviationPrefixes;
}
/**
 * @return the pattern compiled by loadInputFilter() that matches any one prefix abbreviation
 */
public Pattern getPrefixPattern() {
    return this.prefixPattern;
}
}