package it.halfone.bnfreader;
import it.halfone.exception.InvalidDocumentException;
import it.halfone.hava.container.Context;
import it.halfone.parser.Parser;
import it.halfone.parser.token.TokenParser;
import it.halfone.parser.token.impl.AnalyzerParser;
import it.halfone.parser.token.impl.AndTokenParser;
import it.halfone.parser.token.impl.BoostParser;
import it.halfone.parser.token.impl.BracketTokenParser;
import it.halfone.parser.token.impl.CharacterClassesParser;
import it.halfone.parser.token.impl.LiteralTokenParser;
import it.halfone.parser.token.impl.ModifierTokenParser;
import it.halfone.parser.token.impl.OrTokenParser;
import it.halfone.parser.token.impl.RegexReferenceParser;
import it.halfone.regex.Regex;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * BnfReader - 16 Sep 2011
 * <p>
 * Reads rule definitions from {@code .bnf} files into a map of rule name to
 * {@link Regex} and compiles the last rule defined in the requested file into
 * a {@link Parser}.
 * <p>
 * Recognized lines (after trimming):
 * <ul>
 * <li>empty lines and lines starting with {@code //} are ignored;</li>
 * <li>{@code import "name";} reads {@code name.bnf} into the same rule map;</li>
 * <li>anything else is treated as a rule definition, which may span several
 * physical lines and ends at the first line that ends with {@code ;}.</li>
 * </ul>
 *
 * @author Andrea La Rosa
 */
public final class BnfReader {

    // An id is a letter followed by zero or more word characters
    private static final String ID = "([a-zA-Z]\\w*)";

    // A token can't contain whitespace
    private static final String TOKEN = "(\\S+)";

    // A token followed by any number of further space-separated tokens.
    // NOTE(review): the '|' below is an unescaped regex alternation (either " " or
    // " " + TOKEN), not a literal pipe separator. Left unchanged - confirm the intent.
    private static final String TOKEN_SEQUENCE = TOKEN + "(?: | " + TOKEN + ")*";

    // Optional '!' marker (group 1) followed by the rule id (group 2)
    private static final String RULE = "(!)?" + ID;

    // rule = tokens; -- group 3 captures the whole token sequence
    private static final String DEFINITION = RULE + "\\s*=\\s*(" + TOKEN_SEQUENCE + ");$";

    // import "name"; -- group 1 captures the name between the quotes
    private static final String IMPORT = "import \"(.*)\";";

    private static final Pattern DEF_PATTERN = Pattern.compile(DEFINITION);
    private static final Pattern IMPORT_PATTERN = Pattern.compile(IMPORT);

    // Shape the working token must have once no parser can change it any more
    private static final Pattern REDUCED_TOKEN_PATTERN = Pattern.compile(TokenParser.ID + "[\\d]+");

    // Parsers are tried in this order; the first one that reports a change wins
    // and the scan restarts from the top of the list.
    private static final TokenParser[] parsers = new TokenParser[]{
        new LiteralTokenParser(),
        new AnalyzerParser(),
        new CharacterClassesParser(),
        new RegexReferenceParser(),
        new BracketTokenParser(),
        new BoostParser(),
        new ModifierTokenParser(),
        new AndTokenParser(),
        new OrTokenParser()
    };

    /**
     * Reads {@code filename + ".bnf"} (and every file it imports) and compiles the
     * last rule defined in that file.
     *
     * @param filename path of the grammar file, without the {@code .bnf} extension
     * @return the parser obtained by validating and compiling the last rule
     *         defined in the file
     * @throws InvalidDocumentException if a line is not valid, a rule name is
     *         duplicated, or the file does not define any rule
     * @throws IOException if a file cannot be opened or read
     */
    public static Parser readBnf(String filename) throws IOException, InvalidDocumentException {
        Map<String, Regex> ruleMap = new HashMap<String, Regex>();
        String rootRule = readBnf(filename, ruleMap);
        if (rootRule == null) {
            // Without this guard a file holding only comments/imports ends in a NullPointerException.
            throw new InvalidDocumentException("No rule defined in :: " + filename);
        }
        Regex root = ruleMap.get(rootRule);
        root.validateOrThrow();
        return root.compile();
    }

    /**
     * Reads {@code filename + ".bnf"} and stores every rule it defines in
     * {@code ruleMap}. Imported files are read recursively into the same map;
     * the value returned for an imported file is ignored.
     *
     * @param filename path of the grammar file, without the {@code .bnf} extension
     * @param ruleMap rule name to rule; updated in place
     * @return the name of the last rule defined directly in this file, or
     *         {@code null} if the file defines none
     * @throws InvalidDocumentException if a line is not valid or a rule name is
     *         already present in {@code ruleMap}
     * @throws IOException if a file cannot be opened or read
     */
    private static String readBnf(String filename, Map<String, Regex> ruleMap) throws IOException, InvalidDocumentException {
        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filename + ".bnf")));
        try {
            String lastRuleName = null;
            String strLine;
            while ((strLine = br.readLine()) != null) {
                strLine = strLine.trim();
                if (strLine.equals("") || strLine.startsWith("//")) {
                    continue;
                }

                Matcher matcher = IMPORT_PATTERN.matcher(strLine);
                if (matcher.find()) {
                    readBnf(matcher.group(1), ruleMap);
                    continue;
                }

                // A definition may span several lines: keep appending trimmed lines
                // until the text ends with ';' or the file is exhausted.
                StringBuilder definition = new StringBuilder(strLine);
                while (definition.charAt(definition.length() - 1) != ';'
                        && (strLine = br.readLine()) != null) {
                    definition.append(strLine.trim());
                }

                matcher = DEF_PATTERN.matcher(definition);
                if (!matcher.find()) {
                    throw new InvalidDocumentException("Not a valid line :: " + definition);
                }

                String ruleName = matcher.group(2);
                if (ruleMap.get(ruleName) != null) {
                    throw new InvalidDocumentException("Duplicated rule name :: " + ruleName);
                }

                Regex rule = parseToken(ruleName, matcher.group(3), ruleMap);
                if (rule == null) {
                    throw new InvalidDocumentException("Not a valid line :: " + definition);
                }
                rule = rule.close();
                if (matcher.group(1) != null) {
                    // The rule was prefixed with '!'
                    rule = rule.mark(ruleName);
                }
                ruleMap.put(ruleName, rule);
                lastRuleName = ruleName;
            }
            return lastRuleName;
        } finally {
            // Release the file handle even when parsing fails.
            br.close();
        }
    }

    /**
     * Builds the {@link Regex} for the right-hand side of a rule definition.
     * <p>
     * A {@link Context} is seeded with the token text, an empty regex list, the
     * rule name and the already known rules; the registered parsers are then
     * applied repeatedly until none of them reports a change.
     *
     * @param ruleName name of the rule being defined
     * @param token right-hand side of the definition
     * @param existingRule rules defined so far, made available to the parsers
     * @return the element of the context's regex list at position {@code index - 1},
     *         where {@code index} is the value left in the context by the parsers
     * @throws InvalidDocumentException if the token left in the context does not
     *         match {@code TokenParser.ID} followed by one or more digits
     */
    private static Regex parseToken(String ruleName, String token, Map<String, Regex> existingRule) throws InvalidDocumentException {
        String unmodifiedToken = token;

        Context ctx = new Context();
        ctx.setValue("index", 0);
        ctx.setValue("token", token);
        ctx.setValue("regexList", new ArrayList<Regex>());
        ctx.setValue("existingRule", existingRule);
        ctx.setValue("ruleName", ruleName);

        // Restart from the first parser after every change; stop once a full
        // pass over the parsers leaves the context untouched.
        boolean modified = true;
        while (modified) {
            modified = false;
            for (TokenParser parser : parsers) {
                if (parser.parseToken(ctx)) {
                    modified = true;
                    break;
                }
            }
        }

        int index = ctx.getValue("index");
        token = ctx.getValue("token");
        List<Regex> regexList = ctx.getValue("regexList");

        if (!REDUCED_TOKEN_PATTERN.matcher(token).matches()) {
            throw new InvalidDocumentException("Invalid token::" + unmodifiedToken);
        }
        return regexList.get(index - 1);
    }
}