public void load(URL specModelURL, SpecificationModels featureSpecModels) throws MaltChainedException {
BufferedReader br = null;
Pattern tabPattern = Pattern.compile("\t");
if (specModelURL == null) {
throw new FeatureException("The feature specification file cannot be found. ");
}
try {
br = new BufferedReader(new InputStreamReader(specModelURL.openStream()));
} catch (IOException e) {
throw new FeatureException("Could not read the feature specification file '"+specModelURL.toString()+"'. ", e);
}
if (br != null) {
int specModelIndex = featureSpecModels.getNextIndex();
String fileLine;
String items[];
StringBuilder featureText = new StringBuilder();
String splitfeats = "";
ArrayList<String> fileLines = new ArrayList<String>();
ArrayList<String> orderFileLines = new ArrayList<String>();
while (true) {
try {
fileLine = br.readLine();
} catch (IOException e) {
throw new FeatureException("Could not read the feature specification file '"+specModelURL.toString()+"'. ", e);
}
if (fileLine == null) {
break;
}
if (fileLine.length() <= 1 && fileLine.trim().substring(0, 2).trim().equals("--")) {
continue;
}
fileLines.add(fileLine);
}
try {
br.close();
} catch (IOException e) {
throw new FeatureException("Could not close the feature specification file '"+specModelURL.toString()+"'. ", e);
}
for (int j = 0; j < fileLines.size(); j++) {
orderFileLines.add(fileLines.get(j));
}
boolean deprel = false;
for (int j=0; j < orderFileLines.size(); j++) {
deprel = false;
featureText.setLength(0);
splitfeats = "";
items = tabPattern.split(orderFileLines.get(j));
if (items.length < 2) {
throw new FeatureException("The feature specification file '"+specModelURL.toString()+"' must contain at least two columns.");
}
if (!(columnNameMap.containsKey(ColumnNames.valueOf(items[0].trim())) || columnNameMap.containsValue(items[0].trim()))) {
throw new FeatureException("Column one in the feature specification file '"+specModelURL.toString()+"' contains an unknown value '"+items[0].trim()+"'. ");
}
if (items[0].trim().equalsIgnoreCase("DEP") || items[0].trim().equalsIgnoreCase("DEPREL")) {
featureText.append("OutputColumn(DEPREL, ");
deprel = true;
} else {
if (columnNameMap.containsKey(ColumnNames.valueOf(items[0].trim()))) {
featureText.append("InputColumn("+columnNameMap.get(ColumnNames.valueOf(items[0].trim()))+", ");
} else if (columnNameMap.containsValue(items[0].trim())) {
featureText.append("InputColumn("+items[0].trim()+", ");
}
if (items[0].trim().equalsIgnoreCase("FEATS") && isUseSplitFeats()) {
splitfeats = "Split(";
}
}
if (!(items[1].trim().equalsIgnoreCase("STACK") || items[1].trim().equalsIgnoreCase("INPUT") || items[1].trim().equalsIgnoreCase("CONTEXT"))) {
throw new FeatureException("Column two in the feature specification file '"+specModelURL.toString()+"' should be either 'STACK', 'INPUT' or 'CONTEXT' (Covington), not '"+items[1].trim()+"'. ");
}
int offset = 0;
if (items.length >= 3) {
try {
offset = new Integer(Integer.parseInt(items[2]));
} catch (NumberFormatException e) {
throw new FeatureException("The feature specification file '"+specModelURL.toString()+"' contains a illegal integer value. ", e);
}
}
String functionArg = "";
if (items[1].trim().equalsIgnoreCase("CONTEXT")) {
if (offset >= 0) {
functionArg = dataStructuresMap.get(DataStructures.valueOf("LEFTCONTEXT"))+"["+offset+"]";
} else {
functionArg = dataStructuresMap.get(DataStructures.valueOf("RIGHTCONTEXT"))+"["+Math.abs(offset + 1)+"]";
}
} else if (dataStructuresMap.containsKey(DataStructures.valueOf(items[1].trim()))) {
if (covington == true) {
if (dataStructuresMap.get(DataStructures.valueOf(items[1].trim())).equalsIgnoreCase("Stack")) {
functionArg = "Left["+offset+"]";
} else {
functionArg = "Right["+offset+"]";
}
} else {
functionArg = dataStructuresMap.get(DataStructures.valueOf(items[1].trim()))+"["+offset+"]";
}
} else if (dataStructuresMap.containsValue(items[1].trim())) {
if (covington == true) {
if (items[1].trim().equalsIgnoreCase("Stack")) {
functionArg = "Left["+offset+"]";
} else {
functionArg = "Right["+offset+"]";
}
} else {
functionArg = items[1].trim()+"["+offset+"]";
}
} else {
throw new FeatureException("Column two in the feature specification file '"+specModelURL.toString()+"' should not contain the value '"+items[1].trim());
}
int linearOffset = 0;
int headOffset = 0;
int depOffset = 0;
int sibOffset = 0;
int suffixLength = 0;
if (items.length >= 4) { linearOffset = new Integer(Integer.parseInt(items[3])); }
if (items.length >= 5) { headOffset = new Integer(Integer.parseInt(items[4])); }
if (items.length >= 6) { depOffset = new Integer(Integer.parseInt(items[5])); }
if (items.length >= 7) { sibOffset = new Integer(Integer.parseInt(items[6])); }
if (items.length >= 8) { suffixLength = new Integer(Integer.parseInt(items[7])); }
if (linearOffset < 0) {
linearOffset = Math.abs(linearOffset);
for (int i = 0; i < linearOffset; i++) {
functionArg = "pred("+functionArg+")";
}
} else if (linearOffset > 0) {
for (int i = 0; i < linearOffset; i++) {
functionArg = "succ("+functionArg+")";
}
}
if (headOffset >= 0) {
for (int i = 0; i < headOffset; i++) {
functionArg = "head("+functionArg+")";
}
} else {
throw new FeatureException("The feature specification file '"+specModelURL.toString()+"' should not contain a negative head function value. ");
}
if (depOffset < 0) {
depOffset = Math.abs(depOffset);
for (int i = 0; i < depOffset; i++) {
functionArg = "ldep("+functionArg+")";