// Record on this object that the table is being populated from a file.
this.isReadingFromFile = true;

//### Load the file into two local lookup tables: string -> id and id -> string.
HashMap<String,Integer> localStr2id = new HashMap<String,Integer>();
HashMap<Integer,String> localId2str = new HashMap<Integer,String>();
LineReader symboltableReader = new LineReader(fname);
try {
    for (String line : symboltableReader) {
        // A usable line must split into exactly two fields: <string> <id>.
        // Anything else is logged and skipped.
        String[] fields = Regex.spaces.split(line);
        if (fields.length != 2) {
            logger.warning("read index, bad line: " + line);
            continue;
        }

        String symbol = fields[0].trim();
        // A non-numeric id field makes parseInt throw NumberFormatException,
        // which is not caught here.
        int symbolId = Integer.parseInt(fields[1]);

        // Java may treat two strings as equal where the tool that wrote the
        // file (e.g. C or Perl) treated them as different, for instance
        // because of unprintable symbols. In that case a fake key is built by
        // appending the id so that the entry is still recorded.
        // NOTE(review): the fake key is not itself checked against the keys
        // already in localStr2id, so it could overwrite an existing entry --
        // confirm whether that can happen with real symbol files.
        String key = symbol;
        if (localStr2id.containsKey(symbol)) {
            logger.warning("duplicate string (add fake): " + line);
            key = symbol + symbolId; // fake string
        }
        localStr2id.put(key, symbolId);

        // Ids must be unique; a repeated id aborts the load.
        if (localId2str.containsKey(symbolId)) {
            throw new RuntimeException("duplicate id, have to exit; " + line);
        }
        localId2str.put(symbolId, key);
    }
} finally {
    // Release the reader whether the loop finished or threw.
    symboltableReader.close();
}
/*if (localId2str.size() >= this.lm_end_sym_id - this.lm_start_sym_id) {
throw new RuntimeException("read symbol tbl, tlb is too big");
}*/