package joshua.decoder.ff.tm;
import java.io.IOException;
import java.util.Iterator;
import java.util.logging.Level;
import java.util.logging.Logger;
import joshua.corpus.vocab.SymbolTable;
import joshua.util.io.LineReader;
/**
* This is a base class for simple, ASCII line-based grammars that
* are stored on disk.
*
* @author Juri Ganitkevitch
*
*/
public abstract class GrammarReader<R extends Rule>
implements Iterable<R>, Iterator<R> {
protected static String fieldDelimiter;
protected static String nonTerminalRegEx;
protected static String nonTerminalCleanRegEx;
protected static String description;
protected SymbolTable symbolTable;
protected String fileName;
protected LineReader reader;
protected String lookAhead;
private static final Logger logger = Logger
.getLogger(GrammarReader.class.getName());
// dummy constructor for
public GrammarReader() {
this.symbolTable = null;
this.fileName = null;
}
public GrammarReader(String fileName, SymbolTable symbolTable) {
this.fileName = fileName;
this.symbolTable = symbolTable;
}
public void initialize() {
try {
this.reader = new LineReader(fileName);
} catch (IOException e) {
throw new RuntimeException(
"Error opening translation model file: " + fileName
+ (null != e.getMessage()
? e.getMessage()
: "No details available. Sorry."), e);
}
advanceReader();
}
// the reader is the iterator itself
public Iterator<R> iterator() {
return this;
}
/** Unsupported Iterator method. */
public void remove() throws UnsupportedOperationException {
throw new UnsupportedOperationException();
}
public void close() {
if (null != this.reader) {
try {
this.reader.close();
} catch (IOException e) {
// FIXME: is this the right logging level?
if (logger.isLoggable(Level.WARNING))
logger.info("Error closing grammar file stream: "
+ this.fileName);
}
this.reader = null;
}
}
/**
* For correct behavior <code>close</code> must be called
* on every GrammarReader, however this code attempts to
* avoid resource leaks.
*
* @see joshua.util.io.LineReader
*/
protected void finalize() throws Throwable {
logger.severe("Grammar file stream was not closed, this indicates a coding error: " + this.fileName);
this.close();
super.finalize();
}
public boolean hasNext() {
return lookAhead != null;
}
private void advanceReader() {
try {
lookAhead = reader.readLine();
} catch (IOException e) {
logger.severe("Error reading grammar from file: " + fileName);
}
if (lookAhead == null && reader != null) {
this.close();
}
}
public R next() {
String line = lookAhead;
advanceReader();
return parseLine(line);
}
protected abstract R parseLine(String line);
// TODO: keep these around or not?
public abstract String toWords(R rule);
public abstract String toWordsWithoutFeatureScores(R rule);
public abstract String toTokenIds(R rule);
public abstract String toTokenIdsWithoutFeatureScores(R rule);
public int cleanNonTerminal(int tokenID) {
// cleans NT of any markup, e.g., [X,1] may becomes [X], depending
return symbolTable.addNonterminal(
cleanNonTerminal(symbolTable.getWord(tokenID)));
}
public String cleanNonTerminal(String word) {
// cleans NT of any markup, e.g., [X,1] may becomes [X], depending on nonTerminalCleanRegEx
return word.replaceAll(nonTerminalCleanRegEx, "");
}
public static boolean isNonTerminal(final String word) {
// checks if word matches NT regex
return word.matches(nonTerminalRegEx);
}
public String getNonTerminalRegEx() {
return nonTerminalRegEx;
}
public String getNonTerminalCleanRegEx() {
return nonTerminalCleanRegEx;
}
}