Package joshua.decoder.ff.tm

Source Code of joshua.decoder.ff.tm.GrammarReader

package joshua.decoder.ff.tm;

import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.logging.Level;
import java.util.logging.Logger;

import joshua.corpus.vocab.SymbolTable;
import joshua.util.io.LineReader;

/**
* This is a base class for simple, ASCII line-based grammars that
* are stored on disk.
*
* @author Juri Ganitkevitch
*
*/
public abstract class GrammarReader<R extends Rule>
implements Iterable<R>, Iterator<R> {

  protected static String fieldDelimiter;
  protected static String nonTerminalRegEx;
  protected static String nonTerminalCleanRegEx;

  protected static String description;

  protected SymbolTable symbolTable;

  protected String fileName;
  protected LineReader reader;
  protected String lookAhead;
   
  private static final Logger logger = Logger
      .getLogger(GrammarReader.class.getName());

  // dummy constructor for
  public GrammarReader() {
    this.symbolTable = null;
    this.fileName = null;
  }
 
  public GrammarReader(String fileName, SymbolTable symbolTable) {
    this.fileName = fileName;
    this.symbolTable = symbolTable;
  }

  public void initialize() {
    try {
      this.reader = new LineReader(fileName);
    } catch (IOException e) {
      throw new RuntimeException(
        "Error opening translation model file: " + fileName
        + (null != e.getMessage()
          ? e.getMessage()
          : "No details available. Sorry."), e);
    }
   
    advanceReader();
  }

  // the reader is the iterator itself
  public Iterator<R> iterator() {
    return this;
  }

  /** Unsupported Iterator method. */
  public void remove() throws UnsupportedOperationException {
    throw new UnsupportedOperationException();
  }
 
 
  public void close() {
    if (null != this.reader) {
      try {
        this.reader.close();
      } catch (IOException e) {
        // FIXME: is this the right logging level?
        if (logger.isLoggable(Level.WARNING))
          logger.info("Error closing grammar file stream: "
              + this.fileName);
      }
      this.reader = null;
    }
  }
 
 
  /**
   * For correct behavior <code>close</code> must be called
   * on every GrammarReader, however this code attempts to
   * avoid resource leaks.
   *
   * @see joshua.util.io.LineReader
   */
  protected void finalize() throws Throwable {
    logger.severe("Grammar file stream was not closed, this indicates a coding error: " + this.fileName);
   
    this.close();
    super.finalize();
  }
 
 
  public boolean hasNext() {
    return lookAhead != null;
  }
 
 
  private void advanceReader() {
    try {
      lookAhead = reader.readLine();
    } catch (IOException e) {     
      logger.severe("Error reading grammar from file: " + fileName);
    }
    if (lookAhead == null && reader != null) {
      this.close();
    }
  }

  public R next() {
    String line = lookAhead;
    advanceReader();
    return parseLine(line);
  }

  protected abstract R parseLine(String line);
   
  // TODO: keep these around or not?
  public abstract String toWords(R rule);
  public abstract String toWordsWithoutFeatureScores(R rule);

  public abstract String toTokenIds(R rule);
  public abstract String toTokenIdsWithoutFeatureScores(R rule);
 
  public int cleanNonTerminal(int tokenID) {
    // cleans NT of any markup, e.g., [X,1] may becomes [X], depending
    return symbolTable.addNonterminal(
        cleanNonTerminal(symbolTable.getWord(tokenID)));
  }

  public String cleanNonTerminal(String word) {
    // cleans NT of any markup,  e.g., [X,1] may becomes [X], depending on nonTerminalCleanRegEx
    return word.replaceAll(nonTerminalCleanRegEx, "");
  }

  public static boolean isNonTerminal(final String word) {
    // checks if word matches NT regex
    return word.matches(nonTerminalRegEx);
  }

  public String getNonTerminalRegEx() {
    return nonTerminalRegEx;
  }

  public String getNonTerminalCleanRegEx() {
    return nonTerminalCleanRegEx;
  }

}
TOP

Related Classes of joshua.decoder.ff.tm.GrammarReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.