Package joshua.decoder.segment_file

Examples of joshua.decoder.segment_file.SegmentFileParser


  }
 
 
  // BUG: log file is not properly handled for parallel decoding
  void decodeTestFile() throws IOException {
    SegmentFileParser segmentParser;
   
    // BUG: As written, this will need duplicating in DecoderFactory
    // TODO: Fix JoshuaConfiguration so we can make this less gross.
    //
    // TODO: maybe using real reflection would be cleaner. If it weren't for the argument for HackishSegmentParser then we could do all this over in the JoshuaConfiguration class instead
    final String className = JoshuaConfiguration.segmentFileParserClass;
    if (null == className) {
      // Use old behavior by default
      segmentParser = new HackishSegmentParser(this.startSentenceID);
     
    } else if ("PlainSegmentParser".equals(className)) {
      segmentParser = new PlainSegmentParser();
     
    } else if ("HackishSegmentParser".equals(className)) {
      segmentParser = new HackishSegmentParser(this.startSentenceID);
     
    } else if ("SAXSegmentParser".equals(className)) {
      segmentParser = new SAXSegmentParser();
     
    } else {
      throw new IllegalArgumentException(
        "Unknown SegmentFileParser class: " + className);
    }
   
   
    // TODO: we need to run the segmentParser over the file once in order to catch any errors before we do the actual translation. Getting formatting errors asynchronously after a long time is a Bad Thing(tm). Some errors may be recoverable (e.g. by skipping the sentence that's invalid), but we're going to call all exceptions errors for now.
    //
    // TODO: we should unwrap SAXExceptions and give good error messages
    segmentParser.parseSegmentFile(
      LineReader.getInputStream(this.testFile),
      new CoIterator<Segment>() {
        public void coNext(Segment seg) {
          // Consume Segment and do nothing (for now)
        }
        public void finish() {
          // Nothing to clean up
        }
      });
   
    // TODO: we should also have the CoIterator<Segment> test compatibility with a given grammar, e.g. count of grammatical feature functions match, nonterminals match,...
   
    // TODO: we may also want to validate that all segments have different ids
   
   
    //=== Translate the test file
    this.nbestWriter = FileUtility.getWriteFileStream(this.nbestFile);   
    try {
      try {
        //this method will analyze the input file (to generate segments), and then translate segments one by one
        segmentParser.parseSegmentFile(
          LineReader.getInputStream(this.testFile),
          new TranslateCoiterator(
            null == this.oracleFile
              ? new NullReader<String>()
              : new LineReader(this.oracleFile)
View Full Code Here

TOP

Related Classes of joshua.decoder.segment_file.SegmentFileParser

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.