Source Code of com.twitter.elephantbird.util.LzoUtils

package com.twitter.elephantbird.util;


import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


import com.hadoop.compression.lzo.LzoIndex;
import com.hadoop.compression.lzo.LzoIndexer;
import com.hadoop.compression.lzo.LzopCodec;


/**
 * Miscellaneous lzo related utilities.
 */
public class LzoUtils {


  public static final Logger LOG = LoggerFactory.getLogger(LzoUtils.class);


  /**
   * A work-around to support environments with older versions of LzopCodec.
   * It might not be feasible for to select right version of hadoop-lzo
   * in some cases. This should be removed latest by EB-3.0.
   */
  private static boolean isLzopIndexSupported = false;
  static {
    try {
      isLzopIndexSupported =
        null != LzopCodec.class.getMethod("createIndexedOutputStream",
                                          OutputStream.class,
                                          DataOutputStream.class);
    } catch (Exception e) {
      // older version of hadoop-lzo.
    }
  }


  /**
   * Creates an lzop output stream. The index for the lzop is
   * also written to another at the same time if
   * <code>elephantbird.lzo.output.index</code> is set in configuration. <p>
   *
   * If the file size at closing is not larger than a single block,
   * the index file is deleted (in line with {@link LzoIndexer} behavior).
   */
  public static DataOutputStream
  getIndexedLzoOutputStream(Configuration conf, Path path) throws IOException {


    LzopCodec codec = new LzopCodec();
    codec.setConf(conf);


    final Path file = path;
    final FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);


    FSDataOutputStream indexOut = null;
    if (conf.getBoolean("elephantbird.lzo.output.index", false)) {
      if ( isLzopIndexSupported ) {
        Path indexPath = file.suffix(LzoIndex.LZO_TMP_INDEX_SUFFIX);
        indexOut = fs.create(indexPath, false);
      } else {
        LOG.warn("elephantbird.lzo.output.index is enabled, but LzopCodec "
            + "does not have createIndexedOutputStream method. "
            + "Please upgrade hadoop-lzo.");
      }
    }


    final boolean isIndexed = indexOut != null;


    OutputStream out = ( isIndexed ?
        codec.createIndexedOutputStream(fileOut, indexOut) :
        codec.createOutputStream(fileOut) );


    return new DataOutputStream(out) {
      // override close() to handle renaming index file.


      public void close() throws IOException {
        super.close();


        if ( isIndexed ) {
          // rename or remove the index file based on file size.


          Path tmpPath = file.suffix(LzoIndex.LZO_TMP_INDEX_SUFFIX);
          FileStatus stat = fs.getFileStatus(file);
          if (stat.getLen() <= stat.getBlockSize()) {
            fs.delete(tmpPath, false);
          } else {
            fs.rename(tmpPath, file.suffix(LzoIndex.LZO_INDEX_SUFFIX));
          }
        }
      }
    };
  }


}
Source Code of com.twitter.elephantbird.util.LzoUtils

Related Classes of com.twitter.elephantbird.util.LzoUtils