Source Code of com.hadoop.compression.lzo.LzopOutputStream$CountingOutputStream

/*
* This file is part of Hadoop-Gpl-Compression.
*
* Hadoop-Gpl-Compression is free software: you can redistribute it
* and/or modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* Hadoop-Gpl-Compression is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Hadoop-Gpl-Compression.  If not, see
* <http://www.gnu.org/licenses/>.
*/

package com.hadoop.compression.lzo;

import java.io.DataOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.Adler32;

import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressorStream;
import org.apache.hadoop.io.compress.Compressor;

public class LzopOutputStream extends CompressorStream {

  final int MAX_INPUT_SIZE;             // largest uncompressed chunk fed to the compressor per lzop block
  protected DataOutputStream indexOut;  // optional index stream; receives the offset of each compressed block
  private CountingOutputStream cout;    // counts bytes written to 'out' so block offsets can be indexed

  /**
   * Write an lzop-compatible header to the OutputStream provided.
   * @param out OutputStream
   * @param strategy The lzo compression algorithm
   * @throws IOException if lzop strategy is incompatible
   */
  protected static void writeLzopHeader(OutputStream out,
          LzoCompressor.CompressionStrategy strategy) throws IOException {
    DataOutputBuffer dob = new DataOutputBuffer();
    try {
      dob.writeShort(LzopCodec.LZOP_VERSION);
      dob.writeShort(LzoCompressor.LZO_LIBRARY_VERSION);
      dob.writeShort(LzopCodec.LZOP_COMPAT_VERSION);
      switch (strategy) { // method and level bytes, as lzop records them
      case LZO1X_1:
        dob.writeByte(1);
        dob.writeByte(5);
        break;
      case LZO1X_15:
        dob.writeByte(2);
        dob.writeByte(1);
        break;
      case LZO1X_999:
        dob.writeByte(3);
        dob.writeByte(9);
        break;
      default:
        throw new IOException("Incompatible lzop strategy: " + strategy);
      }
      dob.writeInt(0);                                    // all flags 0
      dob.writeInt(0x81A4);                               // mode 0100644 (regular file, rw-r--r--)
      dob.writeInt((int)(System.currentTimeMillis() / 1000)); // mtime
      dob.writeInt(0);                                    // gmtdiff ignored
      dob.writeByte(0);                                   // no filename
      Adler32 headerChecksum = new Adler32();
      headerChecksum.update(dob.getData(), 0, dob.getLength());
      int hc = (int)headerChecksum.getValue();
      dob.writeInt(hc);
      out.write(LzopCodec.LZO_MAGIC);
      out.write(dob.getData(), 0, dob.getLength());
    } finally {
      dob.close();
    }
  }

  public LzopOutputStream(OutputStream out, Compressor compressor,
          int bufferSize, LzoCompressor.CompressionStrategy strategy)
  throws IOException {
    this(out, null, compressor, bufferSize, strategy);
  }

  public LzopOutputStream(OutputStream out, DataOutputStream indexOut,
      Compressor compressor, int bufferSize,
      LzoCompressor.CompressionStrategy strategy)
      throws IOException {
    super(new CountingOutputStream(out), compressor, bufferSize);

    this.cout = (CountingOutputStream) this.out;
    this.indexOut = indexOut;
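    // Leave headroom for the worst case where a block is incompressible and
    // LZO expands it: roughly len/16 + 64 + 3 extra bytes for the LZO1
    // algorithms, len/8 + 128 + 3 otherwise.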
    int overhead = strategy.name().contains("LZO1") ?
      (bufferSize >> 4) + 64 + 3 : (bufferSize >> 3) + 128 + 3;
    MAX_INPUT_SIZE = bufferSize - overhead;

    writeLzopHeader(this.out, strategy);
  }

  /**
   * Finish compression, write the lzop end-of-stream marker (a null word),
   * and close the underlying stream (and the index stream, if any).
   */
  @Override
  public void close() throws IOException {
    if (!closed) {
      finish();
      out.write(new byte[]{ 0, 0, 0, 0 });
      out.close();
      if (indexOut != null) {
        indexOut.close();
      }
      closed = true;
      //return the compressor to the pool for later reuse;
      //the returnCompressor handles nulls.
      CodecPool.returnCompressor(compressor);
    }
  }

  @Override
  public void write(byte[] b, int off, int len) throws IOException {
    // TODO: LzopOutputStream used to inherit from BlockCompressorStream
    // but had a bug due to this inheritance chain. In order to fix the
    // bug we pulled down the implementation of the superclass, which
    // is overly general. Thus this function is not quite as succinct
    // as it could be, now that it's LZOP-specific.
    // See: http://github.com/toddlipcon/hadoop-lzo/commit/5fe6dd4736a73fa33b86656ce8aeb011e7f2046c

    // Sanity checks
    if (compressor.finished()) {
      throw new IOException("write beyond end of stream");
    }
    if (b == null) {
      throw new NullPointerException();
    } else if ((off < 0) || (off > b.length) || (len < 0) ||
               ((off + len) > b.length)) {
      throw new IndexOutOfBoundsException();
    } else if (len == 0) {
      return;
    }

    long limlen = compressor.getBytesRead();
    if (len + limlen > MAX_INPUT_SIZE && limlen > 0) {
      // Adding this segment would exceed the maximum size.
      // Flush data if we have it.
      finish();
      compressor.reset();
    }

    if (len > MAX_INPUT_SIZE) {
      // The data we're given exceeds the maximum size. Any data
      // we had has already been flushed, so we write out this chunk in segments
      // not exceeding the maximum size until it is exhausted.
      do {
        int bufLen = Math.min(len, MAX_INPUT_SIZE);

        compressor.setInput(b, off, bufLen);
        finish();
        compressor.reset();
        off += bufLen;
        len -= bufLen;
      } while (len > 0);
      return;
    }

    // Give data to the compressor
    compressor.setInput(b, off, len);
    if (!compressor.needsInput()) {
      // compressor buffer size might be smaller than the maximum
      // size, so we permit it to flush if required.
      do {
        compress();
      } while (!compressor.needsInput());
    }
  }

  @Override
  public void finish() throws IOException {
    if (!compressor.finished()) {
      compressor.finish();
      while (!compressor.finished()) {
        compress();
      }
    }
  }

  @Override
  protected void compress() throws IOException {
    int len = compressor.compress(buffer, 0, buffer.length);
    if (len > 0) {
      // new lzo block. write current position to index file.
      if (indexOut != null) {
        indexOut.writeLong(cout.bytesWritten);
      }

      rawWriteInt((int)compressor.getBytesRead());

      // If the compressed buffer is actually larger than the uncompressed buffer,
      // the LZO specification says that we should write the uncompressed bytes rather
      // than the compressed bytes.  The decompressor understands this because both sizes
      // get written to the stream.
      if (compressor.getBytesRead() <= compressor.getBytesWritten()) {
        // Compression actually increased the size of the buffer, so write the uncompressed bytes.
        byte[] uncompressed = ((LzoCompressor)compressor).uncompressedBytes();
        rawWriteInt(uncompressed.length);
        out.write(uncompressed, 0, uncompressed.length);
      } else {
        // Write out the compressed chunk.
        rawWriteInt(len);
        out.write(buffer, 0, len);
      }
    }
  }

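  /** Writes {@code v} as a big-endian 32-bit integer, matching the byte order lzop uses on disk. */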
  private void rawWriteInt(int v) throws IOException {
    out.write((v >>> 24) & 0xFF);
    out.write((v >>> 16) & 0xFF);
    out.write((v >>>  8) & 0xFF);
    out.write((v >>>  0) & 0xFF);
  }

  /* keeps count of number of bytes written. */
  private static class CountingOutputStream extends FilterOutputStream {
    long bytesWritten = 0;

    public CountingOutputStream(OutputStream out) {
      super(out);
    }

    @Override
    public void write(byte[] b, int off, int len) throws IOException {
      out.write(b, off, len);
      bytesWritten += len;
    }

    @Override
    public void write(int b) throws IOException {
      out.write(b);
      bytesWritten++;
    }
  }
}
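
For reference, here is a minimal usage sketch. It is not part of the class above and makes a few assumptions: the hadoop-lzo jar and the native lzo/gplcompression libraries are available at runtime, and the output file name data.txt.lzo is purely illustrative. An LzopOutputStream is normally obtained through LzopCodec.createOutputStream rather than constructed directly.

import java.io.FileOutputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;

import com.hadoop.compression.lzo.LzopCodec;

public class LzopWriteExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    LzopCodec codec = new LzopCodec();
    codec.setConf(conf);  // assumes the native LZO bindings can be loaded

    // createOutputStream wraps the raw stream in an LzopOutputStream, which writes the
    // lzop header up front and the end-of-stream null word when close() is called.
    try (OutputStream out = codec.createOutputStream(new FileOutputStream("data.txt.lzo"))) {
      out.write("hello, lzop\n".getBytes(StandardCharsets.UTF_8));
    }
  }
}

If the header and block framing above are written correctly, the resulting file should also be readable by the stock lzop command-line tool.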