/*
* This file is part of lzo-java, an implementation of LZO in Java.
* https://github.com/Karmasphere/lzo-java
*
* The Java portion of this library is:
* Copyright (C) 2011 Shevek <shevek@anarres.org>
* All Rights Reserved.
*
* This file is based on a file from hadoop-gpl-compression.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with the LZO library; see the file COPYING.
* If not, see <http://www.gnu.org/licenses/> or write to the
* Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301, USA.
*/
package org.anarres.lzo.hadoop.codec;
import java.io.IOException;
import java.io.OutputStream;
import java.io.InputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.compress.BlockCompressorStream;
import org.apache.hadoop.io.compress.BlockDecompressorStream;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;
/**
 * A {@link org.apache.hadoop.io.compress.CompressionCodec} for a streaming
 * <b>lzo</b> compression/decompression pair
 * (see http://www.oberhumer.com/opensource/lzo/).
 *
 * <p>Every tunable (compression strategy, decompression strategy, compression
 * level and buffer size) is read from the {@link Configuration} returned by
 * {@link #getConf()}, using the {@code *_KEY} constants declared below. The
 * static {@code set*} helpers write those same keys.
 */
public class LzoCodec extends Configured implements CompressionCodec {

    private static final Log LOG = LogFactory.getLog(LzoCodec.class.getName());

    /** Configuration key holding the name of the {@link LzoCompressor.CompressionStrategy} to use. */
    public static final String LZO_COMPRESSOR_KEY = "io.compression.codec.lzo.compressor";

    /** Configuration key holding the name of the {@link LzoDecompressor.CompressionStrategy} to use. */
    public static final String LZO_DECOMPRESSOR_KEY = "io.compression.codec.lzo.decompressor";

    /** Configuration key holding the integer compression level. */
    public static final String LZO_COMPRESSION_LEVEL_KEY = "io.compression.codec.lzo.compression.level";

    /** Configuration key holding the integer buffer size, in bytes. */
    public static final String LZO_BUFFER_SIZE_KEY = "io.compression.codec.lzo.buffersize";

    /** Buffer size (256 KiB) used when {@link #LZO_BUFFER_SIZE_KEY} is not set. */
    public static final int DEFAULT_LZO_BUFFER_SIZE = 256 * 1024;

    /** 64 MiB. Not referenced inside this class. */
    public static final int MAX_BLOCK_SIZE = 64 * 1024 * 1024;

    /** Value returned by {@link #getCompressionLevel(Configuration)} when no level is configured. */
    public static final int UNDEFINED_COMPRESSION_LEVEL = -999; // Constant from LzoCompressor.c

    /**
     * Reads the compression strategy from {@link #LZO_COMPRESSOR_KEY},
     * falling back to {@code LZO1X_1} when the key is absent.
     *
     * @param conf the configuration to read; checked with an {@code assert} only
     * @return the strategy whose name matches the configured value
     */
    static LzoCompressor.CompressionStrategy getCompressionStrategy(Configuration conf) {
        assert conf != null : "Configuration cannot be null!";
        String fallbackName = LzoCompressor.CompressionStrategy.LZO1X_1.name();
        String strategyName = conf.get(LZO_COMPRESSOR_KEY, fallbackName);
        return LzoCompressor.CompressionStrategy.valueOf(strategyName);
    }

    /**
     * Reads the decompression strategy from {@link #LZO_DECOMPRESSOR_KEY},
     * falling back to {@code LZO1X} when the key is absent.
     *
     * @param conf the configuration to read; checked with an {@code assert} only
     * @return the strategy whose name matches the configured value
     */
    static LzoDecompressor.CompressionStrategy getDecompressionStrategy(Configuration conf) {
        assert conf != null : "Configuration cannot be null!";
        String fallbackName = LzoDecompressor.CompressionStrategy.LZO1X.name();
        String strategyName = conf.get(LZO_DECOMPRESSOR_KEY, fallbackName);
        return LzoDecompressor.CompressionStrategy.valueOf(strategyName);
    }

    /**
     * Reads the compression level from {@link #LZO_COMPRESSION_LEVEL_KEY}.
     *
     * @param conf the configuration to read; checked with an {@code assert} only
     * @return the configured level, or {@link #UNDEFINED_COMPRESSION_LEVEL} as the default
     */
    static int getCompressionLevel(Configuration conf) {
        assert conf != null : "Configuration cannot be null!";
        int level = conf.getInt(LZO_COMPRESSION_LEVEL_KEY, UNDEFINED_COMPRESSION_LEVEL);
        return level;
    }

    /**
     * Reads the buffer size from {@link #LZO_BUFFER_SIZE_KEY}.
     *
     * @param conf the configuration to read; checked with an {@code assert} only
     * @return the configured size, or {@link #DEFAULT_LZO_BUFFER_SIZE} as the default
     */
    static int getBufferSize(Configuration conf) {
        assert conf != null : "Configuration cannot be null!";
        int size = conf.getInt(LZO_BUFFER_SIZE_KEY, DEFAULT_LZO_BUFFER_SIZE);
        return size;
    }

    /**
     * Stores the name of {@code strategy} under {@link #LZO_COMPRESSOR_KEY}.
     *
     * @param conf     the configuration to modify; checked with an {@code assert} only
     * @param strategy the compression strategy to record
     */
    public static void setCompressionStrategy(Configuration conf,
            LzoCompressor.CompressionStrategy strategy) {
        assert conf != null : "Configuration cannot be null!";
        String strategyName = strategy.name();
        conf.set(LZO_COMPRESSOR_KEY, strategyName);
    }

    /**
     * Stores the name of {@code strategy} under {@link #LZO_DECOMPRESSOR_KEY}.
     *
     * @param conf     the configuration to modify; checked with an {@code assert} only
     * @param strategy the decompression strategy to record
     */
    public static void setDecompressionStrategy(Configuration conf,
            LzoDecompressor.CompressionStrategy strategy) {
        assert conf != null : "Configuration cannot be null!";
        String strategyName = strategy.name();
        conf.set(LZO_DECOMPRESSOR_KEY, strategyName);
    }

    /**
     * Stores {@code compressionLevel} under {@link #LZO_COMPRESSION_LEVEL_KEY}.
     *
     * @param conf             the configuration to modify; checked with an {@code assert} only
     * @param compressionLevel the level to record
     */
    public static void setCompressionLevel(Configuration conf, int compressionLevel) {
        assert conf != null : "Configuration cannot be null!";
        conf.setInt(LZO_COMPRESSION_LEVEL_KEY, compressionLevel);
    }

    /**
     * Stores {@code bufferSize} under {@link #LZO_BUFFER_SIZE_KEY}.
     *
     * @param conf       the configuration to modify; checked with an {@code assert} only
     * @param bufferSize the buffer size, in bytes, to record
     */
    public static void setBufferSize(Configuration conf, int bufferSize) {
        assert conf != null : "Configuration cannot be null!";
        conf.setInt(LZO_BUFFER_SIZE_KEY, bufferSize);
    }

    /**
     * Computes the worst-case expansion allowance for one block, following
     * the LZO FAQ (http://www.oberhumer.com/opensource/lzo/lzofaq.php):
     *
     * <pre>
     * How much can my data expand during compression ?
     * ================================================
     * LZO will expand incompressible data by a little amount.
     * I still haven't computed the exact values, but I suggest using
     * these formulas for a worst-case expansion calculation:
     *
     * Algorithm LZO1, LZO1A, LZO1B, LZO1C, LZO1F, LZO1X, LZO1Y, LZO1Z:
     * ----------------------------------------------------------------
     * output_block_size = input_block_size + (input_block_size / 16) + 64 + 3
     *
     * This is about 106% for a large block size.
     *
     * Algorithm LZO2A:
     * ----------------
     * output_block_size = input_block_size + (input_block_size / 8) + 128 + 3
     * </pre>
     *
     * @param strategy   the compression strategy; its name selects the formula
     * @param bufferSize the block size, in bytes, the allowance is derived from
     * @return the extra bytes to allow beyond {@code bufferSize}
     */
    private static int worstCaseOverhead(LzoCompressor.CompressionStrategy strategy,
            int bufferSize) {
        // Any strategy whose name contains "LZO1" uses the LZO1* formula;
        // everything else falls through to the LZO2A formula.
        if (strategy.name().contains("LZO1")) {
            return (bufferSize >> 4) + 64 + 3;
        }
        return (bufferSize >> 3) + 128 + 3;
    }

    /**
     * Creates a compressing stream over {@code out} using a compressor
     * obtained from {@link #createCompressor()}.
     *
     * @param out the stream that receives the compressed bytes
     * @return a stream that compresses what is written to it
     * @throws IOException declared by the {@link CompressionCodec} contract
     */
    @Override
    public CompressionOutputStream createOutputStream(OutputStream out)
            throws IOException {
        Compressor freshCompressor = createCompressor();
        return createOutputStream(out, freshCompressor);
    }

    /**
     * Creates a {@link BlockCompressorStream} over {@code out} that uses the
     * supplied compressor, the configured buffer size, and the worst-case
     * overhead for the configured compression strategy.
     *
     * @param out        the stream that receives the compressed bytes
     * @param compressor the compressor the stream should use
     * @return a block-oriented compressing stream
     * @throws IOException declared by the {@link CompressionCodec} contract
     */
    @Override
    public CompressionOutputStream createOutputStream(OutputStream out,
            Compressor compressor) throws IOException {
        // Create the lzo output-stream
        Configuration configuration = getConf();
        LzoCompressor.CompressionStrategy configuredStrategy = getCompressionStrategy(configuration);
        int blockSize = getBufferSize(configuration);
        int overhead = worstCaseOverhead(configuredStrategy, blockSize);
        return new BlockCompressorStream(out, compressor, blockSize, overhead);
    }

    /**
     * @return {@code LzoCompressor.class}
     */
    @Override
    public Class<? extends Compressor> getCompressorType() {
        return LzoCompressor.class;
    }

    /**
     * Builds a new {@link LzoCompressor} from the configured compression
     * strategy and buffer size.
     *
     * @return a newly constructed compressor
     */
    @Override
    public Compressor createCompressor() {
        Configuration configuration = getConf();
        LzoCompressor.CompressionStrategy configuredStrategy = getCompressionStrategy(configuration);
        int blockSize = getBufferSize(configuration);
        return new LzoCompressor(configuredStrategy, blockSize);
    }

    /**
     * Creates a decompressing stream over {@code in} using a decompressor
     * obtained from {@link #createDecompressor()}.
     *
     * @param in the stream supplying compressed bytes
     * @return a stream that yields the decompressed data
     * @throws IOException declared by the {@link CompressionCodec} contract
     */
    @Override
    public CompressionInputStream createInputStream(InputStream in)
            throws IOException {
        Decompressor freshDecompressor = createDecompressor();
        return createInputStream(in, freshDecompressor);
    }

    /**
     * Creates a {@link BlockDecompressorStream} over {@code in} that uses the
     * supplied decompressor and the configured buffer size.
     *
     * @param in           the stream supplying compressed bytes
     * @param decompressor the decompressor the stream should use
     * @return a block-oriented decompressing stream
     * @throws IOException declared by the {@link CompressionCodec} contract
     */
    @Override
    public CompressionInputStream createInputStream(InputStream in,
            Decompressor decompressor)
            throws IOException {
        Configuration configuration = getConf();
        return new BlockDecompressorStream(in, decompressor, getBufferSize(configuration));
    }

    /**
     * @return {@code LzoDecompressor.class}
     */
    @Override
    public Class<? extends Decompressor> getDecompressorType() {
        return LzoDecompressor.class;
    }

    /**
     * Builds a new {@link LzoDecompressor} from the configured decompression
     * strategy and buffer size.
     *
     * @return a newly constructed decompressor
     */
    @Override
    public Decompressor createDecompressor() {
        Configuration configuration = getConf();
        LzoDecompressor.CompressionStrategy configuredStrategy = getDecompressionStrategy(configuration);
        int blockSize = getBufferSize(configuration);
        return new LzoDecompressor(configuredStrategy, blockSize);
    }

    /**
     * Get the default filename extension for this kind of compression.
     *
     * @return the extension including the '.', i.e. {@code ".lzo_deflate"}
     */
    @Override
    public String getDefaultExtension() {
        return ".lzo_deflate";
    }
}