Package org.apache.hadoop.io.compress

Source Code of org.apache.hadoop.io.compress.BZip2Codec$BZip2CompressionInputStream

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.io.compress;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.io.compress.bzip2.BZip2DummyCompressor;
import org.apache.hadoop.io.compress.bzip2.BZip2DummyDecompressor;
import org.apache.hadoop.io.compress.bzip2.CBZip2InputStream;
import org.apache.hadoop.io.compress.bzip2.CBZip2OutputStream;

/**
* This class provides CompressionOutputStream and CompressionInputStream for
* compression and decompression. Currently we dont have an implementation of
* the Compressor and Decompressor interfaces, so those methods of
* CompressionCodec which have a Compressor or Decompressor type argument, throw
* UnsupportedOperationException.
*/
public class BZip2Codec implements
    org.apache.hadoop.io.compress.CompressionCodec {

  private static final String HEADER = "BZ";
  private static final int HEADER_LEN = HEADER.length();

  /**
  * Creates a new instance of BZip2Codec
  */
  public BZip2Codec() {
  }

  /**
  * Creates CompressionOutputStream for BZip2
  *
  * @param out
  *            The output Stream
  * @return The BZip2 CompressionOutputStream
  * @throws java.io.IOException
  *             Throws IO exception
  */
  public CompressionOutputStream createOutputStream(OutputStream out)
      throws IOException {
    return new BZip2CompressionOutputStream(out);
  }

  /**
   * This functionality is currently not supported.
   *
   * @throws java.lang.UnsupportedOperationException
   *             Throws UnsupportedOperationException
   */
  public CompressionOutputStream createOutputStream(OutputStream out,
      Compressor compressor) throws IOException {
    return createOutputStream(out);
  }

  /**
  * This functionality is currently not supported.
  *
  * @throws java.lang.UnsupportedOperationException
  *             Throws UnsupportedOperationException
  */
  public Class<? extends org.apache.hadoop.io.compress.Compressor> getCompressorType() {
    return BZip2DummyCompressor.class;
  }

  /**
  * This functionality is currently not supported.
  *
  * @throws java.lang.UnsupportedOperationException
  *             Throws UnsupportedOperationException
  */
  public Compressor createCompressor() {
    return new BZip2DummyCompressor();
  }

  /**
  * Creates CompressionInputStream to be used to read off uncompressed data.
  *
  * @param in
  *            The InputStream
  * @return Returns CompressionInputStream for BZip2
  * @throws java.io.IOException
  *             Throws IOException
  */
  public CompressionInputStream createInputStream(InputStream in)
      throws IOException {
    return new BZip2CompressionInputStream(in);
  }

  /**
  * This functionality is currently not supported.
  *
  * @throws java.lang.UnsupportedOperationException
  *             Throws UnsupportedOperationException
  */
  public CompressionInputStream createInputStream(InputStream in,
      Decompressor decompressor) throws IOException {
    return createInputStream(in);
  }

  /**
  * This functionality is currently not supported.
  *
  * @throws java.lang.UnsupportedOperationException
  *             Throws UnsupportedOperationException
  */
  public Class<? extends org.apache.hadoop.io.compress.Decompressor> getDecompressorType() {
    return BZip2DummyDecompressor.class;
  }

  /**
  * This functionality is currently not supported.
  *
  * @throws java.lang.UnsupportedOperationException
  *             Throws UnsupportedOperationException
  */
  public Decompressor createDecompressor() {
    return new BZip2DummyDecompressor();
  }

  /**
  * .bz2 is recognized as the default extension for compressed BZip2 files
  *
  * @return A String telling the default bzip2 file extension
  */
  public String getDefaultExtension() {
    return ".bz2";
  }

  private static class BZip2CompressionOutputStream extends CompressionOutputStream {

    // class data starts here//
    private CBZip2OutputStream output;
    private boolean needsReset;
    // class data ends here//

    public BZip2CompressionOutputStream(OutputStream out)
        throws IOException {
      super(out);
      needsReset = true;
    }

    private void writeStreamHeader() throws IOException {
      if (super.out != null) {
        // The compressed bzip2 stream should start with the
        // identifying characters BZ. Caller of CBZip2OutputStream
        // i.e. this class must write these characters.
        out.write(HEADER.getBytes());
      }
    }

    public void finish() throws IOException {
      this.output.finish();
      needsReset = true;
    }

    private void internalReset() throws IOException {
      if (needsReset) {
        needsReset = false;
        writeStreamHeader();
        this.output = new CBZip2OutputStream(out);
      }
    }   
   
    public void resetState() throws IOException {
      // Cannot write to out at this point because out might not be ready
      // yet, as in SequenceFile.Writer implementation.
      needsReset = true;
    }

    public void write(int b) throws IOException {
      if (needsReset) {
        internalReset();
      }
      this.output.write(b);
    }

    public void write(byte[] b, int off, int len) throws IOException {
      if (needsReset) {
        internalReset();
      }
      this.output.write(b, off, len);
    }

    public void close() throws IOException {
      this.output.flush();
      this.output.close();
      needsReset = true;
    }

  }// end of class BZip2CompressionOutputStream

  private static class BZip2CompressionInputStream extends CompressionInputStream {

    // class data starts here//
    private CBZip2InputStream input;
    boolean needsReset;
    // class data ends here//

    public BZip2CompressionInputStream(InputStream in) throws IOException {

      super(in);
      needsReset = true;
    }

    private BufferedInputStream readStreamHeader() throws IOException {
      // We are flexible enough to allow the compressed stream not to
      // start with the header of BZ. So it works fine either we have
      // the header or not.
      BufferedInputStream bufferedIn = null;
      if (super.in != null) {
        bufferedIn = new BufferedInputStream(super.in);
        bufferedIn.mark(HEADER_LEN);
        byte[] headerBytes = new byte[HEADER_LEN];
        int actualRead = bufferedIn.read(headerBytes, 0, HEADER_LEN);
        if (actualRead != -1) {
          String header = new String(headerBytes);
          if (header.compareTo(HEADER) != 0) {
            bufferedIn.reset();
          }
        }
      }

      if (bufferedIn == null) {
        throw new IOException("Failed to read bzip2 stream.");
      }

      return bufferedIn;

    }// end of method

    public void close() throws IOException {
      if (!needsReset) {
        input.close();
        needsReset = true;
      }
    }

    public int read(byte[] b, int off, int len) throws IOException {
      if (needsReset) {
        internalReset();
      }
      return this.input.read(b, off, len);

    }

    private void internalReset() throws IOException {
      if (needsReset) {
        needsReset = false;
        BufferedInputStream bufferedIn = readStreamHeader();
        input = new CBZip2InputStream(bufferedIn);
      }
    }   
   
    public void resetState() throws IOException {
      // Cannot read from bufferedIn at this point because bufferedIn might not be ready
      // yet, as in SequenceFile.Reader implementation.
      needsReset = true;
    }

    public int read() throws IOException {
      if (needsReset) {
        internalReset();
      }
      return this.input.read();
    }

  }// end of BZip2CompressionInputStream

}
TOP

Related Classes of org.apache.hadoop.io.compress.BZip2Codec$BZip2CompressionInputStream

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.