Package org.apache.hadoop.hbase.io.hfile

Source Code of org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hbase.io.hfile;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;

import com.google.common.base.Preconditions;

/**
* Do different kinds of data block encoding according to column family
* options.
*/
public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
  private final DataBlockEncoding onDisk;
  private final DataBlockEncoding inCache;

  public HFileDataBlockEncoderImpl(DataBlockEncoding encoding) {
    this(encoding, encoding);
  }

  /**
   * Do data block encoding with specified options.
   * @param onDisk What kind of data block encoding will be used before writing
   *          HFileBlock to disk. This must be either the same as inCache or
   *          {@link DataBlockEncoding#NONE}.
   * @param inCache What kind of data block encoding will be used in block
   *          cache.
   */
  public HFileDataBlockEncoderImpl(DataBlockEncoding onDisk,
      DataBlockEncoding inCache) {
    this.onDisk = onDisk != null ?
        onDisk : DataBlockEncoding.NONE;
    this.inCache = inCache != null ?
        inCache : DataBlockEncoding.NONE;
    Preconditions.checkArgument(onDisk == DataBlockEncoding.NONE ||
        onDisk == inCache, "on-disk encoding (" + onDisk + ") must be " +
        "either the same as in-cache encoding (" + inCache + ") or " +
        DataBlockEncoding.NONE);
  }

  public static HFileDataBlockEncoder createFromFileInfo(
      FileInfo fileInfo, DataBlockEncoding preferredEncodingInCache)
      throws IOException {
   
    boolean hasPreferredCacheEncoding = preferredEncodingInCache != null
        && preferredEncodingInCache != DataBlockEncoding.NONE;

    byte[] dataBlockEncodingType = fileInfo.get(DATA_BLOCK_ENCODING);
    if (dataBlockEncodingType == null && !hasPreferredCacheEncoding) {
      return NoOpDataBlockEncoder.INSTANCE;
    }

    DataBlockEncoding onDisk;
    if (dataBlockEncodingType == null) {
      onDisk = DataBlockEncoding.NONE;
    }else {
      String dataBlockEncodingStr = Bytes.toString(dataBlockEncodingType);
      try {
        onDisk = DataBlockEncoding.valueOf(dataBlockEncodingStr);
      } catch (IllegalArgumentException ex) {
        throw new IOException("Invalid data block encoding type in file info: "
            + dataBlockEncodingStr, ex);
      }
    }

    DataBlockEncoding inCache;
    if (onDisk == DataBlockEncoding.NONE) {
      // This is an "in-cache-only" encoding or fully-unencoded scenario.
      // Either way, we use the given encoding (possibly NONE) specified by
      // the column family in cache.
      inCache = preferredEncodingInCache;
    } else {
      // Leave blocks in cache encoded the same way as they are on disk.
      // If we switch encoding type for the CF or the in-cache-only encoding
      // flag, old files will keep their encoding both on disk and in cache,
      // but new files will be generated with the new encoding.
      inCache = onDisk;
    }
    return new HFileDataBlockEncoderImpl(onDisk, inCache);
  }

  @Override
  public void saveMetadata(HFile.Writer writer) throws IOException {
    writer.appendFileInfo(DATA_BLOCK_ENCODING, onDisk.getNameInBytes());
  }

  @Override
  public DataBlockEncoding getEncodingOnDisk() {
    return onDisk;
  }

  @Override
  public DataBlockEncoding getEncodingInCache() {
    return inCache;
  }

  @Override
  public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
    if (!useEncodedScanner(isCompaction)) {
      return DataBlockEncoding.NONE;
    }
    return inCache;
  }

  @Override
  public HFileBlock diskToCacheFormat(HFileBlock block, boolean isCompaction) {
    if (block.getBlockType() == BlockType.DATA) {
      if (!useEncodedScanner(isCompaction)) {
        // Unencoded block, and we don't want to encode in cache.
        return block;
      }
      // Encode the unencoded block with the in-cache encoding.
      return encodeDataBlock(block, inCache, block.doesIncludeMemstoreTS());
    }

    if (block.getBlockType() == BlockType.ENCODED_DATA) {
      if (block.getDataBlockEncodingId() == onDisk.getId()) {
        // The block is already in the desired in-cache encoding.
        return block;
      }
      // We don't want to re-encode a block in a different encoding. The HFile
      // reader should have been instantiated in such a way that we would not
      // have to do this.
      throw new AssertionError("Expected on-disk data block encoding " +
          onDisk + ", got " + block.getDataBlockEncoding());
    }
    return block;
  }

  /**
   * Precondition: a non-encoded buffer.
   * Postcondition: on-disk encoding.
   */
  @Override
  public Pair<ByteBuffer, BlockType> beforeWriteToDisk(ByteBuffer in,
      boolean includesMemstoreTS, byte[] dummyHeader) {
    if (onDisk == DataBlockEncoding.NONE) {
      // there is no need to encode the block before writing it to disk
      return new Pair<ByteBuffer, BlockType>(in, BlockType.DATA);
    }

    ByteBuffer encodedBuffer = encodeBufferToHFileBlockBuffer(in,
        onDisk, includesMemstoreTS, dummyHeader);
    return new Pair<ByteBuffer, BlockType>(encodedBuffer,
        BlockType.ENCODED_DATA);
  }

  @Override
  public boolean useEncodedScanner(boolean isCompaction) {
    if (isCompaction && onDisk == DataBlockEncoding.NONE) {
      return false;
    }
    return inCache != DataBlockEncoding.NONE;
  }

  private ByteBuffer encodeBufferToHFileBlockBuffer(ByteBuffer in,
      DataBlockEncoding algo, boolean includesMemstoreTS,
      byte[] dummyHeader) {
    ByteArrayOutputStream encodedStream = new ByteArrayOutputStream();
    DataOutputStream dataOut = new DataOutputStream(encodedStream);
    DataBlockEncoder encoder = algo.getEncoder();
    try {
      encodedStream.write(dummyHeader);
      algo.writeIdInBytes(dataOut);
      encoder.compressKeyValues(dataOut, in,
          includesMemstoreTS);
    } catch (IOException e) {
      throw new RuntimeException(String.format("Bug in data block encoder " +
          "'%s', it probably requested too much data", algo.toString()), e);
    }
    return ByteBuffer.wrap(encodedStream.toByteArray());
  }

  private HFileBlock encodeDataBlock(HFileBlock block,
      DataBlockEncoding algo, boolean includesMemstoreTS) {
    ByteBuffer compressedBuffer = encodeBufferToHFileBlockBuffer(
        block.getBufferWithoutHeader(), algo, includesMemstoreTS,
        block.getDummyHeaderForVersion());
    int sizeWithoutHeader = compressedBuffer.limit() - block.headerSize();
    HFileBlock encodedBlock = new HFileBlock(BlockType.ENCODED_DATA,
        block.getOnDiskSizeWithoutHeader(),
        sizeWithoutHeader, block.getPrevBlockOffset(),
        compressedBuffer, HFileBlock.FILL_HEADER, block.getOffset(),
        includesMemstoreTS, block.getMinorVersion(),
        block.getBytesPerChecksum(), block.getChecksumType(),
        block.getOnDiskDataSizeWithHeader());
    block.passSchemaMetricsTo(encodedBlock);
    return encodedBlock;
  }

  @Override
  public String toString() {
    return getClass().getSimpleName() + "(onDisk=" + onDisk + ", inCache=" +
        inCache + ")";
  }

}
TOP

Related Classes of org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.