Package org.apache.hadoop.hbase.regionserver

Source Code of org.apache.hadoop.hbase.regionserver.StoreFile$Comparators

/**
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.io.HalfStoreFileReader;
import org.apache.hadoop.hbase.io.Reference;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.LruBlockCache;
import org.apache.hadoop.hbase.util.BloomFilter;
import org.apache.hadoop.hbase.util.ByteBloomFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Hash;
import org.apache.hadoop.util.StringUtils;

import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Ordering;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryUsage;
import java.nio.ByteBuffer;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.Random;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* A Store data file.  Stores usually have one or more of these files.  They
* are produced by flushing the memstore to disk.  To
* create, call {@link #createWriter(FileSystem, Path, int)} and append data.  Be
* sure to add any metadata before calling close on the Writer
* (Use the appendMetadata convenience methods). On close, a StoreFile is
* sitting in the Filesystem.  To refer to it, create a StoreFile instance
* passing filesystem and path.  To read, call {@link #createReader()}.
* <p>StoreFiles may also reference store files in another Store.
*/
public class StoreFile {
  static final Log LOG = LogFactory.getLog(StoreFile.class.getName());

  private static final String HFILE_CACHE_SIZE_KEY = "hfile.block.cache.size";

  private static BlockCache hfileBlockCache = null;

  // Default block size for StoreFiles is 8k while testing.
  // TODO: FIX!  The 8k value is only needed for testing.
  public static final int DEFAULT_BLOCKSIZE_SMALL = 8 * 1024;

  private final FileSystem fs;
  // This file's path.
  private final Path path;
  // If this storefile references another, this is the reference instance.
  private Reference reference;
  // If this StoreFile references another, this is the other file's path.
  private Path referencePath;
  // Should the block cache be used or not.
  private boolean blockcache;
  // Is this from an in-memory store
  private boolean inMemory;

  // Keys for metadata stored in backing HFile.
  /** Constant for the max sequence ID meta */
  public static final byte [] MAX_SEQ_ID_KEY = Bytes.toBytes("MAX_SEQ_ID_KEY");
  // Set when we obtain a Reader.
  private long sequenceid = -1;

  /** Constant for major compaction meta */
  public static final byte [] MAJOR_COMPACTION_KEY =
    Bytes.toBytes("MAJOR_COMPACTION_KEY");

  // If true, this file was the product of a major compaction.  It is set
  // whenever you get a Reader.
  private AtomicBoolean majorCompaction = null;

  /** Meta key set when store file is a result of a bulk load */
  public static final byte[] BULKLOAD_TASK_KEY =
    Bytes.toBytes("BULKLOAD_SOURCE_TASK");
  public static final byte[] BULKLOAD_TIME_KEY =
    Bytes.toBytes("BULKLOAD_TIMESTAMP");


  static final String BLOOM_FILTER_META_KEY = "BLOOM_FILTER_META";
  static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";
  static final byte[] BLOOM_FILTER_TYPE_KEY =
    Bytes.toBytes("BLOOM_FILTER_TYPE");

  /**
   * Map of the metadata entries in the corresponding HFile
   */
  private Map<byte[], byte[]> metadataMap;

  /*
   * Regex that will work for straight filenames and for reference names.
   * If reference, then the regex has more than just one group.  Group 1 is
   * this file's id.  Group 2 is the referenced region name.
   */
  private static final Pattern REF_NAME_PARSER =
    Pattern.compile("^(\\d+)(?:\\.(.+))?$");

  private volatile StoreFile.Reader reader;

  // Used making file ids.
  private final static Random rand = new Random();
  private final Configuration conf;
  private final BloomType bloomType;


  /**
   * Constructor, loads a reader and it's indices, etc. May allocate a
   * substantial amount of ram depending on the underlying files (10-20MB?).
   *
   * @param fs  The current file system to use.
   * @param p  The path of the file.
   * @param blockcache  <code>true</code> if the block cache is enabled.
   * @param conf  The current configuration.
   * @param bt The bloom type to use for this store file
   * @throws IOException When opening the reader fails.
   */
  StoreFile(final FileSystem fs, final Path p, final boolean blockcache,
      final Configuration conf, final BloomType bt, final boolean inMemory)
  throws IOException {
    this.conf = conf;
    this.fs = fs;
    this.path = p;
    this.blockcache = blockcache;
    this.inMemory = inMemory;
    if (isReference(p)) {
      this.reference = Reference.read(fs, p);
      this.referencePath = getReferredToFile(this.path);
    }
    // ignore if the column family config says "no bloom filter"
    // even if there is one in the hfile.
    if (conf.getBoolean("io.hfile.bloom.enabled", true)) {
      this.bloomType = bt;
    } else {
      this.bloomType = BloomType.NONE;
      LOG.info("Ignoring bloom filter check for file (disabled in config)");
    }
  }

  /**
   * @return Path or null if this StoreFile was made with a Stream.
   */
  Path getPath() {
    return this.path;
  }

  /**
   * @return The Store/ColumnFamily this file belongs to.
   */
  byte [] getFamily() {
    return Bytes.toBytes(this.path.getParent().getName());
  }

  /**
   * @return True if this is a StoreFile Reference; call after {@link #open()}
   * else may get wrong answer.
   */
  boolean isReference() {
    return this.reference != null;
  }

  /**
   * @param p Path to check.
   * @return True if the path has format of a HStoreFile reference.
   */
  public static boolean isReference(final Path p) {
    return !p.getName().startsWith("_") &&
      isReference(p, REF_NAME_PARSER.matcher(p.getName()));
  }

  /**
   * @param p Path to check.
   * @param m Matcher to use.
   * @return True if the path has format of a HStoreFile reference.
   */
  public static boolean isReference(final Path p, final Matcher m) {
    if (m == null || !m.matches()) {
      LOG.warn("Failed match of store file name " + p.toString());
      throw new RuntimeException("Failed match of store file name " +
          p.toString());
    }
    return m.groupCount() > 1 && m.group(2) != null;
  }

  /*
   * Return path to the file referred to by a Reference.  Presumes a directory
   * hierarchy of <code>${hbase.rootdir}/tablename/regionname/familyname</code>.
   * @param p Path to a Reference file.
   * @return Calculated path to parent region file.
   * @throws IOException
   */
  static Path getReferredToFile(final Path p) {
    Matcher m = REF_NAME_PARSER.matcher(p.getName());
    if (m == null || !m.matches()) {
      LOG.warn("Failed match of store file name " + p.toString());
      throw new RuntimeException("Failed match of store file name " +
          p.toString());
    }
    // Other region name is suffix on the passed Reference file name
    String otherRegion = m.group(2);
    // Tabledir is up two directories from where Reference was written.
    Path tableDir = p.getParent().getParent().getParent();
    String nameStrippedOfSuffix = m.group(1);
    // Build up new path with the referenced region in place of our current
    // region in the reference path.  Also strip regionname suffix from name.
    return new Path(new Path(new Path(tableDir, otherRegion),
      p.getParent().getName()), nameStrippedOfSuffix);
  }

  /**
   * @return True if this file was made by a major compaction.
   */
  boolean isMajorCompaction() {
    if (this.majorCompaction == null) {
      throw new NullPointerException("This has not been set yet");
    }
    return this.majorCompaction.get();
  }

  /**
   * @return This files maximum edit sequence id.
   */
  public long getMaxSequenceId() {
    if (this.sequenceid == -1) {
      throw new IllegalAccessError("Has not been initialized");
    }
    return this.sequenceid;
  }

  /**
   * Return the highest sequence ID found across all storefiles in
   * the given list. Store files that were created by a mapreduce
   * bulk load are ignored, as they do not correspond to any edit
   * log items.
   * @return 0 if no non-bulk-load files are provided or, this is Store that
   * does not yet have any store files.
   */
  public static long getMaxSequenceIdInList(List<StoreFile> sfs) {
    long max = 0;
    for (StoreFile sf : sfs) {
      if (!sf.isBulkLoadResult()) {
        max = Math.max(max, sf.getMaxSequenceId());
      }
    }
    return max;
  }

  /**
   * @return true if this storefile was created by HFileOutputFormat
   * for a bulk load.
   */
  boolean isBulkLoadResult() {
    return metadataMap.containsKey(BULKLOAD_TIME_KEY);
  }

  /**
   * Return the timestamp at which this bulk load file was generated.
   */
  public long getBulkLoadTimestamp() {
    return Bytes.toLong(metadataMap.get(BULKLOAD_TIME_KEY));
  }

  /**
   * Returns the block cache or <code>null</code> in case none should be used.
   *
   * @param conf  The current configuration.
   * @return The block cache or <code>null</code>.
   */
  public static synchronized BlockCache getBlockCache(Configuration conf) {
    if (hfileBlockCache != null) return hfileBlockCache;

    float cachePercentage = conf.getFloat(HFILE_CACHE_SIZE_KEY, 0.0f);
    // There should be a better way to optimize this. But oh well.
    if (cachePercentage == 0L) return null;
    if (cachePercentage > 1.0) {
      throw new IllegalArgumentException(HFILE_CACHE_SIZE_KEY +
        " must be between 0.0 and 1.0, not > 1.0");
    }

    // Calculate the amount of heap to give the heap.
    MemoryUsage mu = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
    long cacheSize = (long)(mu.getMax() * cachePercentage);
    LOG.info("Allocating LruBlockCache with maximum size " +
      StringUtils.humanReadableInt(cacheSize));
    hfileBlockCache = new LruBlockCache(cacheSize, DEFAULT_BLOCKSIZE_SMALL);
    return hfileBlockCache;
  }

  /**
   * @return the blockcache
   */
  public BlockCache getBlockCache() {
    return blockcache ? getBlockCache(conf) : null;
  }

  /**
   * Opens reader on this store file.  Called by Constructor.
   * @return Reader for the store file.
   * @throws IOException
   * @see #closeReader()
   */
  private StoreFile.Reader open()
  throws IOException {
    if (this.reader != null) {
      throw new IllegalAccessError("Already open");
    }
    if (isReference()) {
      this.reader = new HalfStoreFileReader(this.fs, this.referencePath,
          getBlockCache(), this.reference);
    } else {
      this.reader = new StoreFile.Reader(this.fs, this.path, getBlockCache(),
          this.inMemory);
    }
    // Load up indices and fileinfo.
    metadataMap = Collections.unmodifiableMap(this.reader.loadFileInfo());
    // Read in our metadata.
    byte [] b = metadataMap.get(MAX_SEQ_ID_KEY);
    if (b != null) {
      // By convention, if halfhfile, top half has a sequence number > bottom
      // half. Thats why we add one in below. Its done for case the two halves
      // are ever merged back together --rare.  Without it, on open of store,
      // since store files are distingushed by sequence id, the one half would
      // subsume the other.
      this.sequenceid = Bytes.toLong(b);
      if (isReference()) {
        if (Reference.isTopFileRegion(this.reference.getFileRegion())) {
          this.sequenceid += 1;
        }
      }

    }
    b = metadataMap.get(MAJOR_COMPACTION_KEY);
    if (b != null) {
      boolean mc = Bytes.toBoolean(b);
      if (this.majorCompaction == null) {
        this.majorCompaction = new AtomicBoolean(mc);
      } else {
        this.majorCompaction.set(mc);
      }
    }

    if (this.bloomType != BloomType.NONE) {
      this.reader.loadBloomfilter();
    }

    return this.reader;
  }

  /**
   * @return Reader for StoreFile. creates if necessary
   * @throws IOException
   */
  public StoreFile.Reader createReader() throws IOException {
    if (this.reader == null) {
      this.reader = open();
    }
    return this.reader;
  }

  /**
   * @return Current reader.  Must call createReader first else returns null.
   * @throws IOException
   * @see {@link #createReader()}
   */
  public StoreFile.Reader getReader() {
    return this.reader;
  }

  /**
   * @throws IOException
   */
  public synchronized void closeReader() throws IOException {
    if (this.reader != null) {
      this.reader.close();
      this.reader = null;
    }
  }

  /**
   * Delete this file
   * @throws IOException
   */
  public void deleteReader() throws IOException {
    closeReader();
    this.fs.delete(getPath(), true);
  }

  @Override
  public String toString() {
    return this.path.toString() +
      (isReference()? "-" + this.referencePath + "-" + reference.toString(): "");
  }

  /**
   * @return a length description of this StoreFile, suitable for debug output
   */
  public String toStringDetailed() {
    StringBuilder sb = new StringBuilder();
    sb.append(this.path.toString());
    sb.append(", isReference=").append(isReference());
    sb.append(", isBulkLoadResult=").append(isBulkLoadResult());
    if (isBulkLoadResult()) {
      sb.append(", bulkLoadTS=").append(getBulkLoadTimestamp());
    } else {
      sb.append(", seqid=").append(getMaxSequenceId());
    }
    sb.append(", majorCompaction=").append(isMajorCompaction());

    return sb.toString();
  }

  /**
   * Utility to help with rename.
   * @param fs
   * @param src
   * @param tgt
   * @return True if succeeded.
   * @throws IOException
   */
  public static Path rename(final FileSystem fs, final Path src,
      final Path tgt)
  throws IOException {
    if (!fs.exists(src)) {
      throw new FileNotFoundException(src.toString());
    }
    if (!fs.rename(src, tgt)) {
      throw new IOException("Failed rename of " + src + " to " + tgt);
    }
    return tgt;
  }

  /**
   * Get a store file writer. Client is responsible for closing file when done.
   * If metadata, add BEFORE closing using
   * {@link #appendMetadata(org.apache.hadoop.hbase.io.hfile.HFile.Writer, long)}.
   * @param fs
   * @param dir Path to family directory.  Makes the directory if doesn't exist.
   * Creates a file with a unique name in this directory.
   * @param blocksize size per filesystem block
   * @return HFile.Writer
   * @throws IOException
   */
  public static StoreFile.Writer createWriter(final FileSystem fs, final Path dir,
      final int blocksize)
  throws IOException {
    return createWriter(fs,dir,blocksize,null,null,null,BloomType.NONE,0);
  }

  /**
   * Create a store file writer. Client is responsible for closing file when done.
   * If metadata, add BEFORE closing using appendMetadata()
   * @param fs
   * @param dir Path to family directory.  Makes the directory if doesn't exist.
   * Creates a file with a unique name in this directory.
   * @param blocksize
   * @param algorithm Pass null to get default.
   * @param conf HBase system configuration. used with bloom filters
   * @param bloomType column family setting for bloom filters
   * @param c Pass null to get default.
   * @param maxKeySize peak theoretical entry size (maintains error rate)
   * @return HFile.Writer
   * @throws IOException
   */
  public static StoreFile.Writer createWriter(final FileSystem fs, final Path dir,
      final int blocksize, final Compression.Algorithm algorithm,
      final KeyValue.KVComparator c, final Configuration conf,
      BloomType bloomType, int maxKeySize)
  throws IOException {
    if (!fs.exists(dir)) {
      fs.mkdirs(dir);
    }
    Path path = getUniqueFile(fs, dir);
    if(conf == null || !conf.getBoolean("io.hfile.bloom.enabled", true)) {
      bloomType = BloomType.NONE;
    }

    return new StoreFile.Writer(fs, path, blocksize,
        algorithm == null? HFile.DEFAULT_COMPRESSION_ALGORITHM: algorithm,
        conf, c == null? KeyValue.COMPARATOR: c, bloomType, maxKeySize);
  }

  /**
   * @param fs
   * @param dir Directory to create file in.
   * @return random filename inside passed <code>dir</code>
   */
  public static Path getUniqueFile(final FileSystem fs, final Path dir)
  throws IOException {
    if (!fs.getFileStatus(dir).isDir()) {
      throw new IOException("Expecting " + dir.toString() +
        " to be a directory");
    }
    return fs.getFileStatus(dir).isDir()? getRandomFilename(fs, dir): dir;
  }

  /**
   *
   * @param fs
   * @param dir
   * @return Path to a file that doesn't exist at time of this invocation.
   * @throws IOException
   */
  static Path getRandomFilename(final FileSystem fs, final Path dir)
  throws IOException {
    return getRandomFilename(fs, dir, null);
  }

  /**
   *
   * @param fs
   * @param dir
   * @param suffix
   * @return Path to a file that doesn't exist at time of this invocation.
   * @throws IOException
   */
  static Path getRandomFilename(final FileSystem fs, final Path dir,
      final String suffix)
  throws IOException {
    long id = -1;
    Path p = null;
    do {
      id = Math.abs(rand.nextLong());
      p = new Path(dir, Long.toString(id) +
        ((suffix == null || suffix.length() <= 0)? "": suffix));
    } while(fs.exists(p));
    return p;
  }

  /*
   * Write out a split reference.
   * @param fs
   * @param splitDir Presumes path format is actually
   * <code>SOME_DIRECTORY/REGIONNAME/FAMILY</code>.
   * @param f File to split.
   * @param splitRow
   * @param range
   * @return Path to created reference.
   * @throws IOException
   */
  static Path split(final FileSystem fs, final Path splitDir,
    final StoreFile f, final byte [] splitRow, final Reference.Range range)
  throws IOException {
    // A reference to the bottom half of the hsf store file.
    Reference r = new Reference(splitRow, range);
    // Add the referred-to regions name as a dot separated suffix.
    // See REF_NAME_PARSER regex above.  The referred-to regions name is
    // up in the path of the passed in <code>f</code> -- parentdir is family,
    // then the directory above is the region name.
    String parentRegionName = f.getPath().getParent().getParent().getName();
    // Write reference with same file id only with the other region name as
    // suffix and into the new region location (under same family).
    Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
    return r.write(fs, p);
  }

  /**
   * Kinds of bloom filter a StoreFile can carry.  The name of the constant
   * in use is what gets stored in the file info when a bloom is written.
   */
  public static enum BloomType {
    /**
     * Bloomfilters disabled
     */
    NONE,
    /**
     * Bloom enabled with Table row as Key
     */
    ROW,
    /**
     * Bloom enabled with Table row and column qualifier as Key
     */
    ROWCOL
  }

  /**
   *
   */
  public static class Reader extends HFile.Reader {
    /** Bloom Filter class.  Caches only meta, pass in data */
    protected BloomFilter bloomFilter = null;
    /** Type of bloom filter (e.g. ROW vs ROWCOL) */
    protected BloomType bloomFilterType;

    public Reader(FileSystem fs, Path path, BlockCache cache,
        boolean inMemory)
    throws IOException {
      super(fs, path, cache, inMemory);
    }

    public Reader(final FSDataInputStream fsdis, final long size,
        final BlockCache cache, final boolean inMemory) {
      super(fsdis,size,cache,inMemory);
      bloomFilterType = BloomType.NONE;
    }

    @Override
    public Map<byte [], byte []> loadFileInfo()
    throws IOException {
      Map<byte [], byte []> fi = super.loadFileInfo();

      byte[] b = fi.get(BLOOM_FILTER_TYPE_KEY);
      if (b != null) {
        bloomFilterType = BloomType.valueOf(Bytes.toString(b));
      }

      return fi;
    }

    /**
     * Load the bloom filter for this HFile into memory.
     * Assumes the HFile has already been loaded
     */
    public void loadBloomfilter() {
      if (this.bloomFilter != null) {
        return; // already loaded
      }

      // see if bloom filter information is in the metadata
      try {
        ByteBuffer b = getMetaBlock(BLOOM_FILTER_META_KEY, false);
        if (b != null) {
          if (bloomFilterType == BloomType.NONE) {
            throw new IOException("valid bloom filter type not found in FileInfo");
          }
          this.bloomFilter = new ByteBloomFilter(b);
          LOG.info("Loaded " + (bloomFilterType==BloomType.ROW? "row":"col")
                 + " bloom filter metadata for " + name);
        }
      } catch (IOException e) {
        LOG.error("Error reading bloom filter meta -- proceeding without", e);
        this.bloomFilter = null;
      } catch (IllegalArgumentException e) {
        LOG.error("Bad bloom filter meta -- proceeding without", e);
        this.bloomFilter = null;
      }
    }

    BloomFilter getBloomFilter() {
      return this.bloomFilter;
    }

    /**
     * @return bloom type information associated with this store file
     */
    public BloomType getBloomFilterType() {
      return this.bloomFilterType;
    }

    @Override
    public int getFilterEntries() {
      return (this.bloomFilter != null) ? this.bloomFilter.getKeyCount()
          : super.getFilterEntries();
    }

    @Override
    public HFileScanner getScanner(boolean cacheBlocks, final boolean pread) {
      return new Scanner(this, cacheBlocks, pread);
    }

    protected class Scanner extends HFile.Reader.Scanner {
      public Scanner(Reader r, boolean cacheBlocks, final boolean pread) {
        super(r, cacheBlocks, pread);
      }

      @Override
      public boolean shouldSeek(final byte[] row,
          final SortedSet<byte[]> columns) {
        if (bloomFilter == null) {
          return true;
        }

        byte[] key;
        switch(bloomFilterType) {
          case ROW:
            key = row;
            break;
          case ROWCOL:
            if (columns.size() == 1) {
              byte[] col = columns.first();
              key = Bytes.add(row, col);
              break;
            }
            //$FALL-THROUGH$
          default:
            return true;
        }

        try {
          ByteBuffer bloom = getMetaBlock(BLOOM_FILTER_DATA_KEY, true);
          if (bloom != null) {
            return bloomFilter.contains(key, bloom);
          }
        } catch (IOException e) {
          LOG.error("Error reading bloom filter data -- proceeding without",
              e);
          bloomFilter = null;
        } catch (IllegalArgumentException e) {
          LOG.error("Bad bloom filter data -- proceeding without", e);
          bloomFilter = null;
        }

        return true;
      }

    }
  }

  /**
   *
   */
  public static class Writer extends HFile.Writer {
    private final BloomFilter bloomFilter;
    private final BloomType bloomType;
    private KVComparator kvComparator;
    private KeyValue lastKv = null;
    private byte[] lastByteArray = null;

    /**
     * Creates an HFile.Writer that also write helpful meta data.
     * @param fs file system to write to
     * @param path file name to create
     * @param blocksize HDFS block size
     * @param compress HDFS block compression
     * @param conf user configuration
     * @param comparator key comparator
     * @param bloomType bloom filter setting
     * @param maxKeys maximum amount of keys to add (for blooms)
     * @throws IOException problem writing to FS
     */
    public Writer(FileSystem fs, Path path, int blocksize,
        Compression.Algorithm compress, final Configuration conf,
        final KVComparator comparator, BloomType bloomType, int maxKeys)
      throws IOException {
      super(fs, path, blocksize, compress, comparator.getRawComparator());

      this.kvComparator = comparator;

      if (bloomType != BloomType.NONE && conf != null) {
        float err = conf.getFloat("io.hfile.bloom.error.rate", (float)0.01);
        int maxFold = conf.getInt("io.hfile.bloom.max.fold", 7);

        this.bloomFilter = new ByteBloomFilter(maxKeys, err,
            Hash.getHashType(conf), maxFold);
        this.bloomFilter.allocBloom();
        this.bloomType = bloomType;
      } else {
        this.bloomFilter = null;
        this.bloomType = BloomType.NONE;
      }
    }

    /**
     * Writes meta data.
     * Call before {@link #close()} since its written as meta data to this file.
     * @param maxSequenceId Maximum sequence id.
     * @param majorCompaction True if this file is product of a major compaction
     * @throws IOException problem writing to FS
     */
    public void appendMetadata(final long maxSequenceId,
      final boolean majorCompaction)
    throws IOException {
      appendFileInfo(MAX_SEQ_ID_KEY, Bytes.toBytes(maxSequenceId));
      appendFileInfo(MAJOR_COMPACTION_KEY, Bytes.toBytes(majorCompaction));
    }

    @Override
    public void append(final KeyValue kv)
    throws IOException {
      if (this.bloomFilter != null) {
        // only add to the bloom filter on a new, unique key
        boolean newKey = true;
        if (this.lastKv != null) {
          switch(bloomType) {
          case ROW:
            newKey = ! kvComparator.matchingRows(kv, lastKv);
            break;
          case ROWCOL:
            newKey = ! kvComparator.matchingRowColumn(kv, lastKv);
            break;
          case NONE:
            newKey = false;
          }
        }
        if (newKey) {
          /*
           * http://2.bp.blogspot.com/_Cib_A77V54U/StZMrzaKufI/AAAAAAAAADo/ZhK7bGoJdMQ/s400/KeyValue.png
           * Key = RowLen + Row + FamilyLen + Column [Family + Qualifier] + TimeStamp
           *
           * 2 Types of Filtering:
           *  1. Row = Row
           *  2. RowCol = Row + Qualifier
           */
          switch (bloomType) {
          case ROW:
            this.bloomFilter.add(kv.getBuffer(), kv.getRowOffset(),
                kv.getRowLength());
            break;
          case ROWCOL:
            // merge(row, qualifier)
            int ro = kv.getRowOffset();
            int rl = kv.getRowLength();
            int qo = kv.getQualifierOffset();
            int ql = kv.getQualifierLength();
            byte [] result = new byte[rl + ql];
            System.arraycopy(kv.getBuffer(), ro, result, 0,  rl);
            System.arraycopy(kv.getBuffer(), qo, result, rl, ql);

            this.bloomFilter.add(result);
            break;
          default:
          }
          this.lastKv = kv;
        }
      }
      super.append(kv);
    }

    @Override
    public void append(final byte [] key, final byte [] value)
    throws IOException {
      if (this.bloomFilter != null) {
        // only add to the bloom filter on a new row
        if(this.lastByteArray == null || !Arrays.equals(key, lastByteArray)) {
          this.bloomFilter.add(key);
          this.lastByteArray = key;
        }
      }
      super.append(key, value);
    }

    @Override
    public void close()
    throws IOException {
      // make sure we wrote something to the bloom before adding it
      if (this.bloomFilter != null && this.bloomFilter.getKeyCount() > 0) {
        bloomFilter.finalize();
        if (this.bloomFilter.getMaxKeys() > 0) {
          int b = this.bloomFilter.getByteSize();
          int k = this.bloomFilter.getKeyCount();
          int m = this.bloomFilter.getMaxKeys();
          StoreFile.LOG.info("Bloom added to HFile.  " + b + "B, " +
              k + "/" + m + " (" + NumberFormat.getPercentInstance().format(
                ((double)k) / ((double)m)) + ")");
        }
        appendMetaBlock(BLOOM_FILTER_META_KEY, bloomFilter.getMetaWriter());
        appendMetaBlock(BLOOM_FILTER_DATA_KEY, bloomFilter.getDataWriter());
        appendFileInfo(BLOOM_FILTER_TYPE_KEY, Bytes.toBytes(bloomType.toString()));
      }
      super.close();
    }

  }

  /**
   * Useful comparators for comparing StoreFiles.
   */
  abstract static class Comparators {
    /**
     * Comparator that compares based on the flush time of
     * the StoreFiles. All bulk loads are placed before all non-
     * bulk loads, and then all files are sorted by sequence ID.
     * If there are ties, the path name is used as a tie-breaker.
     */
    static final Comparator<StoreFile> FLUSH_TIME =
      Ordering.compound(ImmutableList.of(
          Ordering.natural().onResultOf(new GetBulkTime()),
          Ordering.natural().onResultOf(new GetSeqId()),
          Ordering.natural().onResultOf(new GetPathName())
      ));

    private static class GetBulkTime implements Function<StoreFile, Long> {
      @Override
      public Long apply(StoreFile sf) {
        if (!sf.isBulkLoadResult()) return Long.MAX_VALUE;
        return sf.getBulkLoadTimestamp();
      }
    }
    private static class GetSeqId implements Function<StoreFile, Long> {
      @Override
      public Long apply(StoreFile sf) {
        if (sf.isBulkLoadResult()) return -1L;
        return sf.getMaxSequenceId();
      }
    }
    private static class GetPathName implements Function<StoreFile, String> {
      @Override
      public String apply(StoreFile sf) {
        return sf.getPath().getName();
      }
    }

  }
}
TOP

Related Classes of org.apache.hadoop.hbase.regionserver.StoreFile$Comparators

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.
m/analytics.js','ga'); ga('create', 'UA-20639858-1', 'auto'); ga('send', 'pageview');