Package org.kiji.schema

Source Code of org.kiji.schema.KijiRowKeySplitter

/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.kiji.schema;

import java.util.Arrays;

import com.google.common.base.Preconditions;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.kiji.annotations.ApiAudience;
import org.kiji.annotations.ApiStability;
import org.kiji.schema.avro.RowKeyFormat;
import org.kiji.schema.avro.RowKeyFormat2;
import org.kiji.schema.avro.TableLayoutDesc;
import org.kiji.schema.util.ByteArrayFormatter;

/**
* Utility class for splitting the Kiji row key space.
*/
@ApiAudience.Framework
@ApiStability.Evolving
public final class KijiRowKeySplitter {
  private static final Logger LOG = LoggerFactory.getLogger(KijiRowKeySplitter.class);

  /** No public constructor since this is a factory class. */
  private KijiRowKeySplitter() {}

  /**
   * Creates an instance of KijiRowKeySplitter for the default MD5 hash algorithm.
   *
   * @return a new splitter for Kiji row keys.
   */
  public static KijiRowKeySplitter get() {
    return new KijiRowKeySplitter();
  }

  /**
   * Resolution (in number of bytes) when generating evenly spaced HBase row keys.
   *
   * @return the number of bytes required by an md5 hash.
   */
  @Deprecated
  public int getRowKeyResolution() {
    return 16;
  }

  /**
   * Gets a table's row-key hash resolution (in number of bytes)
   * for use in evenly spacing HBase row keys.
   *
   * @param tableLayout the layout of the table
   * @return the table's hash resolution.
   */
  public static int getRowKeyResolution(TableLayoutDesc tableLayout) {
    // Get hashSize from layout.
    int hashSize = 16;
    // No assumptions make about the RKF.
    if (RowKeyFormat.class.equals(tableLayout.getKeysFormat().getClass())) {
      hashSize = ((RowKeyFormat) tableLayout.getKeysFormat()).getHashSize();
    } else if (RowKeyFormat2.class.equals(tableLayout.getKeysFormat().getClass())) {
      RowKeyFormat2 format = (RowKeyFormat2) tableLayout.getKeysFormat();
      if (null == format.getSalt()) {
        throw new IllegalArgumentException(
            "This table layout defines an entityId format without hashing enabled.");
      }
      hashSize = ((RowKeyFormat2) tableLayout.getKeysFormat()).getSalt().getHashSize();
    }
    return hashSize;
  }

  /**
   * Returns the split keys for the given number of regions.  This assumes that the keys are
   * byte strings of default size 16 for MD5.
   *
   * @param numRegions The number of desired regions.
   * @return The row keys that serve as the boundaries between the regions.
   */
  @Deprecated
  public byte[][] getSplitKeys(int numRegions) {
    return getSplitKeys(numRegions, getRowKeyResolution());
  }

  /**
   * Generates a list of region boundaries evenly spaced in the HBase row key space.
   *
   * <p>  </p>
   *
   * @param numRegions Number of desired regions. Must be greater than 2 and less than the maximum
   *     number of regions allowed given the requested key precision.
   * @param precision Precision of the boundary split keys, in number of bytes.
   * @return The row keys that serve as the boundaries between the regions.
   *     Each boundary split key has the requested precision, in number of bytes.
   *     The number of boundaries is numRegions - 1.
   */
  public byte[][] getSplitKeys(int numRegions, int precision) {
    Preconditions.checkArgument(precision >= 1, "Invalid precision: %s.", precision);
    Preconditions.checkArgument(numRegions >= 2,
        "Number of regions must be at least 2, got %s.", numRegions);
    if (precision < 4) {
      // 4 bytes give more precision than a Java integer achieves:
      final long maxRegions = 1L << (8 * precision);
      Preconditions.checkArgument(numRegions <= maxRegions,
          "Number of regions must be at most %s, got %s.", maxRegions, numRegions);
    }

    // Create a byte array of all zeros.
    final byte[] startKey = new byte[precision];
    Arrays.fill(startKey, (byte) 0x00);

    // Create a byte array of all ones.
    final byte[] limitKey = new byte[precision];
    Arrays.fill(limitKey, (byte) 0xFF);

    // This result includes numRegions + 1 keys in it (includes the startKey and limitKey).
    final byte[][] ends = Bytes.split(startKey, limitKey, true, numRegions - 1);

    if (LOG.isDebugEnabled()) {
      for (int i = 0; i < ends.length; i++) {
        LOG.debug("Generated split key: {}", ByteArrayFormatter.toHex(ends[i], ':'));
      }
    }

    // Remove the startKey from the beginning and the limitKey from the end.
    return Arrays.copyOfRange(ends, 1, ends.length - 1);
  }
}
TOP

Related Classes of org.kiji.schema.KijiRowKeySplitter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.