Package org.kiji.schema.layout.impl

Source Code of org.kiji.schema.layout.impl.ColumnId

/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.kiji.schema.layout.impl;

import java.util.HashMap;
import java.util.Map;

import com.google.common.base.Preconditions;
import org.apache.hadoop.hbase.util.Bytes;

import org.kiji.annotations.ApiAudience;
import org.kiji.schema.InternalKijiError;
import org.kiji.schema.impl.InvalidColumnNameException;

/**
* <p>A very short physical identifier for a column family or qualifier to be used in HBase.</p>
*
* <p>Since HBase is a sparse storage system, every cell's data must be stored along with
* its full address: its row key, family name, column qualifier, and timestamp.  Because
* of this, it is important to keep the names of families and qualifiers as short as
* possible.</p>
*
* <p>A ColumnId is the physical byte[] that is used as a family or qualifier name in
* HBase.  It is an encoded number using a 64-character alphabet with the more significant
* digits to the right.  This class allows you to convert between the integers and the
* UTF-8 encoded physical names.  For example:</p>
*
*   <table>
*     <tr><th>id</th><th>name</th></tr>
*     <tr><td>1</td><td>B</td></tr>
*     <tr><td>1</td><td>BA</td></tr>
*     <tr><td>1</td><td>BAA</td></tr>
*     <tr><td>2</td><td>C</td></tr>
*     <tr><td>25</td><td>Z</td></tr>
*     <tr><td>26</td><td>a</td></tr>
*     <tr><td>51</td><td>z</td></tr>
*     <tr><td>52</td><td>0</td></tr>
*     <tr><td>61</td><td>9</td></tr>
*     <tr><td>62</td><td>+</td></tr>
*     <tr><td>63</td><td>/</td></tr>
*     <tr><td>64</td><td>AB</td></tr>
*     <tr><td>65</td><td>BB</td></tr>
*     <tr><td>66</td><td>CB</td></tr>
*   </table>
*
* <p>The benefit here is that until a user has defined at least 64 column names in their
* layout, all of the names used in HBase will only be one byte.  The next 64 names will
* only be two bytes, and so on.
*/
@ApiAudience.Private
public final class ColumnId {
  /**
   * The special value reserved to mean that a symbolic name has not been assigned a column id.
   */
  public static final int UNASSIGNED = 0;

  /** The alphabet used to generate the short physical names. */
  public static final String ALPHABET =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  /** The size of the alphabet for a digit, which is the radix of our printed number. */
  public static final int RADIX = 64;

  /**
   * The base 2 logarithm of the size of the alphabet (64), which is the number of bits
   * we can encode with a single digit.
   */
  public static final int BITS_PER_DIGIT = 6;

  /** A map from characters in the alphabet to the integer it represents. */
  public static final Map<Character, Integer> VALUE_MAP;
  static {
    // Make sure our alphabet is the size we expect.
    if (RADIX != ALPHABET.length()) {
      throw new InternalKijiError(
          "Expected ColumnId alphabet size to be " + RADIX + " but was " + ALPHABET.length());
    }

    // Initialize the map from digit to value.
    VALUE_MAP = new HashMap<Character, Integer>();
    for (int i = 0; i < ALPHABET.length(); i++) {
      VALUE_MAP.put(ALPHABET.charAt(i), i);
    }
  }

  /** The integer encoded by this column id. */
  private final int mId;

  /** The Base64 string encoding of this column id. */
  private final String mStringEncoding;

  /**
   * Constructs a column id that encodes the given integer.
   *
   * @param id The integer identifier for this column.
   */
  public ColumnId(int id) {
    Preconditions.checkArgument(id >= 0, "id may not be negative");
    mId = id;
    mStringEncoding = intToBase64(mId);
  }

  /**
   * Converts the given integer to Base64 encoding with the MSB to the right.
   * @param id is the decimal number to convert.
   * @return a string representing the Base64 encoding of the incoming integer.
   */
  public static String intToBase64(int id) {
    StringBuilder sb = new StringBuilder();
    int val = id;
    do {
      sb.append(ALPHABET.charAt(val % ALPHABET.length()));
      val >>= BITS_PER_DIGIT;
    } while (val > 0);
    return sb.toString();
  }

  /**
   * Translates HBase column names to ColumnIds.  The HBase byte arrays are UTF-8 encoded
   * numbers from our base-64 alphabet.
   *
   * @param encoded The family or qualifier bytes from HBase.
   * @return A ColumnId from the byte array.
   */
  public static ColumnId fromByteArray(byte[] encoded) {
    return fromString(Bytes.toString(encoded));
  }

  /**
   * Translates HBase column names to ColumnIds.  The HBase names are UTF-8 encoded
   * numbers from our base-64 alphabet.
   *
   * @param encoded The family or qualifier bytes from HBase.
   * @return A ColumnId from the encoded name.
   */
  public static ColumnId fromString(String encoded) {
    int val = 0;
    for (int i = 0; i < encoded.length(); i++) {
      try {
        val += VALUE_MAP.get(encoded.charAt(i)) << i * BITS_PER_DIGIT;
      } catch (NullPointerException e) {
        throw new InvalidColumnNameException("Contained a character not in the alphabet: "
            + encoded);
      }
    }
    return new ColumnId(val);
  }

  /** @return the column id. */
  public int getId() {
    return mId;
  }

  /**
   * <p>Translates ColumnIds to HBase column names.</p>
   *
   * <p>Encodes to a byte array making it as short as possible. We use characters that
   * HBase allows (no control characters and no colon).</p>
   *
   * @return The column id encoded as an HBase-friendly byte array.
   */
  public byte[] toByteArray() {
    return Bytes.toBytes(toString());
  }

  /**
   * Gets the id as a string of digits from our alphabet.
   *
   * @return The digit string, with the significant digits on the right.
   */
  @Override
  public String toString() {
    return mStringEncoding;
  }

  /** {@inheritDoc} */
  @Override
  public boolean equals(Object other) {
    if (!(other instanceof ColumnId)) {
      return false;
    }
    return getId() == ((ColumnId) other).getId();
  }

  /** {@inheritDoc} */
  @Override
  public int hashCode() {
    return getId();
  }
}
TOP

Related Classes of org.kiji.schema.layout.impl.ColumnId

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.