Source Code of com.salesforce.hbase.index.covered.example.CoveredColumnIndexCodec

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.salesforce.hbase.index.covered.example;


import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map.Entry;


import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;


import com.google.common.collect.Lists;
import com.salesforce.hbase.index.covered.IndexUpdate;
import com.salesforce.hbase.index.covered.TableState;
import com.salesforce.hbase.index.scanner.Scanner;
import com.salesforce.phoenix.index.BaseIndexCodec;


/**
 *
 */
public class CoveredColumnIndexCodec extends BaseIndexCodec {


  private static final byte[] EMPTY_BYTES = new byte[0];
  public static final byte[] INDEX_ROW_COLUMN_FAMILY = Bytes.toBytes("INDEXED_COLUMNS");


  private List<ColumnGroup> groups;


  /**
   * @param groups to initialize the codec with
   * @return an instance that is initialized with the given {@link ColumnGroup}s, for testing
   *         purposes
   */
  public static CoveredColumnIndexCodec getCodecForTesting(List<ColumnGroup> groups) {
    CoveredColumnIndexCodec codec = new CoveredColumnIndexCodec();
    codec.groups = Lists.newArrayList(groups);
    return codec;
  }


  @Override
  public void initialize(RegionCoprocessorEnvironment env) {
    groups = CoveredColumnIndexSpecifierBuilder.getColumns(env.getConfiguration());
  }


  @Override
  public Iterable<IndexUpdate> getIndexUpserts(TableState state) {
    List<IndexUpdate> updates = new ArrayList<IndexUpdate>();
    for (ColumnGroup group : groups) {
      IndexUpdate update = getIndexUpdateForGroup(group, state);
      updates.add(update);
    }
    return updates;
  }


  /**
   * @param group
   * @param state
   * @return the update that should be made to the table
   */
  private IndexUpdate getIndexUpdateForGroup(ColumnGroup group, TableState state) {
    List<CoveredColumn> refs = group.getColumns();
    try {
      Pair<Scanner, IndexUpdate> stateInfo = state.getIndexedColumnsTableState(refs);
      Scanner kvs = stateInfo.getFirst();
      Pair<Integer, List<ColumnEntry>> columns =
          getNextEntries(refs, kvs, state.getCurrentRowKey());
      // make sure we close the scanner
      kvs.close();
      if (columns.getFirst().intValue() == 0) {
        return stateInfo.getSecond();
      }
      // have all the column entries, so just turn it into a Delete for the row
      // convert the entries to the needed values
      byte[] rowKey =
          composeRowKey(state.getCurrentRowKey(), columns.getFirst(), columns.getSecond());
      Put p = new Put(rowKey, state.getCurrentTimestamp());
      // add the columns to the put
      addColumnsToPut(p, columns.getSecond());


      // update the index info
      IndexUpdate update = stateInfo.getSecond();
      update.setTable(Bytes.toBytes(group.getTable()));
      update.setUpdate(p);
      return update;
    } catch (IOException e) {
      throw new RuntimeException("Unexpected exception when getting state for columns: " + refs);
    }
  }


  private static void addColumnsToPut(Put indexInsert, List<ColumnEntry> columns) {
    // add each of the corresponding families to the put
    int count = 0;
    for (ColumnEntry column : columns) {
      indexInsert.add(INDEX_ROW_COLUMN_FAMILY,
        ArrayUtils.addAll(Bytes.toBytes(count++), toIndexQualifier(column.ref)), null);
    }
  }


  private static byte[] toIndexQualifier(CoveredColumn column) {
    return ArrayUtils.addAll(Bytes.toBytes(column.familyString + CoveredColumn.SEPARATOR),
      column.getQualifier());
  }


  @Override
  public Iterable<IndexUpdate> getIndexDeletes(TableState state) {
    List<IndexUpdate> deletes = new ArrayList<IndexUpdate>();
    for (ColumnGroup group : groups) {
      deletes.add(getDeleteForGroup(group, state));
    }
    return deletes;
  }




  /**
   * Get all the deletes necessary for a group of columns - logically, the cleanup the index table
   * for a given index.
   * @param group index information
   * @return the cleanup for the given index, or <tt>null</tt> if no cleanup is necessary
   */
  private IndexUpdate getDeleteForGroup(ColumnGroup group, TableState state) {
    List<CoveredColumn> refs = group.getColumns();
    try {
      Pair<Scanner, IndexUpdate> kvs = state.getIndexedColumnsTableState(refs);
      Pair<Integer, List<ColumnEntry>> columns =
          getNextEntries(refs, kvs.getFirst(), state.getCurrentRowKey());
      // make sure we close the scanner reference
      kvs.getFirst().close();
      // no change, just return the passed update
      if (columns.getFirst() == 0) {
        return kvs.getSecond();
      }
      // have all the column entries, so just turn it into a Delete for the row
      // convert the entries to the needed values
      byte[] rowKey =
          composeRowKey(state.getCurrentRowKey(), columns.getFirst(), columns.getSecond());
      Delete d = new Delete(rowKey);
      d.setTimestamp(state.getCurrentTimestamp());
      IndexUpdate update = kvs.getSecond();
      update.setUpdate(d);
      update.setTable(Bytes.toBytes(group.getTable()));
      return update;
    } catch (IOException e) {
      throw new RuntimeException("Unexpected exception when getting state for columns: " + refs);
    }
  }


  /**
   * Get the next batch of primary table values for the given columns
   * @param refs columns to match against
   * @param state
   * @return the total length of all values found and the entries to add for the index
   */
  private Pair<Integer, List<ColumnEntry>> getNextEntries(List<CoveredColumn> refs, Scanner kvs,
      byte[] currentRow) throws IOException {
    int totalValueLength = 0;
    List<ColumnEntry> entries = new ArrayList<ColumnEntry>(refs.size());


    // pull out the latest state for each column reference, in order
    for (CoveredColumn ref : refs) {
      KeyValue first = ref.getFirstKeyValueForRow(currentRow);
      if (!kvs.seek(first)) {
        // no more keys, so add a null value
        entries.add(new ColumnEntry(null, ref));
        continue;
      }
      // there is a next value - we only care about the current value, so we can just snag that
      KeyValue next = kvs.next();
      if (ref.matchesFamily(next.getFamily()) && ref.matchesQualifier(next.getQualifier())) {
        byte[] v = next.getValue();
        totalValueLength += v.length;
        entries.add(new ColumnEntry(v, ref));
      } else {
        // this first one didn't match at all, so we have to put in a null entry
        entries.add(new ColumnEntry(null, ref));
        continue;
      }
      // here's where is gets a little tricky - we either need to decide if we should continue
      // adding entries (matches all qualifiers) or if we are done (matches a single qualifier)
      if (!ref.allColumns()) {
        continue;
      }
      // matches all columns, so we need to iterate until we hit the next column with the same
      // family as the current key
      byte[] lastQual = next.getQualifier();
      byte[] nextQual = null;
      while ((next = kvs.next()) != null) {
        // different family, done with this column
        if (!ref.matchesFamily(next.getFamily())) {
          break;
        }
        nextQual = next.getQualifier();
        // we are still on the same qualifier - skip it, since we already added a column for it
        if (Arrays.equals(lastQual, nextQual)) {
          continue;
        }
        // this must match the qualifier since its an all-qualifiers specifier, so we add it
        byte[] v = next.getValue();
        totalValueLength += v.length;
        entries.add(new ColumnEntry(v, ref));
        // update the last qualifier to check against
        lastQual = nextQual;
      }
    }
    return new Pair<Integer, List<ColumnEntry>>(totalValueLength, entries);
  }


  static class ColumnEntry {
    byte[] value = EMPTY_BYTES;
    CoveredColumn ref;


    public ColumnEntry(byte[] value, CoveredColumn ref) {
      this.value = value == null ? EMPTY_BYTES : value;
      this.ref = ref;
    }
  }


  /**
   * Compose the final index row key.
   * <p>
   * This is faster than adding each value independently as we can just build a single a array and
   * copy everything over once.
   * @param pk primary key of the original row
   * @param length total number of bytes of all the values that should be added
   * @param values to use when building the key
   * @return
   */
  static byte[] composeRowKey(byte[] pk, int length, List<ColumnEntry> values) {
    // now build up expected row key, each of the values, in order, followed by the PK and then some
    // info about lengths so we can deserialize each value
    byte[] output = new byte[length + pk.length];
    int pos = 0;
    int[] lengths = new int[values.size()];
    int i = 0;
    for (ColumnEntry entry : values) {
      byte[] v = entry.value;
      // skip doing the copy attempt, if we don't need to
      if (v.length != 0) {
        System.arraycopy(v, 0, output, pos, v.length);
        pos += v.length;
      }
      lengths[i++] = v.length;
    }


    // add the primary key to the end of the row key
    System.arraycopy(pk, 0, output, pos, pk.length);


    // add the lengths as suffixes so we can deserialize the elements again
    for (int l : lengths) {
      output = ArrayUtils.addAll(output, Bytes.toBytes(l));
    }


    // and the last integer is the number of values
    return ArrayUtils.addAll(output, Bytes.toBytes(values.size()));
  }


  /**
   * Essentially a short-cut from building a {@link Put}.
   * @param pk row key
   * @param timestamp timestamp of all the keyvalues
   * @param values expected value--column pair
   * @return a keyvalues that the index contains for a given row at a timestamp with the given value
   *         -- column pairs.
   */
  public static List<KeyValue> getIndexKeyValueForTesting(byte[] pk, long timestamp,
      List<Pair<byte[], CoveredColumn>> values) {
  
    int length = 0;
    List<ColumnEntry> expected = new ArrayList<ColumnEntry>(values.size());
    for (Pair<byte[], CoveredColumn> value : values) {
      ColumnEntry entry = new ColumnEntry(value.getFirst(), value.getSecond());
      length += value.getFirst().length;
      expected.add(entry);
    }
  
    byte[] rowKey = CoveredColumnIndexCodec.composeRowKey(pk, length, expected);
    Put p = new Put(rowKey, timestamp);
    CoveredColumnIndexCodec.addColumnsToPut(p, expected);
    List<KeyValue> kvs = new ArrayList<KeyValue>();
    for (Entry<byte[], List<KeyValue>> entry : p.getFamilyMap().entrySet()) {
      kvs.addAll(entry.getValue());
    }
  
    return kvs;
  }


  public static List<byte[]> getValues(byte[] bytes) {
    // get the total number of keys in the bytes
    int keyCount = CoveredColumnIndexCodec.getPreviousInteger(bytes, bytes.length);
    List<byte[]> keys = new ArrayList<byte[]>(keyCount);
    int[] lengths = new int[keyCount];
    int lengthPos = keyCount - 1;
    int pos = bytes.length - Bytes.SIZEOF_INT;
    // figure out the length of each key
    for (int i = 0; i < keyCount; i++) {
      lengths[lengthPos--] = CoveredColumnIndexCodec.getPreviousInteger(bytes, pos);
      pos -= Bytes.SIZEOF_INT;
    }


    int current = 0;
    for (int length : lengths) {
      byte[] key = Arrays.copyOfRange(bytes, current, current + length);
      keys.add(key);
      current += length;
    }


    return keys;
  }


  /**
   * Read an integer from the preceding {@value Bytes#SIZEOF_INT} bytes
   * @param bytes array to read from
   * @param start start point, backwards from which to read. For example, if specifying "25", we
   *          would try to read an integer from 21 -> 25
   * @return an integer from the proceeding {@value Bytes#SIZEOF_INT} bytes, if it exists.
   */
  private static int getPreviousInteger(byte[] bytes, int start) {
    return Bytes.toInt(bytes, start - Bytes.SIZEOF_INT);
  }


  /**
   * Check to see if an row key just contains a list of null values.
   * @param bytes row key to examine
   * @return <tt>true</tt> if all the values are zero-length, <tt>false</tt> otherwise
   */
  public static boolean checkRowKeyForAllNulls(byte[] bytes) {
    int keyCount = CoveredColumnIndexCodec.getPreviousInteger(bytes, bytes.length);
    int pos = bytes.length - Bytes.SIZEOF_INT;
    for (int i = 0; i < keyCount; i++) {
      int next = CoveredColumnIndexCodec.getPreviousInteger(bytes, pos);
      if (next > 0) {
        return false;
      }
      pos -= Bytes.SIZEOF_INT;
    }


    return true;
  }


  @Override
  public boolean isEnabled(Mutation m) {
    // this could be a bit smarter, looking at the groups for the mutation, but we leave it at this
    // simple check for the moment.
    return groups.size() > 0;
  }
}
Source Code of com.salesforce.hbase.index.covered.example.CoveredColumnIndexCodec

Related Classes of com.salesforce.hbase.index.covered.example.CoveredColumnIndexCodec