Package org.kiji.schema.impl.hbase

Source Code of org.kiji.schema.impl.hbase.HBaseDataRequestAdapter$NameTranslatingFilterContext

/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.kiji.schema.impl.hbase;

import java.io.IOException;
import java.util.Map;
import java.util.NavigableSet;

import com.google.common.base.Preconditions;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.ColumnPaginationFilter;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.kiji.annotations.ApiAudience;
import org.kiji.schema.EntityId;
import org.kiji.schema.HBaseEntityId;
import org.kiji.schema.InternalKijiError;
import org.kiji.schema.KijiColumnName;
import org.kiji.schema.KijiDataRequest;
import org.kiji.schema.NoSuchColumnException;
import org.kiji.schema.filter.KijiColumnFilter;
import org.kiji.schema.hbase.HBaseColumnName;
import org.kiji.schema.hbase.HBaseScanOptions;
import org.kiji.schema.layout.HBaseColumnNameTranslator;
import org.kiji.schema.layout.KijiTableLayout;
import org.kiji.schema.layout.KijiTableLayout.LocalityGroupLayout.FamilyLayout;
import org.kiji.schema.platform.SchemaPlatformBridge;

/**
* Wraps a KijiDataRequest to expose methods that generate meaningful objects in HBase
* land, like {@link org.apache.hadoop.hbase.client.Get}s and {@link
* org.apache.hadoop.hbase.client.Scan}s.
*/
@ApiAudience.Private
public final class HBaseDataRequestAdapter {
  private static final Logger LOG = LoggerFactory.getLogger(HBaseDataRequestAdapter.class);

  /** The wrapped KijiDataRequest. */
  private final KijiDataRequest mKijiDataRequest;
  /** The translator for generating HBase column names. */
  private final HBaseColumnNameTranslator mColumnNameTranslator;

  /**
   * Creates a new HBaseDataRequestAdapter for a given data request using a given
   * KijiColumnNameTranslator.
   *
   * @param kijiDataRequest the data request to adapt for HBase.
   * @param translator the name translator for getting HBase column names.
   */
  public HBaseDataRequestAdapter(
      final KijiDataRequest kijiDataRequest,
      final HBaseColumnNameTranslator translator
  ) {
    mKijiDataRequest = kijiDataRequest;
    mColumnNameTranslator = translator;
  }

  /**
   * Constructs an HBase Scan that describes the data requested in the KijiDataRequest.
   *
   * @param tableLayout The layout of the Kiji table to read from.  This is required for
   *     determining the mapping between Kiji columns and HBase columns.
   * @return An HBase Scan descriptor.
   * @throws IOException If there is an error.
   */
  public Scan toScan(final KijiTableLayout tableLayout) throws IOException {
    return toScan(tableLayout, new HBaseScanOptions());
  }

  /**
   * Constructs an HBase Scan that describes the data requested in the KijiDataRequest.
   *
   * @param tableLayout The layout of the Kiji table to read from.  This is required for
   *     determining the mapping between Kiji columns and HBase columns.
   * @param scanOptions Custom options for this scan.
   * @return An HBase Scan descriptor.
   * @throws IOException If there is an error.
   */
  public Scan toScan(
      final KijiTableLayout tableLayout,
      final HBaseScanOptions scanOptions
  ) throws IOException {
    // Unfortunately in HBase 95+, we can no longer create empty gets.
    // So create a fake one for this table and fill in the fields of a new scan.
    final Get tempGet = toGet(HBaseEntityId.fromHBaseRowKey(new byte[1]), tableLayout);
    final Scan scan = new Scan();
    scan.setFilter(tempGet.getFilter());
    scan.setCacheBlocks(tempGet.getCacheBlocks());
    scan.setMaxVersions(tempGet.getMaxVersions());
    scan.setTimeRange(tempGet.getTimeRange().getMin(), tempGet.getTimeRange().getMax());
    scan.setFamilyMap(tempGet.getFamilyMap());
    configureScan(scan, scanOptions);
    return scan;
  }

  /**
   * Like toScan(), but mutates a given Scan object to include everything in the data
   * request instead of returning a new one.
   *
   * <p>Any existing request settings in the Scan object will be preserved.</p>
   *
   * @param scan The existing scan object to apply the data request to.
   * @param tableLayout The layout of the Kiji table the scan will read from.
   * @throws IOException If there is an error.
   */
  public void applyToScan(
      final Scan scan,
      final KijiTableLayout tableLayout
  ) throws IOException {
    final Scan newScan = toScan(tableLayout);

    // It's okay to put columns into the Scan that are already there.
    for (Map.Entry<byte[], NavigableSet<byte[]>> columnRequest
             : newScan.getFamilyMap().entrySet()) {
      byte[] family = columnRequest.getKey();
      if (null == columnRequest.getValue()) {
        // Request all columns in the family.
        scan.addFamily(family);
      } else {
        // Calls to Scan.addColumn() will invalidate any previous calls to Scan.addFamily(),
        // so we only do it if:
        //   1. No data from the family has been added to the request yet, OR
        //   2. Only specific columns from the family have been requested so far.
        if (!scan.getFamilyMap().containsKey(family)
            || null != scan.getFamilyMap().get(family)) {
          for (byte[] qualifier : columnRequest.getValue()) {
            scan.addColumn(family, qualifier);
          }
        }
      }
    }
  }

  /**
   * Constructs an HBase Get that describes the data requested in the KijiDataRequest for
   * a particular entity/row.
   *
   * @param entityId The row to build an HBase Get request for.
   * @param tableLayout The layout of the Kiji table to read from.  This is required for
   *     determining the mapping between Kiji columns and HBase columns.
   * @return An HBase Get descriptor.
   * @throws IOException If there is an error.
   */
  public Get toGet(
      final EntityId entityId,
      final KijiTableLayout tableLayout
  ) throws IOException {

    // Context to translate user Kiji filters into HBase filters:
    final KijiColumnFilter.Context filterContext =
        new NameTranslatingFilterContext(mColumnNameTranslator);

    // Get request we are building and returning:
    final Get get = new Get(entityId.getHBaseRowKey());

    // Filters for each requested column: OR(<filter-for-column-1>, <filter-for-column2>, ...)
    final FilterList columnFilters = new FilterList(FilterList.Operator.MUST_PASS_ONE);

    // There's a shortcoming in the HBase API that doesn't allow us to specify per-column
    // filters for timestamp ranges and max versions.  We need to generate a request that
    // will include all versions that we need, and add filters for the individual columns.

    // As of HBase 0.94, the ColumnPaginationFilter, which we had been using to permit per-column
    // maxVersions settings, no longer pages over multiple versions for the same column. We can
    // still use it, however, to limit fully-qualified columns with maxVersions = 1 to return only
    // the most recent version in the request's time range. All other columns will use the largest
    // maxVersions seen on any column request.

    // Fortunately, although we may retrieve more versions per column than we need from HBase, we
    // can still honor the user's requested maxVersions when returning the versions in
    // HBaseKijiRowData.

    // Largest of the max-versions from all the requested columns.
    // Columns with paging are excluded (max-versions does not make sense when paging):
    int largestMaxVersions = 1;

    // If every column is paged, we should add a keyonly filter to a single column, so we can have
    // access to entityIds in our KijiRowData that is constructed.
    boolean completelyPaged = mKijiDataRequest.isPagingEnabled() ? true : false;

    for (KijiDataRequest.Column columnRequest : mKijiDataRequest.getColumns()) {
      final KijiColumnName kijiColumnName = columnRequest.getColumnName();
      final HBaseColumnName hbaseColumnName =
          mColumnNameTranslator.toHBaseColumnName(kijiColumnName);

      if (!columnRequest.isPagingEnabled()) {
        completelyPaged = false;

        // Do not include max-versions from columns with paging enabled:
        largestMaxVersions = Math.max(largestMaxVersions, columnRequest.getMaxVersions());
      }

      if (kijiColumnName.isFullyQualified()) {
        // Requests a fully-qualified column.
        // Adds this column to the Get request, and also as a filter.
        //
        // Filters are required here because we might end up requesting all cells from the
        // HBase family (ie. from the Kiji locality group), if a map-type family from that
        // locality group is also requested.
        addColumn(get, hbaseColumnName);
        columnFilters.addFilter(toFilter(columnRequest, hbaseColumnName, filterContext));

      } else {
        final FamilyLayout fLayout = tableLayout.getFamilyMap().get(kijiColumnName.getFamily());
        if (fLayout.isGroupType()) {
          // Requests all columns in a Kiji group-type family.
          // Expand the family request into individual column requests:
          for (String qualifier : fLayout.getColumnMap().keySet()) {
            final KijiColumnName fqKijiColumnName =
                KijiColumnName.create(kijiColumnName.getFamily(), qualifier);
            final HBaseColumnName fqHBaseColumnName =
                mColumnNameTranslator.toHBaseColumnName(fqKijiColumnName);
            addColumn(get, fqHBaseColumnName);
            columnFilters.addFilter(toFilter(columnRequest, fqHBaseColumnName, filterContext));
          }

        } else if (fLayout.isMapType()) {
          // Requests all columns in a Kiji map-type family.
          // We need to request all columns in the HBase family (ie. in the Kiji locality group)
          // and add a column prefix-filter to select only the columns from that Kiji family:
          get.addFamily(hbaseColumnName.getFamily());
          columnFilters.addFilter(toFilter(columnRequest, hbaseColumnName, filterContext));

        } else {
          throw new InternalKijiError("Family is neither group-type nor map-type");
        }
      }
    }

    if (completelyPaged) {
      // All requested columns have paging enabled.
      Preconditions.checkState(largestMaxVersions == 1);

      // We just need to know whether a row has data in at least one of the requested columns.
      // Stop at the first valid key using AND(columnFilters, FirstKeyOnlyFilter):
      get.setFilter(new FilterList(
          FilterList.Operator.MUST_PASS_ALL, columnFilters, new FirstKeyOnlyFilter()));
    } else {
      get.setFilter(columnFilters);
    }

    return get
        .setTimeRange(mKijiDataRequest.getMinTimestamp(), mKijiDataRequest.getMaxTimestamp())
        .setMaxVersions(largestMaxVersions);
  }

  /**
   * Adds a fully-qualified column to an HBase Get request, if necessary.
   *
   * <p>
   *   If the entire HBase family is already requested, the column does not need to be added.
   * </p>
   *
   * @param get Adds the column to this Get request.
   * @param column Fully-qualified HBase column to add to the Get request.
   * @return the Get request.
   */
  private static Get addColumn(final Get get, final HBaseColumnName column) {
    // Calls to Get.addColumn() invalidate previous calls to Get.addFamily(),
    // so we only do it if:
    //   1. No data from the family has been added to the request yet,
    // OR
    //   2. Only specific columns from the family have been requested so far.
    // Note: the Get family-map uses null values to indicate requests for an entire HBase family.
    if (!get.familySet().contains(column.getFamily())
        || (get.getFamilyMap().get(column.getFamily()) != null)) {
      get.addColumn(column.getFamily(), column.getQualifier());
    }
    return get;
  }

  /**
   * Configures a Scan with the options specified on HBaseScanOptions.
   * Whenever an option is not specified on <code>scanOptions</code>,
   * the hbase default will be used instead.
   *
   * @param scan The Scan to configure.
   * @param scanOptions The options to configure this Scan with.
   */
  private void configureScan(final Scan scan, final HBaseScanOptions scanOptions) {
    if (null != scanOptions.getClientBufferSize()) {
      scan.setBatch(scanOptions.getClientBufferSize());
    }
    if (null != scanOptions.getServerPrefetchSize()) {
      scan.setCaching(scanOptions.getServerPrefetchSize());
    }
    if (null != scanOptions.getCacheBlocks()) {
      scan.setCacheBlocks(scanOptions.getCacheBlocks());
    }
  }

  /**
   * Constructs and returns the HBase filter that returns only the
   * data in a given Kiji column request.
   *
   * @param columnRequest A kiji column request.
   * @param hbaseColumnName HBase column name.
   * @param filterContext Context to translate Kiji column filters to HBase filters.
   * @return An HBase filter that retrieves only the data for the column request.
   * @throws IOException If there is an error.
   */
  private static Filter toFilter(
      final KijiDataRequest.Column columnRequest,
      final HBaseColumnName hbaseColumnName,
      final KijiColumnFilter.Context filterContext
  ) throws IOException {

    final KijiColumnName kijiColumnName = columnRequest.getColumnName();

    // Builds an HBase filter for the specified column:
    //     (HBase-family = Kiji-locality-group)
    // AND (HBase-qualifier = Kiji-family:qualifier / prefixed by Kiji-family:)
    // AND (ColumnPaginationFilter(limit=1))  // when paging or if max-versions is 1
    // AND (custom user filter)
    // AND (FirstKeyOnlyFilter)  // only when paging
    //
    // Note:
    //     We cannot use KeyOnlyFilter as this filter uses Filter.transform() which applies
    //     unconditionally on all the KeyValue in the HBase Result.
    final FilterList filter = new FilterList(FilterList.Operator.MUST_PASS_ALL);

    // Only let cells from the locality-group (ie. HBase family) the column belongs to, ie:
    //     HBase-family = Kiji-locality-group
    filter.addFilter(SchemaPlatformBridge.get().createFamilyFilter(
        CompareFilter.CompareOp.EQUAL,
        hbaseColumnName.getFamily()));

    if (kijiColumnName.isFullyQualified()) {
      // Only let cells from the fully-qualified column ie.:
      //     HBase-qualifier = Kiji-family:qualifier
      filter.addFilter(SchemaPlatformBridge.get().createQualifierFilter(
          CompareFilter.CompareOp.EQUAL,
          hbaseColumnName.getQualifier()));
    } else {
      // Only let cells from the map-type family ie.:
      //     HBase-qualifier starts with "Kiji-family:"
      filter.addFilter(new ColumnPrefixFilter(hbaseColumnName.getQualifier()));
    }

    if (columnRequest.isPagingEnabled()
        || (kijiColumnName.isFullyQualified() && (columnRequest.getMaxVersions() == 1))) {
      // For fully qualified columns where maxVersions = 1, we can use the
      // ColumnPaginationFilter to restrict the number of versions returned to at most 1.
      //
      // Other columns' maxVersions will be filtered client-side in HBaseKijiRowData.
      //
      // Prior to HBase 0.94, we could use this optimization for all fully qualified
      // columns' maxVersions requests, due to different behavior in the
      // ColumnPaginationFilter.
      //
      // Note: we could also use this for a map-type family if max-versions == 1,
      //     by setting limit = Integer.MAX_VALUE.
      final int limit = 1;
      final int offset = 0;
      filter.addFilter(new ColumnPaginationFilter(limit, offset));
    }

    // Add the optional user-specified column filter, if specified:
    if (columnRequest.getFilter() != null) {
      filter.addFilter(
          columnRequest.getFilter().toHBaseFilter(kijiColumnName, filterContext));
    }

    // If column has paging enabled, we just want to know about the existence of a cell:
    if (columnRequest.isPagingEnabled()) {
      filter.addFilter(new FirstKeyOnlyFilter());

      // TODO(SCHEMA-334) KeyOnlyFilter uses Filter.transform() which applies unconditionally.
      //     There is a chance that Filter.transform() may apply conditionally in the future,
      //     in which case we may re-introduce the KeyOnlyFilter.
      //     An alternative is to provide a custom HBase filter to handle Kiji data requests
      //     efficiently.
    }

    return filter;
  }

  /**
   * A Context for KijiColumnFilters that translates column names to their HBase
   * representation.
   */
  public static final class NameTranslatingFilterContext extends KijiColumnFilter.Context {
    /** The translator to use. */
    private final HBaseColumnNameTranslator mTranslator;

    /**
     * Initialize this context with the specified column name translator.
     *
     * @param translator the translator to use.
     */
    public NameTranslatingFilterContext(final HBaseColumnNameTranslator translator) {
      mTranslator = translator;
    }

    /** {@inheritDoc} */
    @Override
    public HBaseColumnName getHBaseColumnName(
        final KijiColumnName kijiColumnName
    ) throws NoSuchColumnException {
      return mTranslator.toHBaseColumnName(kijiColumnName);
    }
  }
}
TOP

Related Classes of org.kiji.schema.impl.hbase.HBaseDataRequestAdapter$NameTranslatingFilterContext

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.