Package org.apache.hcatalog.hbase

Source Code of org.apache.hcatalog.hbase.HBaseInputFormat

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.hcatalog.hbase;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableSplit;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.mapred.HCatMapRedUtil;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hcatalog.common.HCatConstants;
import org.apache.hcatalog.common.HCatUtil;
import org.apache.hcatalog.mapreduce.InputJobInfo;

/**
* This class HBaseInputFormat is a wrapper class of TableInputFormat in HBase.
*/
class HBaseInputFormat implements InputFormat<ImmutableBytesWritable, Result> {

    private final TableInputFormat inputFormat;

    public HBaseInputFormat() {
        inputFormat = new TableInputFormat();
    }

    /*
     * @param instance of InputSplit
     *
     * @param instance of TaskAttemptContext
     *
     * @return RecordReader
     *
     * @throws IOException
     *
     * @throws InterruptedException
     *
     * @see
     * org.apache.hadoop.mapreduce.InputFormat#createRecordReader(org.apache
     * .hadoop.mapreduce.InputSplit,
     * org.apache.hadoop.mapreduce.TaskAttemptContext)
     */
    @Override
    public RecordReader<ImmutableBytesWritable, Result> getRecordReader(
        InputSplit split, JobConf job, Reporter reporter)
        throws IOException {
        String jobString = job.get(HCatConstants.HCAT_KEY_JOB_INFO);
        InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobString);

        String tableName = job.get(TableInputFormat.INPUT_TABLE);
        TableSplit tSplit = (TableSplit) split;
        HbaseSnapshotRecordReader recordReader = new HbaseSnapshotRecordReader(inputJobInfo, job);
        inputFormat.setConf(job);
        Scan inputScan = inputFormat.getScan();
        // TODO: Make the caching configurable by the user
        inputScan.setCaching(200);
        inputScan.setCacheBlocks(false);
        Scan sc = new Scan(inputScan);
        sc.setStartRow(tSplit.getStartRow());
        sc.setStopRow(tSplit.getEndRow());
        recordReader.setScan(sc);
        recordReader.setHTable(new HTable(job, tableName));
        recordReader.init();
        return recordReader;
    }

    /*
     * @param jobContext
     *
     * @return List of InputSplit
     *
     * @throws IOException
     *
     * @throws InterruptedException
     *
     * @see
     * org.apache.hadoop.mapreduce.InputFormat#getSplits(org.apache.hadoop.mapreduce
     * .JobContext)
     */
    @Override
    public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits)
        throws IOException {
        inputFormat.setConf(job);
        return convertSplits(inputFormat.getSplits(HCatMapRedUtil.createJobContext(job, null,
            Reporter.NULL)));
    }

    private InputSplit[] convertSplits(List<org.apache.hadoop.mapreduce.InputSplit> splits) {
        InputSplit[] converted = new InputSplit[splits.size()];
        for (int i = 0; i < splits.size(); i++) {
            org.apache.hadoop.hbase.mapreduce.TableSplit tableSplit =
                (org.apache.hadoop.hbase.mapreduce.TableSplit) splits.get(i);
            TableSplit newTableSplit = new TableSplit(tableSplit.getTableName(),
                tableSplit.getStartRow(),
                tableSplit.getEndRow(), tableSplit.getRegionLocation());
            converted[i] = newTableSplit;
        }
        return converted;
    }

}
TOP

Related Classes of org.apache.hcatalog.hbase.HBaseInputFormat

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.