Package org.apache.pig.backend.hadoop.hbase

Source Code of org.apache.pig.backend.hadoop.hbase.HBaseTableInputFormat$HBaseTableIFBuilder

/* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.pig.backend.hadoop.hbase;

import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.List;
import java.util.ListIterator;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableRecordReader;
import org.apache.hadoop.hbase.mapreduce.TableSplit;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.InputSplit;

public class HBaseTableInputFormat extends TableInputFormat {
    private static final Log LOG = LogFactory.getLog(HBaseTableInputFormat.class);

    protected final byte[] gt_;
    protected final byte[] gte_;
    protected final byte[] lt_;
    protected final byte[] lte_;

    public HBaseTableInputFormat() {
        this(-1, null, null, null, null);
    }

    protected HBaseTableInputFormat(long limit, byte[] gt, byte[] gte, byte[] lt, byte[] lte) {
        super();
        setTableRecordReader(new HBaseTableRecordReader(limit));
        gt_ = gt;
        gte_ = gte;
        lt_ = lt;
        lte_ = lte;
    }

    public static class HBaseTableIFBuilder {
        protected byte[] gt_;
        protected byte[] gte_;
        protected byte[] lt_;
        protected byte[] lte_;
        protected long limit_;
        protected Configuration conf_;

        public HBaseTableIFBuilder withGt(byte[] gt) { gt_ = gt; return this; }
        public HBaseTableIFBuilder withGte(byte[] gte) { gte_ = gte; return this; }
        public HBaseTableIFBuilder withLt(byte[] lt) { lt_ = lt; return this; }
        public HBaseTableIFBuilder withLte(byte[] lte) { lte_ = lte; return this; }
        public HBaseTableIFBuilder withLimit(long limit) { limit_ = limit; return this; }
        public HBaseTableIFBuilder withConf(Configuration conf) { conf_ = conf; return this; }

        public HBaseTableInputFormat build() {
            HBaseTableInputFormat inputFormat = new HBaseTableInputFormat(limit_, gt_, gte_, lt_, lte_);
            if (conf_ != null) inputFormat.setConf(conf_);
            return inputFormat;
        }

    }

    @Override
    public List<InputSplit> getSplits(org.apache.hadoop.mapreduce.JobContext context)
    throws IOException {
        List<InputSplit> splits = super.getSplits(context);
        ListIterator<InputSplit> splitIter = splits.listIterator();
        LOG.info("Got " + splits.size() + " splits.");
        while (splitIter.hasNext()) {
            TableSplit split = (TableSplit) splitIter.next();
            byte[] startKey = split.getStartRow();
            byte[] endKey = split.getEndRow();
            // Skip if the region doesn't satisfy configured options.
            if ((skipRegion(CompareOp.LESS, startKey, lt_)) ||
                    (skipRegion(CompareOp.GREATER, endKey, gt_)) ||
                    (skipRegion(CompareOp.GREATER, endKey, gte_)) ||
                    (skipRegion(CompareOp.LESS_OR_EQUAL, startKey, lte_)) )  {
              LOG.info("Removing split " + split + " " + Bytes.toStringBinary(startKey) + " - " + Bytes.toStringBinary(endKey));
                splitIter.remove();
            }
        }
        LOG.info("Returning " + splits.size() + " splits.");
        return splits;
    }

    private boolean skipRegion(CompareOp op, byte[] key, byte[] option ) {

        if (key.length == 0 || option == null)
            return false;

        BinaryComparator comp = new BinaryComparator(option);
        RowFilter rowFilter = new RowFilter(op, comp);
        return rowFilter.filterRowKey(key, 0, key.length);
    }

    protected class HBaseTableRecordReader extends TableRecordReader {

        private long recordsSeen = 0;
        private final long limit_;
        private byte[] startRow_;
        private byte[] endRow_;
        private transient byte[] currRow_;

        private BigInteger bigStart_;
        private BigInteger bigEnd_;
        private BigDecimal bigRange_;
        private transient float progressSoFar_ = 0;

        public HBaseTableRecordReader(long limit) {
            limit_ = limit;
        }

        @Override
        public void setScan(Scan scan) {
            super.setScan(scan);

            startRow_ = scan.getStartRow();
            endRow_ = scan.getStopRow();
            byte[] startPadded;
            byte[] endPadded;
            if (startRow_.length < endRow_.length) {
                startPadded = Bytes.padTail(startRow_, endRow_.length - startRow_.length);
                endPadded = endRow_;
            } else if (endRow_.length < startRow_.length) {
                startPadded = startRow_;
                endPadded = Bytes.padTail(endRow_, startRow_.length - endRow_.length);
            } else {
                startPadded = startRow_;
                endPadded = endRow_;
            }
            currRow_ = startRow_;
            byte [] prependHeader = {1, 0};
            bigStart_ = new BigInteger(Bytes.add(prependHeader, startPadded));
            bigEnd_ = new BigInteger(Bytes.add(prependHeader, endPadded));
            bigRange_ = new BigDecimal(bigEnd_.subtract(bigStart_));
            LOG.info("setScan with ranges: " + bigStart_ + " - " + bigEnd_ + " ( " + bigRange_ + ")");
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (limit_ > 0 && ++recordsSeen > limit_) {
                return false;
            }
            boolean hasMore = super.nextKeyValue();
            if (hasMore) {
                currRow_ = getCurrentKey().get();
            }
            return hasMore;

        }

        @Override
        public float getProgress() {
            if (currRow_ == null || currRow_.length == 0 || endRow_.length == 0 || endRow_ == HConstants.LAST_ROW) {
                return 0;
            }
            byte[] lastPadded = currRow_;
            if (currRow_.length < endRow_.length) {
                lastPadded = Bytes.padTail(currRow_, endRow_.length - currRow_.length);
            }
            if (currRow_.length < startRow_.length) {
                lastPadded = Bytes.padTail(currRow_, startRow_.length - currRow_.length);
            }
            byte [] prependHeader = {1, 0};
            BigInteger bigLastRow = new BigInteger(Bytes.add(prependHeader, lastPadded));
            if (bigLastRow.compareTo(bigEnd_) > 0) {
                return progressSoFar_;
            }
            BigDecimal processed = new BigDecimal(bigLastRow.subtract(bigStart_));
            try {
                BigDecimal progress = processed.setScale(3).divide(bigRange_, BigDecimal.ROUND_HALF_DOWN);
                progressSoFar_ = progress.floatValue();
                return progressSoFar_;
            } catch (java.lang.ArithmeticException e) {
                return 0;
            }
        }

    }
}
TOP

Related Classes of org.apache.pig.backend.hadoop.hbase.HBaseTableInputFormat$HBaseTableIFBuilder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.