Source Code of org.apache.hcatalog.hbase.HBaseHCatStorageHandler

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hcatalog.hbase;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapred.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hive.hbase.HBaseSerDe;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hcatalog.common.HCatConstants;
import org.apache.hcatalog.common.HCatUtil;
import org.apache.hcatalog.data.schema.HCatSchema;
import org.apache.hcatalog.hbase.HBaseBulkOutputFormat.HBaseBulkOutputCommitter;
import org.apache.hcatalog.hbase.HBaseDirectOutputFormat.HBaseDirectOutputCommitter;
import org.apache.hcatalog.hbase.snapshot.RevisionManager;
import org.apache.hcatalog.hbase.snapshot.Transaction;
import org.apache.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hcatalog.mapreduce.HCatTableInfo;
import org.apache.hcatalog.mapreduce.InputJobInfo;
import org.apache.hcatalog.mapreduce.OutputJobInfo;
import org.apache.hcatalog.mapreduce.HCatStorageHandler;
import org.apache.thrift.TBase;
import org.apache.zookeeper.ZooKeeper;

import com.facebook.fb303.FacebookBase;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

/**
 * The HBaseHCatStorageHandler provides functionality to create HBase tables
 * through HCatalog. The implementation closely follows Hive's
 * HBaseStorageHandler, adapted where needed to suit HCatalog.
 */
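/*
 * Illustrative DDL (a sketch; the table and column names are hypothetical,
 * while the handler class and the hbase.columns.mapping property are real):
 *
 *   CREATE TABLE my_hbase_table (key string, value string)
 *   STORED BY 'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'
 *   TBLPROPERTIES ('hbase.columns.mapping' = ':key,cf1:value');
 *
 * preCreateTable() validates the mapping and creates the HBase table, or,
 * for EXTERNAL tables, verifies that the existing table matches the mapping.
 */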
public class HBaseHCatStorageHandler extends HCatStorageHandler implements HiveMetaHook, Configurable {

    public final static String DEFAULT_PREFIX = "default.";
    private final static String PROPERTY_INT_OUTPUT_LOCATION = "hcat.hbase.mapreduce.intermediateOutputLocation";

    private Configuration hbaseConf;
    private Configuration jobConf;
    private HBaseAdmin admin;

    @Override
    public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
        // Populate jobProperties with the input table name, scan columns, the
        // revision-manager snapshot, and hbase-default.xml / hbase-site.xml settings
        Map<String, String> tableJobProperties = tableDesc.getJobProperties();
        String jobString = tableJobProperties.get(HCatConstants.HCAT_KEY_JOB_INFO);
        try {
            InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobString);
            HCatTableInfo tableInfo = inputJobInfo.getTableInfo();
            String qualifiedTableName = HBaseHCatStorageHandler.getFullyQualifiedHBaseTableName(tableInfo);
            jobProperties.put(TableInputFormat.INPUT_TABLE, qualifiedTableName);

            Configuration jobConf = getJobConf();
            addHbaseResources(jobConf, jobProperties);
            JobConf copyOfConf = new JobConf(jobConf);
            HBaseConfiguration.addHbaseResources(copyOfConf);
            //Getting the HBase delegation token in getInputSplits does not work
            //with Pig, so it is obtained here instead.
            if (jobConf instanceof JobConf) { //Should always be the case
                HBaseUtil.addHBaseDelegationToken(copyOfConf);
                ((JobConf)jobConf).getCredentials().addAll(copyOfConf.getCredentials());
            }

            String outputSchema = jobConf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA);
            jobProperties.put(TableInputFormat.SCAN_COLUMNS, getScanColumns(tableInfo, outputSchema));

            String serSnapshot = (String) inputJobInfo.getProperties().get(
                    HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY);
            if (serSnapshot == null) {
                HCatTableSnapshot snapshot = HBaseRevisionManagerUtil.createSnapshot(copyOfConf,
                        qualifiedTableName, tableInfo);
                jobProperties.put(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY,
                        HCatUtil.serialize(snapshot));
            }

            //This adds the jars directly to the jobConf. Setting them in jobProperties
            //does not currently propagate to the JobConf, since jobProperties is
            //maintained per partition.
            //TODO: Remove when HCAT-308 is fixed
            addOutputDependencyJars(jobConf);
            jobProperties.put("tmpjars", jobConf.get("tmpjars"));

        } catch (IOException e) {
            throw new IllegalStateException("Error while configuring job properties", e);
        }
    }
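    /*
     * Read-side sketch (hypothetical job setup; the database and table names
     * are made up):
     *
     *   Job job = new Job(conf, "hbase-read");
     *   HCatInputFormat.setInput(job, InputJobInfo.create("default", "my_table", null));
     *   job.setInputFormatClass(HCatInputFormat.class);
     *
     * The keys populated above (TableInputFormat.INPUT_TABLE, SCAN_COLUMNS,
     * the revision-manager snapshot, and tmpjars) reach the underlying JobConf
     * when the job computes its input splits.
     */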

    @Override
    public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
        // Populate jobProperties with output table name, hbase-default.xml, hbase-site.xml, OutputJobInfo
        // Populate RM transaction in OutputJobInfo
        // In case of bulk mode, populate intermediate output location
        Map<String, String> tableJobProperties = tableDesc.getJobProperties();
        String jobString = tableJobProperties.get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
        try {
            OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(jobString);
            HCatTableInfo tableInfo = outputJobInfo.getTableInfo();
            String qualifiedTableName = HBaseHCatStorageHandler.getFullyQualifiedHBaseTableName(tableInfo);
            jobProperties.put(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, qualifiedTableName);
            jobProperties.put(TableOutputFormat.OUTPUT_TABLE, qualifiedTableName);

            Configuration jobConf = getJobConf();
            addHbaseResources(jobConf, jobProperties);

            Configuration copyOfConf = new Configuration(jobConf);
            HBaseConfiguration.addHbaseResources(copyOfConf);

            String txnString = outputJobInfo.getProperties().getProperty(
                    HBaseConstants.PROPERTY_WRITE_TXN_KEY);
            Transaction txn = null;
            if (txnString == null) {
                txn = HBaseRevisionManagerUtil.beginWriteTransaction(qualifiedTableName, tableInfo, copyOfConf);
                String serializedTxn = HCatUtil.serialize(txn);
                outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
                        serializedTxn);
            } else {
                txn = (Transaction) HCatUtil.deserialize(txnString);
            }
            if (isBulkMode(outputJobInfo)) {
                String tableLocation = tableInfo.getTableLocation();
                String location = new Path(tableLocation, "REVISION_" + txn.getRevisionNumber())
                        .toString();
                outputJobInfo.getProperties().setProperty(PROPERTY_INT_OUTPUT_LOCATION, location);
                // We write an intermediate sequence file, so the location is
                // not taken from OutputJobInfo.getLocation()
                // TODO replace this with a mapreduce constant when available
                jobProperties.put("mapred.output.dir", location);
                jobProperties.put("mapred.output.committer.class", HBaseBulkOutputCommitter.class.getName());
            } else {
                jobProperties.put("mapred.output.committer.class", HBaseDirectOutputCommitter.class.getName());
            }

            jobProperties.put(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
            addOutputDependencyJars(jobConf);
            jobProperties.put("tmpjars", jobConf.get("tmpjars"));

        } catch (IOException e) {
            throw new IllegalStateException("Error while configuring job properties", e);
        }
    }
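    /*
     * Write-side sketch (hypothetical job setup; the database and table names
     * are made up):
     *
     *   Job job = new Job(conf, "hbase-write");
     *   HCatOutputFormat.setOutput(job, OutputJobInfo.create("default", "my_table", null));
     *   job.setOutputFormatClass(HCatOutputFormat.class);
     *
     * The committer chosen above depends on the storer's bulk-mode flag (see
     * isBulkMode): bulk mode stages an intermediate sequence file under
     * REVISION_<n> for HBaseBulkOutputCommitter, while direct mode writes
     * straight to HBase through HBaseDirectOutputCommitter.
     */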

    /*
    * @return instance of HiveAuthorizationProvider
    *
    * @throws HiveException
    *
    * @see org.apache.hcatalog.storagehandler.HCatStorageHandler#
    * getAuthorizationProvider()
    */
    @Override
    public HiveAuthorizationProvider getAuthorizationProvider()
            throws HiveException {

        HBaseAuthorizationProvider hbaseAuth = new HBaseAuthorizationProvider();
        hbaseAuth.init(getConf());
        return hbaseAuth;
    }

    /*
     * @param table
     *
     * @throws MetaException
     *
     * @see org.apache.hcatalog.storagehandler.HCatStorageHandler
     * #commitCreateTable(org.apache.hadoop.hive.metastore.api.Table)
     */
    @Override
    public void commitCreateTable(Table table) throws MetaException {
    }

    /*
     * @param instance of table
     *
     * @param deleteData
     *
     * @throws MetaException
     *
     * @see org.apache.hcatalog.storagehandler.HCatStorageHandler
     * #commitDropTable(org.apache.hadoop.hive.metastore.api.Table, boolean)
     */
    @Override
    public void commitDropTable(Table tbl, boolean deleteData)
            throws MetaException {
        checkDeleteTable(tbl);

    }

    /*
     * @param instance of table
     *
     * @throws MetaException
     *
     * @see org.apache.hcatalog.storagehandler.HCatStorageHandler
     * #preCreateTable(org.apache.hadoop.hive.metastore.api.Table)
     */
    @Override
    public void preCreateTable(Table tbl) throws MetaException {
        boolean isExternal = MetaStoreUtils.isExternalTable(tbl);

        hbaseConf = getConf();

        if (tbl.getSd().getLocation() != null) {
            throw new MetaException("LOCATION may not be specified for HBase.");
        }

        try {
            String tableName = getFullyQualifiedHBaseTableName(tbl);
            String hbaseColumnsMapping = tbl.getParameters().get(
                    HBaseSerDe.HBASE_COLUMNS_MAPPING);

            if (hbaseColumnsMapping == null) {
                throw new MetaException(
                        "No hbase.columns.mapping defined in table"
                                + " properties.");
            }

            List<String> hbaseColumnFamilies = new ArrayList<String>();
            List<String> hbaseColumnQualifiers = new ArrayList<String>();
            List<byte[]> hbaseColumnFamiliesBytes = new ArrayList<byte[]>();
            int iKey = HBaseUtil.parseColumnMapping(hbaseColumnsMapping,
                    hbaseColumnFamilies, hbaseColumnFamiliesBytes,
                    hbaseColumnQualifiers, null);
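            /*
             * Worked example (an illustrative mapping): for
             * hbase.columns.mapping = ":key,cf1:q1,cf2:q2" the parse yields
             * families [":key", "cf1", "cf2"] with iKey = 0; the key entry is
             * removed below, so only cf1 and cf2 become column families.
             */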

            HTableDescriptor tableDesc;
            Set<String> uniqueColumnFamilies = new HashSet<String>();
            if (!getHBaseAdmin().tableExists(tableName)) {
                // if it is not an external table then create one
                if (!isExternal) {
                    // Create the column descriptors
                    tableDesc = new HTableDescriptor(tableName);
                    uniqueColumnFamilies.addAll(hbaseColumnFamilies);
                    uniqueColumnFamilies.remove(hbaseColumnFamilies.get(iKey));

                    for (String columnFamily : uniqueColumnFamilies) {
                        HColumnDescriptor familyDesc = new HColumnDescriptor(Bytes
                                .toBytes(columnFamily));
                        familyDesc.setMaxVersions(Integer.MAX_VALUE);
                        tableDesc.addFamily(familyDesc);
                    }

                    getHBaseAdmin().createTable(tableDesc);
                } else {
                    // an external table
                    throw new MetaException("HBase table " + tableName
                            + " doesn't exist while the table is "
                            + "declared as an external table.");
                }

            } else {
                if (!isExternal) {
                    throw new MetaException("Table " + tableName
                            + " already exists within HBase."
                            + " Use CREATE EXTERNAL TABLE instead to"
                            + " register it in HCatalog.");
                }
                // make sure the schema mapping is right
                tableDesc = getHBaseAdmin().getTableDescriptor(
                        Bytes.toBytes(tableName));

                for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
                    if (i == iKey) {
                        continue;
                    }

                    if (!tableDesc.hasFamily(hbaseColumnFamiliesBytes.get(i))) {
                        throw new MetaException("Column Family "
                                + hbaseColumnFamilies.get(i)
                                + " is not defined in hbase table " + tableName);
                    }
                }
            }

            // ensure the table is online
            new HTable(hbaseConf, tableDesc.getName());

            //Set up table in revision manager.
            RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hbaseConf);
            rm.createTable(tableName, new ArrayList<String>(uniqueColumnFamilies));

        } catch (MasterNotRunningException mnre) {
            throw new MetaException(StringUtils.stringifyException(mnre));
        } catch (IOException ie) {
            throw new MetaException(StringUtils.stringifyException(ie));
        } catch (IllegalArgumentException iae) {
            throw new MetaException(StringUtils.stringifyException(iae));
        }

    }

    /*
     * @param table
     *
     * @throws MetaException
     *
     * @see org.apache.hcatalog.storagehandler.HCatStorageHandler
     * #preDropTable(org.apache.hadoop.hive.metastore.api.Table)
     */
    @Override
    public void preDropTable(Table table) throws MetaException {
    }

    /*
     * @param table
     *
     * @throws MetaException
     *
     * @see org.apache.hcatalog.storagehandler.HCatStorageHandler
     * #rollbackCreateTable(org.apache.hadoop.hive.metastore.api.Table)
     */
    @Override
    public void rollbackCreateTable(Table table) throws MetaException {
        checkDeleteTable(table);
    }

    /*
     * @param table
     *
     * @throws MetaException
     *
     * @see org.apache.hcatalog.storagehandler.HCatStorageHandler
     * #rollbackDropTable(org.apache.hadoop.hive.metastore.api.Table)
     */
    @Override
    public void rollbackDropTable(Table table) throws MetaException {
    }

    /*
     * @return instance of HiveMetaHook
     *
     * @see org.apache.hcatalog.storagehandler.HCatStorageHandler#getMetaHook()
     */
    @Override
    public HiveMetaHook getMetaHook() {
        return this;
    }

    private HBaseAdmin getHBaseAdmin() throws MetaException {
        try {
            if (admin == null) {
                admin = new HBaseAdmin(this.getConf());
            }
            return admin;
        } catch (MasterNotRunningException mnre) {
            throw new MetaException(StringUtils.stringifyException(mnre));
        } catch (ZooKeeperConnectionException zkce) {
            throw new MetaException(StringUtils.stringifyException(zkce));
        }
    }

    private String getFullyQualifiedHBaseTableName(Table tbl) {
        String tableName = tbl.getParameters().get(HBaseSerDe.HBASE_TABLE_NAME);
        if (tableName == null) {
            tableName = tbl.getSd().getSerdeInfo().getParameters()
                    .get(HBaseSerDe.HBASE_TABLE_NAME);
        }
        if (tableName == null) {
            if (tbl.getDbName().equals(MetaStoreUtils.DEFAULT_DATABASE_NAME)) {
                tableName = tbl.getTableName();
            } else {
                tableName = tbl.getDbName() + "." + tbl.getTableName();
            }
            tableName = tableName.toLowerCase();
        }
        return tableName;
    }
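    /*
     * Resolution examples (illustrative names): table "web_logs" in database
     * "default" resolves to HBase table "web_logs", while the same table in
     * database "prod" resolves to "prod.web_logs" (lowercased). An explicit
     * hbase.table.name in the table or serde parameters overrides both.
     */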

    static String getFullyQualifiedHBaseTableName(HCatTableInfo tableInfo){
        String qualifiedName = tableInfo.getStorerInfo().getProperties()
                .getProperty(HBaseSerDe.HBASE_TABLE_NAME);
        if (qualifiedName == null) {
            String databaseName = tableInfo.getDatabaseName();
            String tableName = tableInfo.getTableName();
            if ((databaseName == null)
                    || (databaseName.equals(MetaStoreUtils.DEFAULT_DATABASE_NAME))) {
                qualifiedName = tableName;
            } else {
                qualifiedName = databaseName + "." + tableName;
            }
            qualifiedName = qualifiedName.toLowerCase();
        }
        return qualifiedName;
    }

    @Override
    public Class<? extends InputFormat> getInputFormatClass() {
        return HBaseInputFormat.class;
    }

    @Override
    public Class<? extends OutputFormat> getOutputFormatClass() {
        return HBaseBaseOutputFormat.class;
    }

    /*
    * @return subclass of SerDe
    *
    * @throws UnsupportedOperationException
    *
    * @see
    * org.apache.hcatalog.storagehandler.HCatStorageHandler#getSerDeClass()
    */
    @Override
    public Class<? extends SerDe> getSerDeClass()
            throws UnsupportedOperationException {
        return HBaseSerDe.class;
    }

    public Configuration getJobConf() {
        return jobConf;
    }

    @Override
    public Configuration getConf() {

        if (hbaseConf == null) {
            hbaseConf = HBaseConfiguration.create();
        }
        return hbaseConf;
    }

    @Override
    public void setConf(Configuration conf) {
        //setConf is called both during DDL operations and  mapred read/write jobs.
        //Creating a copy of conf for DDL and adding hbase-default and hbase-site.xml to it.
        //For jobs, maintaining a reference instead of cloning as we need to
        //  1) add hbase delegation token to the Credentials.
        //  2) set tmpjars on it. Putting in jobProperties does not get propagated to JobConf
        //     in case of InputFormat as they are maintained per partition.
        //Not adding hbase-default.xml and hbase-site.xml to jobConf as it will override any
        //hbase properties set in the JobConf by the user. In configureInputJobProperties and
        //configureOutputJobProperties, we take care of adding the default properties
        //that are not already present. TODO: Change to a copy for jobs after HCAT-308 is fixed.
        jobConf = conf;
        hbaseConf = HBaseConfiguration.create(conf);
    }

    private void checkDeleteTable(Table table) throws MetaException {
        boolean isExternal = MetaStoreUtils.isExternalTable(table);
        String tableName = getFullyQualifiedHBaseTableName(table);
        RevisionManager rm = null;
        try {
            if (!isExternal && getHBaseAdmin().tableExists(tableName)) {
                // we have created an HBase table, so we delete it to roll back;
                if (getHBaseAdmin().isTableEnabled(tableName)) {
                    getHBaseAdmin().disableTable(tableName);
                }
                getHBaseAdmin().deleteTable(tableName);

                //Drop table in revision manager.
                rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hbaseConf);
                rm.dropTable(tableName);
            }
        } catch (IOException ie) {
            throw new MetaException(StringUtils.stringifyException(ie));
        } finally {
            HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm);
        }
    }

    /**
     * Helper method to add the required dependency jars to the distributed cache.
     * @param conf job configuration to receive the tmpjars entries
     * @throws IOException
     */
    private void addOutputDependencyJars(Configuration conf) throws IOException {
        TableMapReduceUtil.addDependencyJars(conf,
                //ZK
                ZooKeeper.class,
                //HBase
                HTable.class,
                //Hive
                HiveException.class,
                //HCatalog jar
                HCatOutputFormat.class,
                //hcat hbase storage handler jar
                HBaseHCatStorageHandler.class,
                //hive hbase storage handler jar
                HBaseSerDe.class,
                //hive jar
                Table.class,
                //libthrift jar
                TBase.class,
                //hbase jar
                Bytes.class,
                //thrift-fb303 .jar
                FacebookBase.class,
                //guava jar
                ThreadFactoryBuilder.class);
    }
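    /*
     * Each class above stands in for one jar: TableMapReduceUtil.addDependencyJars
     * locates the jar containing each class and appends it to "tmpjars", so the
     * ZooKeeper, HBase, Hive, HCatalog, Thrift, fb303, and Guava jars all ship
     * with the job through the distributed cache.
     */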

    /**
     * Utility method to add hbase-default.xml and hbase-site.xml properties to a new map
     * if they are not already present in the jobConf.
     * @param jobConf Job configuration
     * @param newJobProperties  Map to which new properties should be added
     */
    private void addHbaseResources(Configuration jobConf,
            Map<String, String> newJobProperties) {
        Configuration conf = new Configuration(false);
        HBaseConfiguration.addHbaseResources(conf);
        for (Entry<String, String> entry : conf) {
            if (jobConf.get(entry.getKey()) == null)
                newJobProperties.put(entry.getKey(), entry.getValue());
        }
    }
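    /*
     * Merge semantics sketch: if hbase-site.xml sets hbase.zookeeper.quorum=zk1
     * but the jobConf already carries hbase.zookeeper.quorum=zk2 from the user,
     * the loop above skips that key and the user's value wins; only keys absent
     * from the jobConf are copied into newJobProperties. (The key name is a real
     * HBase property; the values are hypothetical.)
     */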

    public static boolean isBulkMode(OutputJobInfo outputJobInfo) {
        //Default is false
        String bulkMode = outputJobInfo.getTableInfo().getStorerInfo().getProperties()
                .getProperty(HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY,
                        "false");
        return "true".equals(bulkMode);
    }
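    /*
     * The flag is read from the storer properties captured at table-creation
     * time (for example via TBLPROPERTIES); the exact key string is defined by
     * HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY, whose literal value is
     * outside this listing.
     */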

    private String getScanColumns(HCatTableInfo tableInfo, String outputColSchema) throws IOException {
        StringBuilder builder = new StringBuilder();
        String hbaseColumnMapping = tableInfo.getStorerInfo().getProperties()
                .getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING);
        if (outputColSchema == null) {
            String[] splits = hbaseColumnMapping.split("[,]");
            for (int i = 0; i < splits.length; i++) {
                if (!splits[i].equals(HBaseSerDe.HBASE_KEY_COL))
                    builder.append(splits[i]).append(" ");
            }
        } else {
            HCatSchema outputSchema = (HCatSchema) HCatUtil.deserialize(outputColSchema);
            HCatSchema tableSchema = tableInfo.getDataColumns();
            List<String> outputFieldNames = outputSchema.getFieldNames();
            List<Integer> outputColumnMapping = new ArrayList<Integer>();
            for(String fieldName: outputFieldNames){
                int position = tableSchema.getPosition(fieldName);
                outputColumnMapping.add(position);
            }
            List<String> columnFamilies = new ArrayList<String>();
            List<String> columnQualifiers = new ArrayList<String>();
            HBaseUtil.parseColumnMapping(hbaseColumnMapping, columnFamilies, null,
                    columnQualifiers, null);
            for (int i = 0; i < outputColumnMapping.size(); i++) {
                int cfIndex = outputColumnMapping.get(i);
                String cf = columnFamilies.get(cfIndex);
                // Skip the key column; emit family:qualifier for the rest.
                if (!cf.equals(HBaseSerDe.HBASE_KEY_COL)) {
                    // Index qualifiers with cfIndex so they stay aligned with the
                    // family when the output schema is a projection.
                    String qualifier = columnQualifiers.get(cfIndex);
                    builder.append(cf);
                    builder.append(":");
                    if (qualifier != null) {
                        builder.append(qualifier);
                    }
                    builder.append(" ");
                }
            }
        }
        //Remove the trailing space delimiter, if any columns were emitted
        if (builder.length() > 0) {
            builder.deleteCharAt(builder.length() - 1);
        }
        return builder.toString();
    }
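    /*
     * Worked example (an illustrative mapping): with
     * hbase.columns.mapping = ":key,cf1:q1,cf2:q2" and no output schema, every
     * non-key entry is emitted, yielding "cf1:q1 cf2:q2"; with an output schema
     * projecting only the last data column, the result is "cf2:q2".
     */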

}