Package org.apache.hcatalog.common

Source Code of org.apache.hcatalog.common.HCatUtil

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.hcatalog.common;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hcatalog.data.Pair;
import org.apache.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hcatalog.data.schema.HCatSchema;
import org.apache.hcatalog.data.schema.HCatSchemaUtils;
import org.apache.hcatalog.mapreduce.FosterStorageHandler;
import org.apache.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hcatalog.mapreduce.HCatStorageHandler;
import org.apache.hcatalog.mapreduce.InputJobInfo;
import org.apache.hcatalog.mapreduce.OutputJobInfo;
import org.apache.hcatalog.mapreduce.PartInfo;
import org.apache.hcatalog.mapreduce.StorerInfo;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.security.auth.login.LoginException;

public class HCatUtil {

    private static final Logger LOG = LoggerFactory.getLogger(HCatUtil.class);
    private static volatile HiveClientCache hiveClientCache;
    private final static int DEFAULT_HIVE_CACHE_EXPIRY_TIME_SECONDS = 2 * 60;

    public static boolean checkJobContextIfRunningFromBackend(JobContext j) {
        if (j.getConfiguration().get("mapred.task.id", "").equals("") &&
                !("true".equals(j.getConfiguration().get("pig.illustrating")))) {
            return false;
        }
        return true;
    }

    public static String serialize(Serializable obj) throws IOException {
        if (obj == null) {
            return "";
        }
        try {
            ByteArrayOutputStream serialObj = new ByteArrayOutputStream();
            ObjectOutputStream objStream = new ObjectOutputStream(serialObj);
            objStream.writeObject(obj);
            objStream.close();
            return encodeBytes(serialObj.toByteArray());
        } catch (Exception e) {
            throw new IOException("Serialization error: " + e.getMessage(), e);
        }
    }

    public static Object deserialize(String str) throws IOException {
        if (str == null || str.length() == 0) {
            return null;
        }
        try {
            ByteArrayInputStream serialObj = new ByteArrayInputStream(
                decodeBytes(str));
            ObjectInputStream objStream = new ObjectInputStream(serialObj);
            return objStream.readObject();
        } catch (Exception e) {
            throw new IOException("Deserialization error: " + e.getMessage(), e);
        }
    }

    public static String encodeBytes(byte[] bytes) {
        StringBuffer strBuf = new StringBuffer();

        for (int i = 0; i < bytes.length; i++) {
            strBuf.append((char) (((bytes[i] >> 4) & 0xF) + ('a')));
            strBuf.append((char) (((bytes[i]) & 0xF) + ('a')));
        }

        return strBuf.toString();
    }

    public static byte[] decodeBytes(String str) {
        byte[] bytes = new byte[str.length() / 2];
        for (int i = 0; i < str.length(); i += 2) {
            char c = str.charAt(i);
            bytes[i / 2] = (byte) ((c - 'a') << 4);
            c = str.charAt(i + 1);
            bytes[i / 2] += (c - 'a');
        }
        return bytes;
    }

    public static List<HCatFieldSchema> getHCatFieldSchemaList(
        FieldSchema... fields) throws HCatException {
        List<HCatFieldSchema> result = new ArrayList<HCatFieldSchema>(
            fields.length);

        for (FieldSchema f : fields) {
            result.add(HCatSchemaUtils.getHCatFieldSchema(f));
        }

        return result;
    }

    public static List<HCatFieldSchema> getHCatFieldSchemaList(
        List<FieldSchema> fields) throws HCatException {
        if (fields == null) {
            return null;
        } else {
            List<HCatFieldSchema> result = new ArrayList<HCatFieldSchema>();
            for (FieldSchema f : fields) {
                result.add(HCatSchemaUtils.getHCatFieldSchema(f));
            }
            return result;
        }
    }

    public static HCatSchema extractSchema(Table table) throws HCatException {
        return new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols()));
    }

    public static HCatSchema extractSchema(Partition partition) throws HCatException {
        return new HCatSchema(HCatUtil.getHCatFieldSchemaList(partition.getCols()));
    }

    public static List<FieldSchema> getFieldSchemaList(
        List<HCatFieldSchema> hcatFields) {
        if (hcatFields == null) {
            return null;
        } else {
            List<FieldSchema> result = new ArrayList<FieldSchema>();
            for (HCatFieldSchema f : hcatFields) {
                result.add(HCatSchemaUtils.getFieldSchema(f));
            }
            return result;
        }
    }

    public static Table getTable(HiveMetaStoreClient client, String dbName, String tableName)
        throws NoSuchObjectException, TException, MetaException {
        return new Table(client.getTable(dbName, tableName));
    }

    public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException {
        HCatSchema tableSchema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols()));

        if (table.getPartitionKeys().size() != 0) {

            // add partition keys to table schema
            // NOTE : this assumes that we do not ever have ptn keys as columns
            // inside the table schema as well!
            for (FieldSchema fs : table.getPartitionKeys()) {
                tableSchema.append(HCatSchemaUtils.getHCatFieldSchema(fs));
            }
        }
        return tableSchema;
    }

    /**
     * return the partition columns from a table instance
     *
     * @param table the instance to extract partition columns from
     * @return HCatSchema instance which contains the partition columns
     * @throws IOException
     */
    public static HCatSchema getPartitionColumns(Table table) throws IOException {
        HCatSchema cols = new HCatSchema(new LinkedList<HCatFieldSchema>());
        if (table.getPartitionKeys().size() != 0) {
            for (FieldSchema fs : table.getPartitionKeys()) {
                cols.append(HCatSchemaUtils.getHCatFieldSchema(fs));
            }
        }
        return cols;
    }

    /**
     * Validate partition schema, checks if the column types match between the
     * partition and the existing table schema. Returns the list of columns
     * present in the partition but not in the table.
     *
     * @param table the table
     * @param partitionSchema the partition schema
     * @return the list of newly added fields
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public static List<FieldSchema> validatePartitionSchema(Table table,
                                                            HCatSchema partitionSchema) throws IOException {
        Map<String, FieldSchema> partitionKeyMap = new HashMap<String, FieldSchema>();

        for (FieldSchema field : table.getPartitionKeys()) {
            partitionKeyMap.put(field.getName().toLowerCase(), field);
        }

        List<FieldSchema> tableCols = table.getCols();
        List<FieldSchema> newFields = new ArrayList<FieldSchema>();

        for (int i = 0; i < partitionSchema.getFields().size(); i++) {

            FieldSchema field = HCatSchemaUtils.getFieldSchema(partitionSchema
                .getFields().get(i));

            FieldSchema tableField;
            if (i < tableCols.size()) {
                tableField = tableCols.get(i);

                if (!tableField.getName().equalsIgnoreCase(field.getName())) {
                    throw new HCatException(
                        ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH,
                        "Expected column <" + tableField.getName()
                            + "> at position " + (i + 1)
                            + ", found column <" + field.getName()
                            + ">");
                }
            } else {
                tableField = partitionKeyMap.get(field.getName().toLowerCase());

                if (tableField != null) {
                    throw new HCatException(
                        ErrorType.ERROR_SCHEMA_PARTITION_KEY, "Key <"
                        + field.getName() + ">");
                }
            }

            if (tableField == null) {
                // field present in partition but not in table
                newFields.add(field);
            } else {
                // field present in both. validate type has not changed
                TypeInfo partitionType = TypeInfoUtils
                    .getTypeInfoFromTypeString(field.getType());
                TypeInfo tableType = TypeInfoUtils
                    .getTypeInfoFromTypeString(tableField.getType());

                if (!partitionType.equals(tableType)) {
                    throw new HCatException(
                        ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, "Column <"
                        + field.getName() + ">, expected <"
                        + tableType.getTypeName() + ">, got <"
                        + partitionType.getTypeName() + ">");
                }
            }
        }

        return newFields;
    }

    /**
     * Test if the first FsAction is more permissive than the second. This is
     * useful in cases where we want to ensure that a file owner has more
     * permissions than the group they belong to, for eg. More completely(but
     * potentially more cryptically) owner-r >= group-r >= world-r : bitwise
     * and-masked with 0444 => 444 >= 440 >= 400 >= 000 owner-w >= group-w >=
     * world-w : bitwise and-masked with &0222 => 222 >= 220 >= 200 >= 000
     * owner-x >= group-x >= world-x : bitwise and-masked with &0111 => 111 >=
     * 110 >= 100 >= 000
     *
     * @return true if first FsAction is more permissive than the second, false
     *         if not.
     */
    public static boolean validateMorePermissive(FsAction first, FsAction second) {
        if ((first == FsAction.ALL) || (second == FsAction.NONE)
            || (first == second)) {
            return true;
        }
        switch (first) {
        case READ_EXECUTE:
            return ((second == FsAction.READ) || (second == FsAction.EXECUTE));
        case READ_WRITE:
            return ((second == FsAction.READ) || (second == FsAction.WRITE));
        case WRITE_EXECUTE:
            return ((second == FsAction.WRITE) || (second == FsAction.EXECUTE));
        }
        return false;
    }

    /**
     * Ensure that read or write permissions are not granted without also
     * granting execute permissions. Essentially, r-- , rw- and -w- are invalid,
     * r-x, -wx, rwx, ---, --x are valid
     *
     * @param perms The FsAction to verify
     * @return true if the presence of read or write permission is accompanied
     *         by execute permissions
     */
    public static boolean validateExecuteBitPresentIfReadOrWrite(FsAction perms) {
        if ((perms == FsAction.READ) || (perms == FsAction.WRITE)
            || (perms == FsAction.READ_WRITE)) {
            return false;
        }
        return true;
    }

    public static Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> getJobTrackerDelegationToken(
        Configuration conf, String userName) throws Exception {
        // LOG.info("getJobTrackerDelegationToken("+conf+","+userName+")");
        JobClient jcl = new JobClient(new JobConf(conf, HCatOutputFormat.class));
        Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> t = jcl
            .getDelegationToken(new Text(userName));
        // LOG.info("got "+t);
        return t;

        // return null;
    }

    public static Token<? extends AbstractDelegationTokenIdentifier> extractThriftToken(
        String tokenStrForm, String tokenSignature) throws MetaException,
        TException, IOException {
        // LOG.info("extractThriftToken("+tokenStrForm+","+tokenSignature+")");
        Token<? extends AbstractDelegationTokenIdentifier> t = new Token<DelegationTokenIdentifier>();
        t.decodeFromUrlString(tokenStrForm);
        t.setService(new Text(tokenSignature));
        // LOG.info("returning "+t);
        return t;
    }

    /**
     * Create an instance of a storage handler defined in storerInfo. If one cannot be found
     * then FosterStorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe.
     * This StorageHandler assumes the other supplied storage artifacts are for a file-based storage system.
     * @param conf job's configuration will be used to configure the Configurable StorageHandler
     * @param storerInfo StorerInfo to definining the StorageHandler and InputFormat, OutputFormat and SerDe
     * @return storageHandler instance
     * @throws IOException
     */
    public static HCatStorageHandler getStorageHandler(Configuration conf, StorerInfo storerInfo) throws IOException {
        return getStorageHandler(conf,
            storerInfo.getStorageHandlerClass(),
            storerInfo.getSerdeClass(),
            storerInfo.getIfClass(),
            storerInfo.getOfClass());
    }

    public static HCatStorageHandler getStorageHandler(Configuration conf, PartInfo partitionInfo) throws IOException {
        return HCatUtil.getStorageHandler(
            conf,
            partitionInfo.getStorageHandlerClassName(),
            partitionInfo.getSerdeClassName(),
            partitionInfo.getInputFormatClassName(),
            partitionInfo.getOutputFormatClassName());
    }

    /**
     * Create an instance of a storage handler. If storageHandler == null,
     * then surrrogate StorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe.
     * This StorageHandler assumes the other supplied storage artifacts are for a file-based storage system.
     * @param conf job's configuration will be used to configure the Configurable StorageHandler
     * @param storageHandler fully qualified class name of the desired StorageHandle instance
     * @param serDe fully qualified class name of the desired SerDe instance
     * @param inputFormat fully qualified class name of the desired InputFormat instance
     * @param outputFormat fully qualified class name of the desired outputFormat instance
     * @return storageHandler instance
     * @throws IOException
     */
    public static HCatStorageHandler getStorageHandler(Configuration conf,
                                                       String storageHandler,
                                                       String serDe,
                                                       String inputFormat,
                                                       String outputFormat)
        throws IOException {

        if ((storageHandler == null) || (storageHandler.equals(FosterStorageHandler.class.getName()))) {
            try {
                FosterStorageHandler fosterStorageHandler =
                    new FosterStorageHandler(inputFormat, outputFormat, serDe);
                fosterStorageHandler.setConf(conf);
                return fosterStorageHandler;
            } catch (ClassNotFoundException e) {
                throw new IOException("Failed to load "
                    + "foster storage handler", e);
            }
        }

        try {
            Class<? extends HCatStorageHandler> handlerClass =
                (Class<? extends HCatStorageHandler>) Class
                    .forName(storageHandler, true, JavaUtils.getClassLoader());
            return (HCatStorageHandler) ReflectionUtils.newInstance(
                handlerClass, conf);
        } catch (ClassNotFoundException e) {
            throw new IOException("Error in loading storage handler."
                + e.getMessage(), e);
        }
    }

    public static Pair<String, String> getDbAndTableName(String tableName) throws IOException {
        String[] dbTableNametokens = tableName.split("\\.");
        if (dbTableNametokens.length == 1) {
            return new Pair<String, String>(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
        } else if (dbTableNametokens.length == 2) {
            return new Pair<String, String>(dbTableNametokens[0], dbTableNametokens[1]);
        } else {
            throw new IOException("tableName expected in the form "
                + "<databasename>.<table name> or <table name>. Got " + tableName);
        }
    }

    public static Map<String, String>
    getInputJobProperties(HCatStorageHandler storageHandler,
                          InputJobInfo inputJobInfo) {
        TableDesc tableDesc = new TableDesc(storageHandler.getSerDeClass(),
            storageHandler.getInputFormatClass(),
            storageHandler.getOutputFormatClass(),
            inputJobInfo.getTableInfo().getStorerInfo().getProperties());
        if (tableDesc.getJobProperties() == null) {
            tableDesc.setJobProperties(new HashMap<String, String>());
        }

        Map<String, String> jobProperties = new HashMap<String, String>();
        try {
            tableDesc.getJobProperties().put(
                HCatConstants.HCAT_KEY_JOB_INFO,
                HCatUtil.serialize(inputJobInfo));

            storageHandler.configureInputJobProperties(tableDesc,
                jobProperties);

        } catch (IOException e) {
            throw new IllegalStateException(
                "Failed to configure StorageHandler", e);
        }

        return jobProperties;
    }

    @InterfaceAudience.Private
    @InterfaceStability.Evolving
    public static void
    configureOutputStorageHandler(HCatStorageHandler storageHandler,
                                  Configuration conf,
                                  OutputJobInfo outputJobInfo) {
        //TODO replace IgnoreKeyTextOutputFormat with a
        //HiveOutputFormatWrapper in StorageHandler
        TableDesc tableDesc = new TableDesc(storageHandler.getSerDeClass(),
            storageHandler.getInputFormatClass(),
            IgnoreKeyTextOutputFormat.class,
            outputJobInfo.getTableInfo().getStorerInfo().getProperties());
        if (tableDesc.getJobProperties() == null)
            tableDesc.setJobProperties(new HashMap<String, String>());
        for (Map.Entry<String, String> el : conf) {
            tableDesc.getJobProperties().put(el.getKey(), el.getValue());
        }

        Map<String, String> jobProperties = new HashMap<String, String>();
        try {
            tableDesc.getJobProperties().put(
                HCatConstants.HCAT_KEY_OUTPUT_INFO,
                HCatUtil.serialize(outputJobInfo));

            storageHandler.configureOutputJobProperties(tableDesc,
                jobProperties);

            for (Map.Entry<String, String> el : jobProperties.entrySet()) {
                conf.set(el.getKey(), el.getValue());
            }
        } catch (IOException e) {
            throw new IllegalStateException(
                "Failed to configure StorageHandler", e);
        }
    }

    /**
     * Replace the contents of dest with the contents of src
     * @param src
     * @param dest
     */
    public static void copyConf(Configuration src, Configuration dest) {
        dest.clear();
        for (Map.Entry<String, String> el : src) {
            dest.set(el.getKey(), el.getValue());
        }
    }

    /**
     * Get or create a hive client depending on whether it exits in cache or not
     * @param hiveConf The hive configuration
     * @return the client
     * @throws MetaException When HiveMetaStoreClient couldn't be created
     * @throws IOException
     */
    public static HiveMetaStoreClient getHiveClient(HiveConf hiveConf)
        throws MetaException, IOException {

        // Singleton behaviour: create the cache instance if required. The cache needs to be created lazily and
        // using the expiry time available in hiveConf.

        if (hiveClientCache == null) {
            synchronized (HiveMetaStoreClient.class) {
                if (hiveClientCache == null) {
                    hiveClientCache = new HiveClientCache(hiveConf.getInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME,
                        DEFAULT_HIVE_CACHE_EXPIRY_TIME_SECONDS));
                }
            }
        }
        try {
            return hiveClientCache.get(hiveConf);
        } catch (LoginException e) {
            throw new IOException("Couldn't create hiveMetaStoreClient, Error getting UGI for user", e);
        }
    }

    public static void closeHiveClientQuietly(HiveMetaStoreClient client) {
        try {
            if (client != null)
                client.close();
        } catch (Exception e) {
            LOG.debug("Error closing metastore client. Ignored the error.", e);
        }
    }

    public static HiveConf getHiveConf(Configuration conf)
        throws IOException {

        HiveConf hiveConf = new HiveConf(conf, HCatUtil.class);

        //copy the hive conf into the job conf and restore it
        //in the backend context
        if (conf.get(HCatConstants.HCAT_KEY_HIVE_CONF) == null) {
            conf.set(HCatConstants.HCAT_KEY_HIVE_CONF,
                HCatUtil.serialize(hiveConf.getAllProperties()));
        } else {
            //Copy configuration properties into the hive conf
            Properties properties = (Properties) HCatUtil.deserialize(
                conf.get(HCatConstants.HCAT_KEY_HIVE_CONF));

            for (Map.Entry<Object, Object> prop : properties.entrySet()) {
                if (prop.getValue() instanceof String) {
                    hiveConf.set((String) prop.getKey(), (String) prop.getValue());
                } else if (prop.getValue() instanceof Integer) {
                    hiveConf.setInt((String) prop.getKey(),
                        (Integer) prop.getValue());
                } else if (prop.getValue() instanceof Boolean) {
                    hiveConf.setBoolean((String) prop.getKey(),
                        (Boolean) prop.getValue());
                } else if (prop.getValue() instanceof Long) {
                    hiveConf.setLong((String) prop.getKey(), (Long) prop.getValue());
                } else if (prop.getValue() instanceof Float) {
                    hiveConf.setFloat((String) prop.getKey(),
                        (Float) prop.getValue());
                }
            }
        }

        if (conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) {
            hiveConf.set("hive.metastore.token.signature",
                conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE));
        }

        return hiveConf;
    }


    public static JobConf getJobConfFromContext(JobContext jobContext) {
        JobConf jobConf;
        // we need to convert the jobContext into a jobConf
        // 0.18 jobConf (Hive) vs 0.20+ jobContext (HCat)
        // begin conversion..
        jobConf = new JobConf(jobContext.getConfiguration());
        // ..end of conversion


        return jobConf;
    }

    public static void copyJobPropertiesToJobConf(
        Map<String, String> jobProperties, JobConf jobConf) {
        for (Map.Entry<String, String> entry : jobProperties.entrySet()) {
            jobConf.set(entry.getKey(), entry.getValue());
        }
    }
   

    public static boolean isHadoop23() {
        String version = org.apache.hadoop.util.VersionInfo.getVersion();
        if (version.matches("\\b0\\.23\\..+\\b")||version.matches("\\b2\\..*"))
            return true;
        return false;
    }
}
TOP

Related Classes of org.apache.hcatalog.common.HCatUtil

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.