Package com.sap.hadoop

Source Code of com.sap.hadoop.HiveUtils

package com.sap.hadoop;

import java.io.File;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
//import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
//import org.apache.hadoop.mapred.Reporter;
//import org.apache.hadoop.mapred.TextInputFormat;

import com.sap.hadoop.metadata.Utils;
import com.sap.hadoop.windowing.WindowingException;

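/**
 * Static helper methods for working with the Hive metastore and for wiring Hive
 * tables into MapReduce jobs: looking up databases, tables and field schemas,
 * obtaining deserializers, adding a table's data location as job input, and
 * building RowResolvers over a table's regular, partition and virtual columns.
 * Checked exceptions from Hive and the metastore are wrapped in WindowingException.
 */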
public class HiveUtils
{
  private static final Log LOG = LogFactory.getLog("com.sap.hadoop.windowing");
 
 
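  /**
   * Returns the Hive metadata session object for the given configuration,
   * wrapping any HiveException in a WindowingException.
   */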
  public static Hive getHive(HiveConf hCfg) throws WindowingException
  {
    try
    {
      return Hive.get(hCfg);
    }
    catch(HiveException he)
    {
      throw new WindowingException(he);
    }
  }
 
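  /**
   * Looks up the table in the metastore, adds its data location as an input path
   * of the given JobConf, sets the job's InputFormat from the table's
   * StorageDescriptor, and returns the table's field schemas.
   * The FileSystem argument is currently unused.
   */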
  @SuppressWarnings("unchecked")
  public static List<FieldSchema> addTableasJobInput(String db, String table, JobConf job, FileSystem fs) throws WindowingException
  {
    LOG.info("HiveUtils::addTableasJobInput invoked");
    try
    {
      HiveMetaStoreClient client = getClient(job);

      // 1. get Table details from Hive metastore
      db = validateDB(client, db);
      Table t = getTable(client, db, table);
      StorageDescriptor sd = t.getSd();
     
      // 2. add table's location to job input
      FileInputFormat.addInputPath(job, new Path(sd.getLocation()));
     
      // 3. set job inputFormatClass, extract from StorageDescriptor
      Class<? extends InputFormat<? extends Writable, ? extends Writable>> inputFormatClass =
        (Class<? extends InputFormat<? extends Writable, ? extends Writable>>) Class.forName(sd.getInputFormat());
      job.setInputFormat(inputFormatClass);

      // 4. set job OutputValueClass
      //  - instantiate InputFormat
      //  - get inputSplits
      //  - instantiate Reader for the 1st split
      //  - get valueClass from reader

      /*
       * Creating an InputFormat instance and reading the splits is not needed here anymore.
       */
//      InputFormat<? extends Writable, ? extends Writable> iFmt = inputFormatClass.newInstance();
//      if (iFmt instanceof TextInputFormat)
//        ((TextInputFormat)iFmt).configure(job);
//      InputSplit[] iSplits = iFmt.getSplits(job, 1);
//      org.apache.hadoop.mapred.RecordReader<Writable, Writable> rdr =
//        (org.apache.hadoop.mapred.RecordReader<Writable, Writable>) iFmt.getRecordReader(iSplits[0], job, Reporter.NULL);

      /*
       * This is no longer always valid: the map output value class is dictated by query.input.deserializer.
       */
      //job.setMapOutputValueClass(rdr.createValue().getClass());

      // 5. set num reducers to number of input splits
//      job.setNumReduceTasks(iSplits.length);
     
      return client.getFields(db, table);
    }
    catch(WindowingException w)
    {
      throw w;
    }
    catch(Exception e)
    {
      throw new WindowingException(e);
    }
   
  }
 
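  /**
   * Returns the field schemas of the given table from the metastore,
   * after validating that the database and table exist.
   */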
  public static List<FieldSchema> getFields(String db, String table, JobConf job) throws WindowingException
  {
    LOG.info("HiveUtils::getFields invoked");
    try
    {
      HiveMetaStoreClient client = getClient(job);
      db = validateDB(client, db);
      getTable(client, db, table);
      return client.getFields(db, table);
    }
    catch(WindowingException w)
    {
      throw w;
    }
    catch(Exception e)
    {
      throw new WindowingException(e);
    }
  }
 
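  /**
   * Looks up the table in the metastore and returns the Deserializer
   * configured for it via MetaStoreUtils.
   */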
  public static Deserializer getDeserializer(String db, String table, Configuration conf) throws WindowingException
  {
    LOG.info("HiveUtils::getDeserializer invoked");
    try
    {
      HiveMetaStoreClient client = getClient(conf);
      db = validateDB(client, db);
      Table t = getTable(client, db, table);
      return MetaStoreUtils.getDeserializer(conf, t);
    }
    catch(WindowingException w)
    {
      throw w;
    }
    catch(Exception e)
    {
      throw new WindowingException(e);
    }
  }
 
  public static Deserializer getDeserializer(HiveConf conf, Table t) throws WindowingException
  {
    LOG.info("HiveUtils::getDeserializer invoked");
    try
    {
      return MetaStoreUtils.getDeserializer(conf, t);
    }
    catch(Exception e)
    {
      throw new WindowingException(e);
    }
  }
 
 
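  /**
   * Wraps the given Configuration in a HiveConf and opens a new metastore client.
   */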
  public static HiveMetaStoreClient getClient(Configuration conf) throws WindowingException
  {
    LOG.info("HiveUtils::getClient invoked");
    try
    {
      HiveConf hConf = new HiveConf(conf, conf.getClass());
      return new HiveMetaStoreClient(hConf);
    }
    catch(MetaException me)
    {
      throw new WindowingException(me);
    }
  }
 
  public static HiveConf getHiveConf(Configuration conf) throws WindowingException
  {
    LOG.info("HiveUtils::getHiveConf invoked");
    try
    {
      return new HiveConf(conf, conf.getClass());
    }
    catch(Exception me)
    {
      throw new WindowingException(me);
    }
  }
 
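  /**
   * Returns "default" when db is null; otherwise verifies that the database
   * exists in the metastore and throws a WindowingException if it does not.
   */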
  public static String validateDB(HiveMetaStoreClient client, String db) throws WindowingException
  {
    LOG.info("HiveUtils::validateDB invoked");
    try
    {
      List<String> dbs = client.getAllDatabases();
 
      if ( db == null )
      {
        return "default";
      }
     
      for(String d : dbs)
      {
        if (d.equals(db)) return db;
      }
      throw new WindowingException(Utils.sprintf("Unknown database %s", db));
    }
    catch(MetaException me)
    {
      throw new WindowingException(me);
    }
  }
 
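  /**
   * Fetches the metastore Table object for the given database and table name.
   */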
  public static Table getTable(HiveMetaStoreClient client, String db, String tableName) throws WindowingException
  {
    LOG.info("HiveUtils::getTable invoked on " + tableName);
    try
    {
      return client.getTable(db, tableName);
    }
    catch(Exception me)
    {
      throw new WindowingException(me);
    }
  }
 
  public static Table getTable(String db, String tableName, Configuration conf) throws WindowingException
  {
    HiveMetaStoreClient client = getClient(conf);
    db = validateDB(client, db);
    return getTable(client, db, tableName);
  }
 
  /**
   * Copied from hive.ql.exec.Utilities to keep the dependency on the exec jar to a minimum.
   * @param cloader the current class loader; must be a URLClassLoader
   * @param newPaths paths to append to the classpath
   * @return a new URLClassLoader that includes the additional paths
   * @throws Exception
   */
  public static ClassLoader addToClassPath(ClassLoader cloader,
      String[] newPaths) throws Exception
  {
    LOG.info("HiveUtils::addToClassPath invoked");
    URLClassLoader loader = (URLClassLoader) cloader;
    List<URL> curPath = Arrays.asList(loader.getURLs());
    ArrayList<URL> newPath = new ArrayList<URL>();

    // copy the current classpath components into a mutable list
    for (URL onePath : curPath)
    {
      newPath.add(onePath);
    }
    curPath = newPath;

    for (String onestr : newPaths)
    {
      // special processing for hadoop-17. file:// needs to be removed
      if (StringUtils.indexOf(onestr, "file://") == 0)
      {
        onestr = StringUtils.substring(onestr, 7);
      }

      @SuppressWarnings("deprecation")
      URL oneurl = (new File(onestr)).toURL();
      if (!curPath.contains(oneurl))
      {
        curPath.add(oneurl);
      }
    }

    return new URLClassLoader(curPath.toArray(new URL[0]), loader);
  }
 
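  /**
   * Builds a RowResolver for the table under the given alias: regular columns are
   * taken from the table's deserializer ObjectInspector, then partition columns
   * and Hive virtual columns are added.
   */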
  public static RowResolver getRowResolver(String db, String table, String alias, HiveConf conf) throws WindowingException
  {
    LOG.info("HiveUtils::getRowResolver invoked on " + table);
    try
    {
      HiveMetaStoreClient client = getClient(conf);

      db = validateDB(client, db);
      org.apache.hadoop.hive.ql.metadata.Table t = Hive.get(conf).getTable(db, table);
      StructObjectInspector rowObjectInspector = (StructObjectInspector) t.getDeserializer().getObjectInspector();
      RowResolver rwsch = getRowResolver(alias, rowObjectInspector);

      for (FieldSchema part_col : t.getPartCols())
      {
        LOG.trace("Adding partition col: " + part_col);
        rwsch.put(alias, part_col.getName(),
            new ColumnInfo(part_col.getName(),
                TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), alias, true)
        );
      }

      Iterator<VirtualColumn> vcs = VirtualColumn.getRegistry(conf).iterator();
      // use a list for easy customization
      List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
      while (vcs.hasNext())
      {
        VirtualColumn vc = vcs.next();
        rwsch.put(alias, vc.getName(),
            new ColumnInfo(vc.getName(),
                vc.getTypeInfo(), alias, true, vc.getIsHidden()
            )
        );
        vcList.add(vc);
      }

      return rwsch;
    }
    catch(WindowingException w)
    {
      throw w;
    }
    catch(Exception me)
    {
      throw new WindowingException(me);
    }
  }

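  /**
   * Builds a RowResolver containing one ColumnInfo per field of the given
   * StructObjectInspector, registered under the given table alias.
   */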
  public static RowResolver getRowResolver(String tabAlias, StructObjectInspector rowObjectInspector ) throws WindowingException
  {
    LOG.info("HiveUtils::getRowResolver invoked on ObjectInspector");
    try
    {
      RowResolver rwsch = new RowResolver();
      List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
      for (int i = 0; i < fields.size(); i++)
      {
        rwsch.put(tabAlias, fields.get(i).getFieldName(),
            new ColumnInfo(fields.get(i).getFieldName(),
                TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()),
                tabAlias,
                false)
        );
      }
      return rwsch;
    }
    catch(Exception me)
    {
      throw new WindowingException(me);
    }
  }
}
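
For orientation, here is a minimal sketch of how these helpers might be combined when preparing a job over a Hive table. The database and table names, the alias and the surrounding driver code are hypothetical and not part of this class; exception handling is omitted.

// Hypothetical driver code, shown only to illustrate the call sequence.
JobConf job = new JobConf(HiveUtils.class);
FileSystem fs = FileSystem.get(job);

// Register the table's data directory as job input, set its InputFormat,
// and obtain the column schema for downstream processing.
List<FieldSchema> cols = HiveUtils.addTableasJobInput("default", "mytable", job, fs);

// Resolve the table's columns (including partition and virtual columns) under an alias.
HiveConf hConf = HiveUtils.getHiveConf(job);
RowResolver rr = HiveUtils.getRowResolver("default", "mytable", "t", hConf);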