Package com.sap.hadoop.windowing.io

Source Code of com.sap.hadoop.windowing.io.IOUtils

package com.sap.hadoop.windowing.io;

import static com.sap.hadoop.windowing.Constants.INPUT_INPUTFORMAT_CLASS;
import static com.sap.hadoop.windowing.Constants.INPUT_KEY_CLASS;
import static com.sap.hadoop.windowing.Constants.INPUT_PATH;
import static com.sap.hadoop.windowing.Constants.INPUT_SERDE_CLASS;
import static com.sap.hadoop.windowing.Constants.INPUT_VALUE_CLASS;

import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

import com.sap.hadoop.HiveUtils;
import com.sap.hadoop.windowing.Constants;
import com.sap.hadoop.windowing.WindowingException;
import com.sap.hadoop.windowing.runtime2.Partition;

public class IOUtils
{
  private static final Log LOG = LogFactory.getLog("com.sap.hadoop.windowing.io");
 
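  /**
   * Builds a WindowingInput over a Hive table: resolves the table through the
   * MetaStore, instantiates its InputFormat, records the input path, key/value
   * classes and SerDe in the configuration, and initializes a
   * TableWindowingInput with the table's schema.
   */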
  @SuppressWarnings("unchecked")
  public static WindowingInput createTableWindowingInput(String dbName, String tableName, Configuration conf) throws WindowingException
  {
    try
    {
      HiveMetaStoreClient client = HiveUtils.getClient(conf);
      String db = HiveUtils.validateDB(client, dbName);
      Table t = HiveUtils.getTable(client, db, tableName);
      StorageDescriptor sd = t.getSd();
     
      HiveConf hConf = new HiveConf(conf, IOUtils.class);
      JobConf job = new JobConf(hConf);
     
      Class<? extends InputFormat<? extends Writable, ? extends Writable>> inputFormatClass =
        (Class<? extends InputFormat<? extends Writable, ? extends Writable>>) Class.forName(sd.getInputFormat());
      hConf.setClass("mapred.input.format.class", inputFormatClass, InputFormat.class);
      hConf.set(INPUT_INPUTFORMAT_CLASS, inputFormatClass.getName());
      InputFormat<? extends Writable, ? extends Writable> iFmt = inputFormatClass.newInstance();
      if (iFmt instanceof TextInputFormat)
      {
        ((TextInputFormat)iFmt).configure(job);
      }
     
      Path p = new Path(sd.getLocation());
      /*
       * Convert the Path in the StorageDescriptor into a Path in the current
       * FileSystem. Used in testing: jobs run on a MiniDFSCluster, whereas the
       * Hive metadata refers to a real cluster.
       */
      p = makeQualified(p, conf);
     
      FileInputFormat.addInputPath(job, p);
      InputSplit[] iSplits = iFmt.getSplits(job, 1);
      RecordReader<Writable, Writable> rdr =
          (RecordReader<Writable, Writable>) iFmt.getRecordReader(iSplits[0], job, Reporter.NULL);
     
      hConf.set(INPUT_PATH, sd.getLocation());
      hConf.set(INPUT_KEY_CLASS, rdr.createKey().getClass().getName());
      hConf.set(INPUT_VALUE_CLASS, rdr.createValue().getClass().getName());
     
      hConf.set(INPUT_SERDE_CLASS, sd.getSerdeInfo().getSerializationLib());
     
      TableWindowingInput tIn = new TableWindowingInput();
      tIn.initialize(null, hConf, MetaStoreUtils.getSchema(t));
      return tIn;
    }
    catch(WindowingException w)
    {
      throw w;
    }
    catch(Exception e)
    {
      throw new WindowingException(e);
    }
  }
 
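  /**
   * Builds a WindowingInput directly over an HDFS path; the InputFormat and
   * SerDe are supplied by the caller rather than looked up in the MetaStore.
   */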
  @SuppressWarnings("unchecked")
  public static WindowingInput createFileWindowingInput(String path, String inputFormatClassName,
      String serDeClassName, Properties serDeProperties, Configuration conf) throws WindowingException
  {
    try
    {
      HiveConf hConf = new HiveConf(conf, IOUtils.class);
      JobConf job = new JobConf(hConf);
      Path p = new Path(path);
      p = makeQualified(p, conf);

      Class<? extends InputFormat<? extends Writable, ? extends Writable>> inputFormatClass =
          (Class<? extends InputFormat<? extends Writable, ? extends Writable>>) Class.forName(inputFormatClassName);
      hConf.setClass("mapred.input.format.class", inputFormatClass, InputFormat.class);
      hConf.set(INPUT_INPUTFORMAT_CLASS, inputFormatClass.getName());
      InputFormat<? extends Writable, ? extends Writable> iFmt = inputFormatClass.newInstance();
      if (iFmt instanceof TextInputFormat)
      {
        ((TextInputFormat)iFmt).configure(job);
      }
      FileInputFormat.addInputPath(job, p);
      InputSplit[] iSplits = iFmt.getSplits(job, 1);
      RecordReader<Writable, Writable> rdr =
          (RecordReader<Writable, Writable>) iFmt.getRecordReader(iSplits[0], job, Reporter.NULL);
     
      hConf.set(INPUT_PATH, path);
      hConf.set(INPUT_KEY_CLASS, rdr.createKey().getClass().getName());
      hConf.set(INPUT_VALUE_CLASS, rdr.createValue().getClass().getName());

      hConf.set(INPUT_SERDE_CLASS, serDeClassName);
     
      TableWindowingInput tIn = new TableWindowingInput();
      tIn.initialize(null, hConf, serDeProperties);
      return tIn;
    }
    catch(Exception e)
    {
      throw new WindowingException(e);
    }
  }
 
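  /**
   * Materializes an entire WindowingInput into an in-memory Partition by
   * appending rows until the reader is exhausted.
   */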
  public static Partition createPartition(String partitionClass,
      int partitionMemSize, WindowingInput wIn) throws WindowingException
  {
    try
    {
      SerDe serDe = (SerDe) wIn.getDeserializer();
      StructObjectInspector oI = (StructObjectInspector) serDe
          .getObjectInspector();
      Partition p = new Partition(partitionClass, partitionMemSize,
          serDe, oI);
      Writable w = wIn.createRow();
      while (wIn.next(w) != -1)
      {
        p.append(w);
      }
      return p;
    }
    catch (WindowingException we)
    {
      throw we;
    }
    catch (Exception e)
    {
      throw new WindowingException(e);
    }
  }
 
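  /**
   * Prints every row of a Partition to the given stream, converting each row
   * to a standard Java object via the Partition's ObjectInspector.
   */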
  public static void dumpPartition(Partition p, PrintStream pw)
      throws WindowingException
  {
    try
    {
      int sz = p.size();
      ObjectInspector oi = p.getSerDe().getObjectInspector();
      for (int i = 0; i < sz; i++)
      {
        Object o = p.getAt(i);
        o = ObjectInspectorUtils.copyToStandardJavaObject(o, oi);
        pw.println(o);
      }
    }
    catch (SerDeException se)
    {
      throw new WindowingException(se);
    }
  }
 
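  /**
   * Re-roots a path onto the default FileSystem of the given configuration,
   * then rewrites it for local test mode if that mode is enabled.
   */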
  public static Path makeQualified(Path p, Configuration conf) throws IOException
  {
    FileSystem fs = FileSystem.get(conf);
    p = new Path(p.toUri().getPath()).makeQualified(fs);
    return convertToTest(p, conf);
  }
 
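  /**
   * In test-local mode, maps warehouse locations onto the configured test
   * data directory; otherwise returns the path unchanged.
   */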
  public static Path convertToTest(Path p, Configuration conf)
  {
    if (conf.getBoolean(Constants.WINDOWING_TEST_LOCAL_MODE, false))
    {
      String testDataDir = conf.get(Constants.WINDOWING_TEST_DATA_DIR);
      URI uri = p.toUri();
      String location = uri.getPath();
      location = location.replace("/user/hive/warehouse", testDataDir);
      return new Path(location);
    }
    return p;
  }
 
}
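
A minimal usage sketch (not part of the original source), assuming a Hive table reachable through the current Configuration. The database and table names, the partition class name, the memory size, and the IOUtilsUsageSketch class itself are illustrative placeholders, not values taken from this library.

package com.sap.hadoop.windowing.io;

import org.apache.hadoop.conf.Configuration;

import com.sap.hadoop.windowing.WindowingException;
import com.sap.hadoop.windowing.runtime2.Partition;

// Hypothetical driver showing the intended call sequence of IOUtils.
public class IOUtilsUsageSketch
{
  public static void main(String[] args) throws WindowingException
  {
    Configuration conf = new Configuration();

    // Open a windowing input over an existing Hive table;
    // "default" and "parts" are placeholder database/table names.
    WindowingInput wIn = IOUtils.createTableWindowingInput("default", "parts", conf);

    // Materialize all rows into an in-memory Partition. The partition class
    // name and memory size are hypothetical illustration values.
    Partition p = IOUtils.createPartition("com.example.SomePartitionList", 1000, wIn);

    // Print each row as a standard Java object.
    IOUtils.dumpPartition(p, System.out);
  }
}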