Source Code of com.sap.hadoop.windowing.runtime2.mr.MRExecutor

package com.sap.hadoop.windowing.runtime2.mr;

import java.io.IOException;
import java.net.URI;
import java.util.Properties;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExtractDesc;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.SerDe;

import com.sap.hadoop.windowing.WindowingException;
import com.sap.hadoop.windowing.query2.SerializationUtils;
import com.sap.hadoop.windowing.query2.definition.QueryDef;
import com.sap.hadoop.windowing.query2.definition.QueryOutputDef;
import com.sap.hadoop.windowing.query2.translate.TranslateUtils;
import com.sap.hadoop.windowing.runtime2.Executor;
import com.sap.hadoop.windowing.runtime2.RuntimeUtils;
import com.sap.hadoop.windowing.runtime2.WindowingShell;

public class MRExecutor extends Executor
{

  /**
   * Create a MapredWork object and an operator tree for processing
   * queries with table functions, then execute the plan through the
   * Hive runtime (see executePlan).
   */
  @Override
  public void execute(QueryDef qdef, WindowingShell wShell)
      throws WindowingException
  {
    deleteQueryOutputDir(qdef);
   
    MapredWork mr = PlanUtils.getMapRedWork();
    try
    {
      createOperatorTree(qdef, mr);
      executePlan(mr, wShell.getCfg());
    }
    catch (WindowingException we)
    {
      // Already the declared exception type; rethrow without re-wrapping.
      throw we;
    }
    catch (Exception e)
    {
      throw new WindowingException(e);
    }
  }

  /**
   * Initialize the data structures required to build the operator
   * tree, then create the map-side and reduce-side operator trees.
   * @param qdef
   * @param mr
   * @throws SemanticException
   * @throws WindowingException
   */
  private void createOperatorTree(QueryDef qdef, MapredWork mr)
      throws SemanticException, WindowingException
  {
    MRUtils mrUtils = new MRUtils(qdef);
    mrUtils.initialize();
    createMapSideTree(qdef, mrUtils, mr);
    createReduceSideTree(qdef, mrUtils, mr);
  }
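
  /*
   * Putting the two trees together, the plan this executor submits has
   * the following shape (the map-side PTFOperator appears only when
   * MRUtils.addPTFMapOperator(qdef) returns true):
   *
   *   Map:    MapOperator -> [PTFOperator ->] ReduceSinkOperator
   *   Reduce: ExtractOperator -> PTFOperator -> FileSinkOperator
   */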
 
  /**
   * Build the map-side operator tree. If the query has no map phase,
   * the tree is MapOperator->ReduceSinkOperator. If the query has a
   * map phase, the PTFOperator must also run on the map side, and the
   * tree becomes MapOperator->PTFOperator->ReduceSinkOperator.
   * @param qdef
   * @param mrUtils
   * @param mr
   * @throws WindowingException
   */
  @SuppressWarnings("unchecked")
  private void createMapSideTree(QueryDef qdef, MRUtils mrUtils, MapredWork mr) throws WindowingException
  {
    // The ReduceSinkOperator is needed in both cases; build it once.
    Operator<ReduceSinkDesc> rsOp = OperatorFactory.get(PlanUtils
        .getReduceSinkDesc(mrUtils.getOrderCols(),
            mrUtils.getValueCols(), mrUtils.getOutputColumnNames(),
            true, -1, mrUtils.getPartCols(),
            mrUtils.getOrderString().toString(), -1));

    Operator<?> mapRoot = rsOp;
    if (MRUtils.addPTFMapOperator(qdef))
    {
      // The query has a map phase: insert a PTFOperator between the
      // MapOperator and the ReduceSinkOperator.
      mapRoot = RuntimeUtils.createPTFOperator(
          new PTFDesc(SerializationUtils.serializeQueryDef(qdef)), rsOp);
    }

    Utilities.addMapWork(mr, mrUtils.getInputTable(),
        mrUtils.getHiveTableDef().getAlias(), mapRoot);
    mr.setKeyDesc(rsOp.getConf().getKeySerializeInfo());
    mr.getTagToValueDesc().add(rsOp.getConf().getValueSerializeInfo());
  }

  /**
   * The reduce-side plan always looks like
   * ExtractOperator->PTFOperator->FileSinkOperator.
   * Use the data structures initialized in MRUtils to
   * create the operators here.
   * @param qdef
   * @param mrUtils
   * @param mr
   * @throws WindowingException
   */
  @SuppressWarnings("unchecked")
  private void createReduceSideTree(QueryDef qdef, MRUtils mrUtils, MapredWork mr) throws WindowingException
  {
    // Build the reduce-side tree bottom-up: FileSink <- PTF <- Extract.
    Operator<FileSinkDesc> op4 = OperatorFactory.get(new FileSinkDesc(
        mrUtils.getOutputPath(), createOutputTableDesc(qdef), false));

    Operator<PTFDesc> op3 = RuntimeUtils.createPTFOperator(new PTFDesc(
        SerializationUtils.serializeQueryDef(qdef)), op4);

    // The ExtractOperator pulls the VALUE columns out of the shuffled
    // rows and forwards them to the PTFOperator.
    Operator<ExtractDesc> op2 = OperatorFactory.get(
        new ExtractDesc(TranslateUtils.getStringColumn(
            Utilities.ReduceField.VALUE.toString())), op3);

    mr.setReducer(op2);
  }

  /**
   * Execute the MapredWork plan on Hadoop. MapRedTask (an extension
   * of ExecDriver) wraps the plan and runs it through the Hive
   * runtime.
   * @param mr
   * @param hiveConf
   * @throws Exception
   */
  private void executePlan(MapredWork mr, HiveConf hiveConf) throws Exception
  {
    MapRedTask mrtask = new MapRedTask();
    DriverContext dctx = new DriverContext();
    mrtask.setWork(mr);
    mrtask.initialize(hiveConf, null, dctx);
    int exitVal = mrtask.execute(dctx);

    if (exitVal != 0)
    {
      System.out.println("Query execution failed with exit status: "
          + exitVal);
      throw new WindowingException(
          "Query execution failed with exit status: " + exitVal);
    }
    System.out.println("Query execution completed successfully");

  }
 
  /**
   * Use the settings on the QueryOutputDef to define the
   * properties of the output table in Hive.
   * @param qDef
   * @return the TableDesc describing the query's output table
   * @throws WindowingException
   */
  static TableDesc createOutputTableDesc(QueryDef qDef) throws WindowingException
  {
    QueryOutputDef oDef = qDef.getOutput();
    Class<? extends SerDe> serDeClass = oDef.getSerDe().getClass();
    Properties p = oDef.getSpec().getSerDeProps();
    String columnNamesList = p.getProperty(Constants.LIST_COLUMNS);
    String columnTypesList = p.getProperty(Constants.LIST_COLUMN_TYPES);
    String fieldSeparator = p.getProperty(Constants.FIELD_DELIM, Integer.toString(Utilities.ctrlaCode));
    return PlanUtils.getTableDesc(serDeClass, fieldSeparator,
            columnNamesList, columnTypesList, false);
  }
 
  /**
   * Recursively delete the query's output directory if it already
   * exists, so the FileSinkOperator can write fresh output.
   * @param qDef
   * @throws WindowingException
   */
  static void deleteQueryOutputDir(QueryDef qDef) throws WindowingException
  {
    try
    {
      String outputPath = qDef.getOutput().getSpec().getPath();
      FileSystem fs = FileSystem.get(URI.create(outputPath), qDef.getTranslationInfo().getHiveCfg());
      Path p = new Path(outputPath);
      if ( fs.exists(p))
      {
        fs.delete(p, true);
      }
    }
    catch(IOException ie)
    {
      throw new WindowingException(ie);
    }
  }

}
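
For context, here is a minimal usage sketch. It is not part of the original source: it assumes a QueryDef already produced by the windowing translator and a WindowingShell wired with the session's HiveConf (both constructions are project-specific and elided), and only the MRExecutor.execute(...) call is taken from the class above.

import com.sap.hadoop.windowing.WindowingException;
import com.sap.hadoop.windowing.query2.definition.QueryDef;
import com.sap.hadoop.windowing.runtime2.Executor;
import com.sap.hadoop.windowing.runtime2.WindowingShell;
import com.sap.hadoop.windowing.runtime2.mr.MRExecutor;

public class MRExecutorExample
{
  // qdef and wShell are assumed to be built elsewhere by the windowing
  // translator / shell setup; their construction is project-specific.
  public static void runQuery(QueryDef qdef, WindowingShell wShell)
      throws WindowingException
  {
    Executor exec = new MRExecutor();
    // Deletes any stale output directory, builds the map-side and
    // reduce-side operator trees, and submits the MapredWork plan.
    exec.execute(qdef, wShell);
  }
}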