Source Code of com.sap.hadoop.windowing.runtime2.mr.PTFOperator$ForwardPTF

package com.sap.hadoop.windowing.runtime2.mr;

import java.io.ByteArrayInputStream;
import java.io.Serializable;
import java.util.ArrayList;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.exec.ExtractOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.io.Writable;

import com.sap.hadoop.windowing.WindowingException;
import com.sap.hadoop.windowing.query2.SerializationUtils;
import com.sap.hadoop.windowing.query2.definition.ColumnDef;
import com.sap.hadoop.windowing.query2.definition.PartitionDef;
import com.sap.hadoop.windowing.query2.definition.QueryDef;
import com.sap.hadoop.windowing.query2.definition.TableFuncDef;
import com.sap.hadoop.windowing.query2.translate.QueryDefDeserializer;
import com.sap.hadoop.windowing.query2.translate.QueryDefVisitor;
import com.sap.hadoop.windowing.query2.translate.QueryDefWalker;
import com.sap.hadoop.windowing.runtime2.Executor;
import com.sap.hadoop.windowing.runtime2.Executor.ForwardSink;
import com.sap.hadoop.windowing.runtime2.Partition;
import com.sap.hadoop.windowing.runtime2.PartitionIterator;
import com.sap.hadoop.windowing.runtime2.RuntimeUtils;

public class PTFOperator extends Operator<PTFDesc> implements Serializable
{

  private static final long serialVersionUID = 1L;
  QueryDef qDef;
  Partition inputPart;
  boolean isMapOperator;
 
  transient WindowingKeyWrapperFactory keyWrapperFactory;
  protected transient WindowingKeyWrapper currentKeys;
  protected transient WindowingKeyWrapper newKeys;
  transient HiveConf hiveConf;

  /*
   * 1. Find out if the operator is invoked at Map-Side or Reduce-side
   * 2. Get the deserialized QueryDef
   * 3. Reconstruct the transient variables in QueryDef
   * 4. Create input partition to store rows coming from previous operator
   */
  @Override
  protected void initializeOp(Configuration jobConf) throws HiveException
  {
    hiveConf = new HiveConf(jobConf, PTFOperator.class);
    // if the parent is an ExtractOperator, this invocation is from the reduce side
    Operator<? extends OperatorDesc> parentOp = getParentOperators().get(0);
    isMapOperator = !(parentOp instanceof ExtractOperator);

    // deserialize the QueryDef from the plan string carried in PTFDesc
    qDef = (QueryDef) SerializationUtils
        .deserialize(new ByteArrayInputStream(conf.getQueryDefStr()
            .getBytes()));
    try
    {
      reconstructQueryDef(hiveConf);
      inputPart = RuntimeUtils.createFirstPartitionForChain(qDef,
          inputObjInspectors[0], hiveConf, isMapOperator);
    }
    catch (WindowingException we)
    {
      throw new HiveException(
          "Cannot create input partition for PTFOperator.", we);
    }
    // OI for FileSinkOperator is taken from select-list (reduce-side)
    // OI for ReduceSinkOperator is taken from TODO
    if (isMapOperator)
    {
      TableFuncDef tDef = RuntimeUtils.getFirstTableFunction(qDef);
      outputObjInspector = tDef.getMapOI();
    }
    else
    {
      outputObjInspector = qDef.getSelectList().getOI();
    }
   
    setupKeysWrapper(inputObjInspectors[0]);
   
    super.initializeOp(jobConf);
  }
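
  /*
   * For reference (a hedged sketch, not part of the original file): the plan
   * string consumed in initializeOp is expected to be produced on the client
   * side by the matching serializer. Assuming SerializationUtils exposes a
   * serialize(Object, OutputStream) counterpart to deserialize, the round trip
   * is roughly:
   *
   *   ByteArrayOutputStream bos = new ByteArrayOutputStream();
   *   SerializationUtils.serialize(qDef, bos);   // driver side
   *   ptfDesc.setQueryDefStr(bos.toString());    // ptfDesc is hypothetical here
   *
   *   // this operator then rebuilds the QueryDef:
   *   qDef = (QueryDef) SerializationUtils.deserialize(
   *       new ByteArrayInputStream(ptfDesc.getQueryDefStr().getBytes()));
   */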

  @Override
  protected void closeOp(boolean abort) throws HiveException
  {
    super.closeOp(abort);
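    // flush whatever rows are still buffered in inputPart: the last
    // (or, on the map side, the only) partition has no trailing key change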
    if (isMapOperator)
    {
      processMapFunction();
    }
    else
    {
      processInputPartition();
    }
  }

  @Override
  public void processOp(Object row, int tag) throws HiveException
  {
    try
    {
      if (!isMapOperator)
      {
        /*
         * Check if the current row belongs to the current accumulated Partition:
         * - if not:
         *   - process the current Partition
         *   - reset the input Partition
         * - set currentKeys to newKeys if currentKeys is null or has changed.
         */
        newKeys.getNewKey(row, inputPart.getOI());
        boolean keysAreEqual = currentKeys != null && newKeys != null
                && newKeys.equals(currentKeys);
               
        if (currentKeys != null && !keysAreEqual)
        {
          processInputPartition();
          inputPart = RuntimeUtils.createFirstPartitionForChain(qDef, inputObjInspectors[0], hiveConf, isMapOperator);
        }
       
        if (currentKeys == null || !keysAreEqual)
        {
          if (currentKeys == null)
          {
            currentKeys = newKeys.copyKey();
          }
          else
          {
            currentKeys.copyKey(newKeys);
          }
        }
      }
     
      // add the row to the current Partition
      inputPart.append(row);
    }
    catch (WindowingException we)
    {
      throw new HiveException("Cannot process PTFOperator.", we);
    }
  }

  /**
   * Initialize the visitor to use the QueryDefDeserializer. Use the order
   * defined in QueryDefWalker to visit the QueryDef.
   *
   * @param hiveConf
   * @throws WindowingException
   */
  protected void reconstructQueryDef(HiveConf hiveConf) throws WindowingException
  {
    QueryDefVisitor qdd = new QueryDefDeserializer(hiveConf,
        inputObjInspectors[0]);
    QueryDefWalker qdw = new QueryDefWalker(qdd);
    qdw.walk(qDef);
  }

  protected void setupKeysWrapper(ObjectInspector inputOI) throws HiveException
  {
    PartitionDef pDef = RuntimeUtils.getFirstTableFunction(qDef).getWindow().getPartDef();
    ArrayList<ColumnDef> cols = pDef.getColumns();
    int numCols = cols.size();
    ExprNodeEvaluator[] keyFields = new ExprNodeEvaluator[numCols];
    ObjectInspector[] keyOIs = new ObjectInspector[numCols];
    ObjectInspector[] currentKeyOIs = new ObjectInspector[numCols];
   
    for(int i=0; i<numCols; i++)
    {
      ColumnDef cDef = cols.get(i);
      /*
       * Why can't we just reuse the ExprNodeEvaluator already on the column?
       * Because on the reduce side it was initialized based on the rowOI of the
       * HiveTable and not the OI of the ExtractOperator (the parent of this
       * Operator on the reduce side).
       */
      keyFields[i] = ExprNodeEvaluatorFactory.get(cDef.getExprNode());
      keyOIs[i] = keyFields[i].initialize(inputOI);
      currentKeyOIs[i] = ObjectInspectorUtils.getStandardObjectInspector(keyOIs[i], ObjectInspectorCopyOption.WRITABLE);
    }
   
    keyWrapperFactory = new WindowingKeyWrapperFactory(keyFields, keyOIs, currentKeyOIs);

    newKeys = keyWrapperFactory.getWindowingKeyWrapper();
  }
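
  /*
   * Illustration (a hedged sketch, not from the original source): the WRITABLE
   * copy option used above matters because key objects evaluated from a row may
   * be backed by buffers that the reader reuses for the next row. Copying into
   * a standard writable object yields a self-contained value that can safely be
   * held in currentKeys across rows:
   *
   *   ObjectInspector stringOI =
   *       PrimitiveObjectInspectorFactory.javaStringObjectInspector;
   *   Object held = ObjectInspectorUtils.copyToStandardObject(
   *       "k1", stringOI, ObjectInspectorCopyOption.WRITABLE);
   *   // held is an org.apache.hadoop.io.Text, independent of the source row
   */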
 
  protected void processInputPartition() throws HiveException
  {
    try
    {
      Partition outPart = Executor.executeChain(qDef, inputPart);
      Executor.executeSelectList(qDef, outPart, new ForwardPTF());
    }
    catch (WindowingException we)
    {
      throw new HiveException("Cannot close PTFOperator.", we);
    }
  }
 
  protected void processMapFunction() throws HiveException
  {
    try
    {
      TableFuncDef tDef = RuntimeUtils.getFirstTableFunction(qDef);
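      // map side: let the table function transform the raw input partition,
      // then forward each transformed row to the next operator in the chain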
      Partition outPart = tDef.getFunction().transformRawInput(inputPart);
      PartitionIterator<Object> pItr = outPart.iterator();
      while (pItr.hasNext())
      {
        Object oRow = pItr.next();
        forward(oRow, outputObjInspector);
      }
    }
    catch (WindowingException we)
    {
      throw new HiveException("Cannot close PTFOperator.", we);
    }
  }

  /**
   * Implements the {@link ForwardSink} interface to define the logic for
   * collecting output for the next operator in the chain.
   */
  public class ForwardPTF implements ForwardSink
  {

    /*
     * PTFOperator does not construct a writable output
     */
    @Override
    public void collectOutput(Writable key, Writable value)
    {
      throw new UnsupportedOperationException();
    }

    /*
     * forward row and outputObjInspector to next operator in chain
     */
    @Override
    public void collectOutput(Object[] o)
    {
      try
      {
        forward(o, outputObjInspector);
      }
      catch (HiveException e)
      {
        throw new RuntimeException(
            "Cannot forward output from PTF operator.", e);
      }
    }

    @Override
    public boolean acceptObject()
    {
      return true;
    }
  }
 
  /**
   * @return the name of the operator
   */
  @Override
  public String getName()
  {
    return "PTF";
  }

  @Override
  public OperatorType getType()
  {
    return null;
  }
}
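
The reduce-side flow in processOp and closeOp is the classic flush-on-key-change
pattern over key-sorted input. The following standalone sketch (all names are
illustrative and hypothetical, not part of the original source) shows the same
pattern in isolation: buffer rows while the partition key is unchanged, emit the
buffer when the key changes, and flush the final buffer on close.

import java.util.ArrayList;
import java.util.List;

// Minimal, self-contained sketch of the flush-on-key-change pattern that
// PTFOperator.processOp/closeOp implement. Run main() to see the output.
public class KeyChangeBuffer
{
  private Object currentKey;                  // plays the role of currentKeys
  private final List<Object> rows = new ArrayList<Object>();  // plays the role of inputPart

  public void process(Object key, Object row)
  {
    if (currentKey != null && !currentKey.equals(key))
    {
      flush();                                // partition boundary reached
    }
    currentKey = key;                         // remember the key of the new partition
    rows.add(row);                            // accumulate into the current partition
  }

  public void close()
  {
    if (!rows.isEmpty())
    {
      flush();                                // the last partition has no trailing key change
    }
  }

  private void flush()
  {
    System.out.println("partition " + currentKey + ": " + rows);
    rows.clear();
  }

  public static void main(String[] args)
  {
    KeyChangeBuffer b = new KeyChangeBuffer();
    b.process("a", 1);
    b.process("a", 2);
    b.process("b", 3);
    b.close();  // prints "partition a: [1, 2]" then "partition b: [3]"
  }
}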