Package org.apache.hadoop.hive.ql.optimizer

Source code of org.apache.hadoop.hive.ql.optimizer.MapJoinFactory (including the inner class MapJoinFactory.MapJoinMapJoin)

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRMapJoinCtx;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
import org.apache.hadoop.hive.ql.parse.ErrorMsg;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.TableDesc;

/**
* Operator factory for MapJoin processing.
*/
public final class MapJoinFactory {

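  /**
   * Finds the branch of the map join on which the current rule was invoked:
   * the node directly below the join on the walker's stack is the parent
   * that was just visited, and its index among the join's parents is the
   * position of that input.
   */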
  public static int getPositionParent(
      AbstractMapJoinOperator<? extends MapJoinDesc> op, Stack<Node> stack) {
    int size = stack.size();
    assert size >= 2 && stack.get(size - 1) == op;
    Operator<? extends Serializable> parent =
        (Operator<? extends Serializable>) stack.get(size - 2);
    List<Operator<? extends Serializable>> parOp = op.getParentOperators();
    int pos = parOp.indexOf(parent);
    assert pos >= 0 && pos < parOp.size();
    return pos;
  }

  /**
   * TableScan followed by MapJoin.
   */
  public static class TableScanMapJoin implements NodeProcessor {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      AbstractMapJoinOperator<MapJoinDesc> mapJoin = (AbstractMapJoinOperator<MapJoinDesc>) nd;
      GenMRProcContext ctx = (GenMRProcContext) procCtx;

      // find the branch on which this processor was invoked
      int pos = getPositionParent(mapJoin, stack);

      Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx
          .getMapCurrCtx();
      GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get(
          pos));
      Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
      MapredWork currPlan = (MapredWork) currTask.getWork();
      Operator<? extends Serializable> currTopOp = mapredCtx.getCurrTopOp();
      String currAliasId = mapredCtx.getCurrAliasId();
      Operator<? extends Serializable> reducer = mapJoin;
      HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx
          .getOpTaskMap();
      Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);

      ctx.setCurrTopOp(currTopOp);
      ctx.setCurrAliasId(currAliasId);
      ctx.setCurrTask(currTask);

      // If the plan for this reducer does not exist, initialize the plan
      if (opMapTask == null) {
        assert currPlan.getReducer() == null;
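        // No task is mapped to this join yet, so start a new map-join plan.
        // The boolean flags describe where the join's input comes from (an
        // earlier map join or a union); compare the call sites in
        // MapJoinMapJoin and UnionMapJoin further down in this file.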
        GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, false, false, false, pos);
      } else {
        // The current plan can be thrown away after being merged with the
        // original plan
        GenMapRedUtils.joinPlan(mapJoin, null, opMapTask, ctx, pos, false,
            false, false);
        currTask = opMapTask;
        ctx.setCurrTask(currTask);
      }

      mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx
          .getCurrTopOp(), ctx.getCurrAliasId()));
      return null;
    }
  }

  /**
   * ReduceSink followed by MapJoin.
   */
  public static class ReduceSinkMapJoin implements NodeProcessor {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      AbstractMapJoinOperator<MapJoinDesc> mapJoin = (AbstractMapJoinOperator<MapJoinDesc>) nd;
      GenMRProcContext opProcCtx = (GenMRProcContext) procCtx;

      ParseContext parseCtx = opProcCtx.getParseCtx();
      MapredWork cplan = GenMapRedUtils.getMapRedWork(parseCtx);
      Task<? extends Serializable> redTask = TaskFactory.get(cplan, parseCtx
          .getConf());
      Task<? extends Serializable> currTask = opProcCtx.getCurrTask();

      // find the branch on which this processor was invoked
      int pos = getPositionParent(mapJoin, stack);
      boolean local = pos != mapJoin.getConf().getPosBigTable();

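      // Split the operator plan at the map join into the current task and a
      // new task (redTask); 'local' marks whether this branch feeds the
      // small, locally-loaded side of the join.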
      GenMapRedUtils.splitTasks(mapJoin, currTask, redTask, opProcCtx, false,
          local, pos);

      currTask = opProcCtx.getCurrTask();
      HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = opProcCtx
          .getOpTaskMap();
      Task<? extends Serializable> opMapTask = opTaskMap.get(mapJoin);

      // If no task has been recorded for this map join yet, associate it
      // with the current task
      if (opMapTask == null) {
        assert cplan.getReducer() == null;
        opTaskMap.put(mapJoin, currTask);
        opProcCtx.setCurrMapJoinOp(null);
      } else {
        // The current plan can be thrown away after being merged with the
        // original plan
        GenMapRedUtils.joinPlan(mapJoin, currTask, opMapTask, opProcCtx, pos,
            false, false, false);
        currTask = opMapTask;
        opProcCtx.setCurrTask(currTask);
      }

      return null;
    }
  }

  /**
   * MapJoin followed by Select.
   */
  public static class MapJoin implements NodeProcessor {

    /**
     * Create a task by splitting the plan below the join. We have to do this
     * while processing the Select rather than the MapJoin itself because of
     * the walker: while a node is being processed, it is not safe to alter
     * its children, since that determines the course of the walk. It is
     * perfectly fine to modify its parents, though, because those nodes have
     * already been visited.
     */
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {

      SelectOperator sel = (SelectOperator) nd;
      assert sel.getParentOperators().size() == 1;
      AbstractMapJoinOperator<MapJoinDesc> mapJoin =
          (AbstractMapJoinOperator<MapJoinDesc>) sel.getParentOperators().get(0);

      GenMRProcContext ctx = (GenMRProcContext) procCtx;
      ParseContext parseCtx = ctx.getParseCtx();

      // is the mapjoin followed by a reducer
      List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOps = parseCtx
          .getListMapJoinOpsNoReducer();

      if (listMapJoinOps.contains(mapJoin)) {
        ctx.setCurrAliasId(null);
        ctx.setCurrTopOp(null);
        Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx
            .getMapCurrCtx();
        mapCurrCtx.put((Operator<? extends Serializable>) nd, new GenMapRedCtx(
            ctx.getCurrTask(), null, null));
        return null;
      }

      ctx.setCurrMapJoinOp(mapJoin);

      Task<? extends Serializable> currTask = ctx.getCurrTask();
      GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin);
      if (mjCtx == null) {
        mjCtx = new GenMRMapJoinCtx();
        ctx.setMapJoinCtx(mapJoin, mjCtx);
      }

      MapredWork mjPlan = GenMapRedUtils.getMapRedWork(parseCtx);
      Task<? extends Serializable> mjTask = TaskFactory.get(mjPlan, parseCtx
          .getConf());

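      // Table description (schema and serde) for the intermediate file that
      // carries the join output between the two tasks.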
      TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils
          .getFieldSchemasFromRowSchema(mapJoin.getSchema(), "temporarycol"));

      // generate the temporary file
      Context baseCtx = parseCtx.getContext();
      String taskTmpDir = baseCtx.getMRTmpFileURI();

      // Add the path to alias mapping
      mjCtx.setTaskTmpDir(taskTmpDir);
      mjCtx.setTTDesc(tt_desc);
      mjCtx.setRootMapJoinOp(sel);

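      // Detach the select from the join: it becomes the root operator of the
      // follow-up task, which reads the join output back from taskTmpDir.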
      sel.setParentOperators(null);

      // Create a file sink operator for this file name
      Operator<? extends Serializable> fs_op = OperatorFactory.get(
          new FileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar(
          HiveConf.ConfVars.COMPRESSINTERMEDIATE)), mapJoin.getSchema());

      assert mapJoin.getChildOperators().size() == 1;
      mapJoin.getChildOperators().set(0, fs_op);

      List<Operator<? extends Serializable>> parentOpList = new ArrayList<Operator<? extends Serializable>>();
      parentOpList.add(mapJoin);
      fs_op.setParentOperators(parentOpList);

      currTask.addDependentTask(mjTask);

      ctx.setCurrTask(mjTask);
      ctx.setCurrAliasId(null);
      ctx.setCurrTopOp(null);

      Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx
          .getMapCurrCtx();
      mapCurrCtx.put((Operator<? extends Serializable>) nd, new GenMapRedCtx(
          ctx.getCurrTask(), null, null));

      return null;
    }
  }

  /**
   * MapJoin followed by MapJoin.
   */
  public static class MapJoinMapJoin implements NodeProcessor {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      AbstractMapJoinOperator<? extends MapJoinDesc> mapJoin = (AbstractMapJoinOperator<? extends MapJoinDesc>) nd;
      GenMRProcContext ctx = (GenMRProcContext) procCtx;

      AbstractMapJoinOperator<? extends MapJoinDesc> oldMapJoin = ctx.getCurrMapJoinOp();

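      // Remember the upstream map join in this join's context so that its
      // intermediate output can later be wired in as an input of this plan.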
      GenMRMapJoinCtx mjCtx = ctx.getMapJoinCtx(mapJoin);
      if (mjCtx != null) {
        mjCtx.setOldMapJoin(oldMapJoin);
      } else {
        ctx.setMapJoinCtx(mapJoin, new GenMRMapJoinCtx(null, null, null,
            oldMapJoin));
      }
      ctx.setCurrMapJoinOp(mapJoin);

      // find the branch on which this processor was invoked
      int pos = getPositionParent(mapJoin, stack);

      Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx
          .getMapCurrCtx();
      GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get(
          pos));
      Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
      MapredWork currPlan = (MapredWork) currTask.getWork();
      Operator<? extends Serializable> reducer = mapJoin;
      HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx
          .getOpTaskMap();
      Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);

      ctx.setCurrTask(currTask);

      // If the plan for this reducer does not exist, initialize the plan
      if (opMapTask == null) {
        assert currPlan.getReducer() == null;
        GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, true, false, false, pos);
      } else {
        // The current plan can be thrown away after being merged with the
        // original plan
        GenMapRedUtils.joinPlan(mapJoin, currTask, opMapTask, ctx, pos, false,
            true, false);
        currTask = opMapTask;
        ctx.setCurrTask(currTask);
      }

      mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), null, null));
      return null;
    }
  }

  /**
   * Union followed by MapJoin.
   */
  public static class UnionMapJoin implements NodeProcessor {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      GenMRProcContext ctx = (GenMRProcContext) procCtx;

      ParseContext parseCtx = ctx.getParseCtx();
      UnionProcContext uCtx = parseCtx.getUCtx();

      // the union was map-only, so no special processing is needed: treat
      // this branch like a TableScan feeding the map join
      if (uCtx.isMapOnlySubq()) {
        return (new TableScanMapJoin())
            .process(nd, stack, procCtx, nodeOutputs);
      }

      UnionOperator currUnion = ctx.getCurrUnionOp();
      assert currUnion != null;
      AbstractMapJoinOperator<MapJoinDesc> mapJoin = (AbstractMapJoinOperator<MapJoinDesc>) nd;

      // find the branch on which this processor was invoked
      int pos = getPositionParent(mapJoin, stack);

      Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx
          .getMapCurrCtx();
      GenMapRedCtx mapredCtx = mapCurrCtx.get(mapJoin.getParentOperators().get(
          pos));
      Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
      MapredWork currPlan = (MapredWork) currTask.getWork();
      Operator<? extends Serializable> reducer = mapJoin;
      HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx
          .getOpTaskMap();
      Task<? extends Serializable> opMapTask = opTaskMap.get(reducer);

      // the union result cannot be the small, locally-loaded side of the
      // map join
      boolean local = pos != mapJoin.getConf().getPosBigTable();
      if (local) {
        throw new SemanticException(ErrorMsg.INVALID_MAPJOIN_TABLE.getMsg());
      }

      // If the plan for this reducer does not exist, initialize the plan
      if (opMapTask == null) {
        assert currPlan.getReducer() == null;
        ctx.setCurrMapJoinOp(mapJoin);
        GenMapRedUtils.initMapJoinPlan(mapJoin, ctx, true, true, false, pos);
        ctx.setCurrUnionOp(null);
      } else {
        // The current plan can be thrown away after being merged with the
        // original plan
        Task<? extends Serializable> uTask = ctx.getUnionTask(
            ctx.getCurrUnionOp()).getUTask();
        if (uTask.getId().equals(opMapTask.getId())) {
          GenMapRedUtils.joinPlan(mapJoin, null, opMapTask, ctx, pos, false,
              false, true);
        } else {
          GenMapRedUtils.joinPlan(mapJoin, uTask, opMapTask, ctx, pos, false,
              false, true);
        }
        currTask = opMapTask;
        ctx.setCurrTask(currTask);
      }

      mapCurrCtx.put(mapJoin, new GenMapRedCtx(ctx.getCurrTask(), ctx
          .getCurrTopOp(), ctx.getCurrAliasId()));
      return null;
    }
  }

  public static NodeProcessor getTableScanMapJoin() {
    return new TableScanMapJoin();
  }

  public static NodeProcessor getUnionMapJoin() {
    return new UnionMapJoin();
  }

  public static NodeProcessor getReduceSinkMapJoin() {
    return new ReduceSinkMapJoin();
  }

  public static NodeProcessor getMapJoin() {
    return new MapJoin();
  }

  public static NodeProcessor getMapJoinMapJoin() {
    return new MapJoinMapJoin();
  }

  private MapJoinFactory() {
    // prevent instantiation
  }
}
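
For context, here is a minimal sketch of how these factory methods are typically wired into Hive's rule-based operator walker during map-reduce task generation. The wrapper class name, rule names, and rule patterns are illustrative assumptions chosen to match the processors in this file, not code copied from a particular Hive release:

// Hypothetical wiring sketch: registers the MapJoinFactory processors with a
// rule dispatcher. Class name and rule patterns are illustrative assumptions.
package org.apache.hadoop.hive.ql.optimizer;

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;

public final class MapJoinRuleWiringSketch {

  /** Builds the dispatcher that fires the processors above during the walk. */
  public static Dispatcher buildDispatcher(GenMRProcContext procCtx,
      NodeProcessor defaultProcessor) {
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    // Each pattern matches a path of operator names ending at the visited node.
    opRules.put(new RuleRegExp("R1", "TS%.*MAPJOIN%"),
        MapJoinFactory.getTableScanMapJoin());
    opRules.put(new RuleRegExp("R2", "RS%.*MAPJOIN%"),
        MapJoinFactory.getReduceSinkMapJoin());
    opRules.put(new RuleRegExp("R3", "UNION%.*MAPJOIN%"),
        MapJoinFactory.getUnionMapJoin());
    opRules.put(new RuleRegExp("R4", "MAPJOIN%.*MAPJOIN%"),
        MapJoinFactory.getMapJoinMapJoin());
    opRules.put(new RuleRegExp("R5", "MAPJOIN%SEL%"),
        MapJoinFactory.getMapJoin());
    // The dispatcher invokes the best-matching processor for each node the
    // graph walker visits, sharing procCtx across all of them.
    return new DefaultRuleDispatcher(defaultProcessor, opRules, procCtx);
  }

  private MapJoinRuleWiringSketch() {
    // prevent instantiation
  }
}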