Class org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver

Examples of org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver.LocalMapJoinProcCtx

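LocalMapJoinProcCtx is the NodeProcessorCtx that MapJoinResolver threads through its walk of the operator graph: it exposes the parse context (and thus the HiveConf), records whether the map join is followed by a group by, and collects the HashTableDummyOperators created for the small tables. The processors below are wired up through Hive's rule dispatcher. A minimal sketch of that wiring, following the dispatcher pattern MapJoinResolver uses (the localMapJoinProcCtx and topNodes variables are assumed to be in scope):

      // match every map join operator in the plan and hand it to LocalMapJoinProcessor
      Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
      opRules.put(new RuleRegExp("R1", "MAPJOIN%"), new LocalMapJoinProcessor());

      // dispatch matching nodes against the rules; no default processor is needed here
      Dispatcher disp = new DefaultRuleDispatcher(null, opRules, localMapJoinProcCtx);
      GraphWalker ogw = new DefaultGraphWalker(disp);

      // start the walk from the top operators of the local work
      ogw.startWalking(topNodes, null);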

  /**
   * MapJoinFollowedByGroupByProcessor: when the walk below a map join reaches a
   * group by operator, mark the context and cap the group by's hash-aggregation
   * memory.
   */
  public static class MapJoinFollowedByGroupByProcessor implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("GBY")) {
        return null;
      }
      context.setFollowedByGroupBy(true);
      GroupByOperator groupByOp = (GroupByOperator) nd;
      float groupByMemoryUsage = context.getParseCtx().getConf().getFloatVar(
          HiveConf.ConfVars.HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY);
      groupByOp.getConf().setGroupByMemoryUsage(groupByMemoryUsage);
      return null;
    }
  }
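For reference, the ConfVars consulted above and below map to these hive-site properties; the defaults quoted are the ones shipped around this era of Hive and are indicative only:

      HiveConf conf = context.getParseCtx().getConf();
      // hive.mapjoin.followby.map.aggr.hash.percentmemory (default 0.3):
      // hash-aggregation memory fraction for a group by that follows a map join
      float gbyHashMem = conf.getFloatVar(
          HiveConf.ConfVars.HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY);
      // hive.mapjoin.followby.gby.localtask.max.memory.usage (default 0.55):
      // hash table memory cap when the map join feeds a group by
      float htFollowGbyMem = conf.getFloatVar(
          HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
      // hive.mapjoin.localtask.max.memory.usage (default 0.90):
      // hash table memory cap otherwise
      float htMaxMem = conf.getFloatVar(
          HiveConf.ConfVars.HIVEHASHTABLEMAXMEMORYUSAGE);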


  /**
   * LocalMapJoinProcessor: rewrites the small-table sides of a map join to feed a
   * HashTableSinkOperator (run in a local task) and substitutes
   * HashTableDummyOperators as the map join's parents, recording them in the
   * LocalMapJoinProcCtx.
   */
  public static class LocalMapJoinProcessor implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("MAPJOIN")) {
        return null;
      }
      MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
      try {
        hasGroupBy(mapJoinOp, context);
      } catch (Exception e) {
        // propagate the failure rather than swallowing it with printStackTrace()
        throw new SemanticException("failed to check for a group by below the map join", e);
      }

      // the map join should not be affected by join reordering
      mapJoinOp.getConf().resetOrder();

      HashTableSinkDesc hashTableSinkDesc = new HashTableSinkDesc(mapJoinOp.getConf());
      HashTableSinkOperator hashTableSinkOp = (HashTableSinkOperator) OperatorFactory
          .get(hashTableSinkDesc);

      // set hashtable memory usage
      float hashtableMemoryUsage;
      if (context.isFollowedByGroupBy()) {
        hashtableMemoryUsage = context.getParseCtx().getConf().getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
      } else {
        hashtableMemoryUsage = context.getParseCtx().getConf().getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEMAXMEMORYUSAGE);
      }
      hashTableSinkOp.getConf().setHashtableMemoryUsage(hashtableMemoryUsage);

      // position of the big table among the map join's parents
      int bigTable = mapJoinOp.getConf().getPosBigTable();

      // the parent ops for hashTableSinkOp
      List<Operator<? extends OperatorDesc>> smallTablesParentOp =
        new ArrayList<Operator<? extends OperatorDesc>>();
      List<Operator<? extends OperatorDesc>> dummyOperators =
        new ArrayList<Operator<? extends OperatorDesc>>();
      // get all parents
      List<Operator<? extends OperatorDesc>> parentsOp = mapJoinOp.getParentOperators();
      for (int i = 0; i < parentsOp.size(); i++) {
        if (i == bigTable) {
          smallTablesParentOp.add(null);
          continue;
        }
        Operator<? extends OperatorDesc> parent = parentsOp.get(i);
        // make the hash table sink op the child of this small-table parent
        parent.replaceChild(mapJoinOp, hashTableSinkOp);
        // keep the parent id correct
        smallTablesParentOp.add(parent);

        // create a new HashTableDummyOperator, which shares the parent's table desc
        HashTableDummyDesc desc = new HashTableDummyDesc();
        HashTableDummyOperator dummyOp = (HashTableDummyOperator) OperatorFactory.get(desc);
        TableDesc tbl;

        if (parent.getSchema() == null) {
          if (parent instanceof TableScanOperator) {
            tbl = ((TableScanOperator) parent).getTableDesc();
          } else {
            throw new SemanticException("expected a TableScanOperator when the parent has no schema");
          }
        } else {
          // get parent schema
          RowSchema rowSchema = parent.getSchema();
          tbl = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(
              rowSchema, ""));
        }
        dummyOp.getConf().setTbl(tbl);
        // let the dummy op be the parent of mapjoin op
        mapJoinOp.replaceParent(parent, dummyOp);
        List<Operator<? extends OperatorDesc>> dummyChildren =
          new ArrayList<Operator<? extends OperatorDesc>>();
        dummyChildren.add(mapJoinOp);
        dummyOp.setChildOperators(dummyChildren);
        // add this dummy op to the dummy operator list
        dummyOperators.add(dummyOp);
      }
      hashTableSinkOp.setParentOperators(smallTablesParentOp);
      for (Operator<? extends OperatorDesc> op : dummyOperators) {
        context.addDummyParentOp(op);
      }
      return null;
    }
  }
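The hasGroupBy helper called at the top of process() is not shown on this page. In the matching LocalMapJoinProcFactory it walks the map join's children with a GBY% rule that dispatches MapJoinFollowedByGroupByProcessor; a sketch along those lines (the exact method shape is reconstructed, so treat it as indicative):

  public static void hasGroupBy(Operator<? extends OperatorDesc> mapJoinOp,
      LocalMapJoinProcCtx localMapJoinProcCtx) throws Exception {
    // walk the children of the map join, firing the group-by processor on any GBY node
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", "GBY%"), new MapJoinFollowedByGroupByProcessor());
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, localMapJoinProcCtx);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>(mapJoinOp.getChildOperators());
    ogw.startWalking(topNodes, null);
  }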

  /**
   * MapJoinFollowByProcessor: the same group-by check as above, apparently from an
   * older Hive revision (note the class name and the Serializable-bounded generics
   * in the companion LocalMapJoinProcessor below).
   */
  public static class MapJoinFollowByProcessor implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("GBY")) {
        return null;
      }
      context.setFollowedByGroupBy(true);
      GroupByOperator groupByOp = (GroupByOperator) nd;
      float groupByMemoryUsage = context.getParseCtx().getConf().getFloatVar(
          HiveConf.ConfVars.HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY);
      groupByOp.getConf().setGroupByMemoryUsage(groupByMemoryUsage);
      return null;
    }
  }

  /**
   * LocalMapJoinProcessor (older variant): the same rewrite as above, but typed
   * with Operator<? extends Serializable>, resolving the big table position via
   * the tag order, and without the resetOrder() call.
   */
  public static class LocalMapJoinProcessor implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("MAPJOIN")) {
        return null;
      }
      MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
      try {
        hasGroupBy(mapJoinOp, context);
      } catch (Exception e) {
        // propagate the failure rather than swallowing it with printStackTrace()
        throw new SemanticException("failed to check for a group by below the map join", e);
      }

      HashTableSinkDesc hashTableSinkDesc = new HashTableSinkDesc(mapJoinOp.getConf());
      HashTableSinkOperator hashTableSinkOp = (HashTableSinkOperator) OperatorFactory
          .get(hashTableSinkDesc);

      // set hashtable memory usage
      float hashtableMemoryUsage;
      if (context.isFollowedByGroupBy()) {
        hashtableMemoryUsage = context.getParseCtx().getConf().getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
      } else {
        hashtableMemoryUsage = context.getParseCtx().getConf().getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEMAXMEMORYUSAGE);
      }
      hashTableSinkOp.getConf().setHashtableMemoryUsage(hashtableMemoryUsage);

      // position and tag-order alias of the big table among the map join's parents
      int bigTable = mapJoinOp.getConf().getPosBigTable();
      Byte[] order = mapJoinOp.getConf().getTagOrder();
      int bigTableAlias = (int) order[bigTable];
      Operator<? extends Serializable> bigOp = mapJoinOp.getParentOperators().get(bigTable);

      // the parent ops for hashTableSinkOp
      List<Operator<? extends Serializable>> smallTablesParentOp = new ArrayList<Operator<? extends Serializable>>();
      List<Operator<? extends Serializable>> dummyOperators = new ArrayList<Operator<? extends Serializable>>();
      // get all parents
      List<Operator<? extends Serializable>> parentsOp = mapJoinOp.getParentOperators();
      for (int i = 0; i < parentsOp.size(); i++) {
        if (i == bigTableAlias) {
          smallTablesParentOp.add(null);
          continue;
        }
        Operator<? extends Serializable> parent = parentsOp.get(i);
        // make the hash table sink op the child of this small-table parent
        parent.replaceChild(mapJoinOp, hashTableSinkOp);
        // keep the parent id correct
        smallTablesParentOp.add(parent);

        // create a new HashTableDummyOperator, which shares the parent's table desc
        HashTableDummyDesc desc = new HashTableDummyDesc();
        HashTableDummyOperator dummyOp = (HashTableDummyOperator) OperatorFactory.get(desc);
        TableDesc tbl;

        if (parent.getSchema() == null) {
          if (parent instanceof TableScanOperator) {
            tbl = ((TableScanOperator) parent).getTableDesc();
          } else {
            throw new SemanticException("expected a TableScanOperator when the parent has no schema");
          }
        } else {
          // get parent schema
          RowSchema rowSchema = parent.getSchema();
          tbl = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(
              rowSchema, ""));
        }
        dummyOp.getConf().setTbl(tbl);
        // let the dummy op be the parent of mapjoin op
        mapJoinOp.replaceParent(parent, dummyOp);
        List<Operator<? extends Serializable>> dummyChildren = new ArrayList<Operator<? extends Serializable>>();
        dummyChildren.add(mapJoinOp);
        dummyOp.setChildOperators(dummyChildren);
        // add this dummy op to the dummy operator list
        dummyOperators.add(dummyOp);
      }
      hashTableSinkOp.setParentOperators(smallTablesParentOp);
      for (Operator<? extends Serializable> op : dummyOperators) {
        context.addDummyParentOp(op);
      }
      return null;
    }
  }
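The net effect of either LocalMapJoinProcessor variant on the operator tree, sketched for a two-way join where b is the small table:

    before:                      after:
      TS[a]    TS[b]               TS[b] ---> HashTableSink     (local task: build and
         \      /                                                persist the hash table)
          MAPJOIN
                                   HashTableDummy ---+
                                                     |
                                   TS[a] --------> MAPJOIN      (map task: stream the
                                                                 big table)

The small-table branch now ends in a HashTableSinkOperator, which a local task runs to build and persist the hash table, while the HashTableDummyOperator stands in as the map join's parent so the map-side plan remains a well-formed tree; the dummy ops are handed back through LocalMapJoinProcCtx.addDummyParentOp for the resolver to attach.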
