Examples of LocalMapJoinProcCtx


Examples of org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver.LocalMapJoinProcCtx

   *
   */
  public static class MapJoinFollowedByGroupByProcessor implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("GBY")) {
        return null;
      }
      context.setFollowedByGroupBy(true);
      GroupByOperator groupByOp = (GroupByOperator) nd;
      float groupByMemoryUsage = context.getParseCtx().getConf().getFloatVar(
          HiveConf.ConfVars.HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY);
      groupByOp.getConf().setGroupByMemoryUsage(groupByMemoryUsage);
      return null;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver.LocalMapJoinProcCtx

   *
   */
  public static class LocalMapJoinProcessor implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("MAPJOIN")) {
        return null;
      }
      MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
      try {
        hasGroupBy(mapJoinOp, context);
      } catch (Exception e) {
        e.printStackTrace();
      }

      HashTableSinkDesc hashTableSinkDesc = new HashTableSinkDesc(mapJoinOp.getConf());
      HashTableSinkOperator hashTableSinkOp = (HashTableSinkOperator) OperatorFactory
          .get(hashTableSinkDesc);

      // set hashtable memory usage
      float hashtableMemoryUsage;
      if (context.isFollowedByGroupBy()) {
        hashtableMemoryUsage = context.getParseCtx().getConf().getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
      } else {
        hashtableMemoryUsage = context.getParseCtx().getConf().getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEMAXMEMORYUSAGE);
      }
      hashTableSinkOp.getConf().setHashtableMemoryUsage(hashtableMemoryUsage);

      // get the last operator for processing big tables
      int bigTable = mapJoinOp.getConf().getPosBigTable();
      Byte[] order = mapJoinOp.getConf().getTagOrder();
      int bigTableAlias = (int) order[bigTable];

      // the parent ops for hashTableSinkOp
      List<Operator<? extends OperatorDesc>> smallTablesParentOp =
        new ArrayList<Operator<? extends OperatorDesc>>();
      List<Operator<? extends OperatorDesc>> dummyOperators =
        new ArrayList<Operator<? extends OperatorDesc>>();
      // get all parents
      List<Operator<? extends OperatorDesc>> parentsOp = mapJoinOp.getParentOperators();
      for (int i = 0; i < parentsOp.size(); i++) {
        if (i == bigTableAlias) {
          smallTablesParentOp.add(null);
          continue;
        }
        Operator<? extends OperatorDesc> parent = parentsOp.get(i);
        // let hashtable Op be the child of this parent
        parent.replaceChild(mapJoinOp, hashTableSinkOp);
        // keep the parent id correct
        smallTablesParentOp.add(parent);

        // create an new operator: HashTable DummyOpeator, which share the table desc
        HashTableDummyDesc desc = new HashTableDummyDesc();
        HashTableDummyOperator dummyOp = (HashTableDummyOperator) OperatorFactory.get(desc);
        TableDesc tbl;

        if (parent.getSchema() == null) {
          if (parent instanceof TableScanOperator) {
            tbl = ((TableScanOperator) parent).getTableDesc();
          } else {
            throw new SemanticException();
          }
        } else {
          // get parent schema
          RowSchema rowSchema = parent.getSchema();
          tbl = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(
              rowSchema, ""));
        }
        dummyOp.getConf().setTbl(tbl);
        // let the dummy op be the parent of mapjoin op
        mapJoinOp.replaceParent(parent, dummyOp);
        List<Operator<? extends OperatorDesc>> dummyChildren =
          new ArrayList<Operator<? extends OperatorDesc>>();
        dummyChildren.add(mapJoinOp);
        dummyOp.setChildOperators(dummyChildren);
        // add this dummy op to the dummp operator list
        dummyOperators.add(dummyOp);
      }
      hashTableSinkOp.setParentOperators(smallTablesParentOp);
      for (Operator<? extends OperatorDesc> op : dummyOperators) {
        context.addDummyParentOp(op);
      }
      return null;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver.LocalMapJoinProcCtx

   *
   */
  public static class MapJoinFollowedByGroupByProcessor implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("GBY")) {
        return null;
      }
      context.setFollowedByGroupBy(true);
      GroupByOperator groupByOp = (GroupByOperator) nd;
      float groupByMemoryUsage = context.getParseCtx().getConf().getFloatVar(
          HiveConf.ConfVars.HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY);
      groupByOp.getConf().setGroupByMemoryUsage(groupByMemoryUsage);
      return null;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver.LocalMapJoinProcCtx

   *
   */
  public static class LocalMapJoinProcessor implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("MAPJOIN")) {
        return null;
      }
      MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
      try {
        hasGroupBy(mapJoinOp, context);
      } catch (Exception e) {
        e.printStackTrace();
      }

      HashTableSinkDesc hashTableSinkDesc = new HashTableSinkDesc(mapJoinOp.getConf());
      HashTableSinkOperator hashTableSinkOp = (HashTableSinkOperator) OperatorFactory
          .get(hashTableSinkDesc);

      // set hashtable memory usage
      float hashtableMemoryUsage;
      if (context.isFollowedByGroupBy()) {
        hashtableMemoryUsage = context.getParseCtx().getConf().getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
      } else {
        hashtableMemoryUsage = context.getParseCtx().getConf().getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEMAXMEMORYUSAGE);
      }
      hashTableSinkOp.getConf().setHashtableMemoryUsage(hashtableMemoryUsage);

      // get the last operator for processing big tables
      int bigTable = mapJoinOp.getConf().getPosBigTable();
      Byte[] order = mapJoinOp.getConf().getTagOrder();
      int bigTableAlias = (int) order[bigTable];

      // the parent ops for hashTableSinkOp
      List<Operator<? extends Serializable>> smallTablesParentOp = new ArrayList<Operator<? extends Serializable>>();
      List<Operator<? extends Serializable>> dummyOperators = new ArrayList<Operator<? extends Serializable>>();
      // get all parents
      List<Operator<? extends Serializable>> parentsOp = mapJoinOp.getParentOperators();
      for (int i = 0; i < parentsOp.size(); i++) {
        if (i == bigTableAlias) {
          smallTablesParentOp.add(null);
          continue;
        }
        Operator<? extends Serializable> parent = parentsOp.get(i);
        // let hashtable Op be the child of this parent
        parent.replaceChild(mapJoinOp, hashTableSinkOp);
        // keep the parent id correct
        smallTablesParentOp.add(parent);

        // create an new operator: HashTable DummyOpeator, which share the table desc
        HashTableDummyDesc desc = new HashTableDummyDesc();
        HashTableDummyOperator dummyOp = (HashTableDummyOperator) OperatorFactory.get(desc);
        TableDesc tbl;

        if (parent.getSchema() == null) {
          if (parent instanceof TableScanOperator) {
            tbl = ((TableScanOperator) parent).getTableDesc();
          } else {
            throw new SemanticException();
          }
        } else {
          // get parent schema
          RowSchema rowSchema = parent.getSchema();
          tbl = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(
              rowSchema, ""));
        }
        dummyOp.getConf().setTbl(tbl);
        // let the dummy op be the parent of mapjoin op
        mapJoinOp.replaceParent(parent, dummyOp);
        List<Operator<? extends Serializable>> dummyChildren = new ArrayList<Operator<? extends Serializable>>();
        dummyChildren.add(mapJoinOp);
        dummyOp.setChildOperators(dummyChildren);
        // add this dummy op to the dummp operator list
        dummyOperators.add(dummyOp);
      }
      hashTableSinkOp.setParentOperators(smallTablesParentOp);
      for (Operator<? extends Serializable> op : dummyOperators) {
        context.addDummyParentOp(op);
      }
      return null;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver.LocalMapJoinProcCtx

   *
   */
  public static class MapJoinFollowedByGroupByProcessor implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("GBY")) {
        return null;
      }
      context.setFollowedByGroupBy(true);
      GroupByOperator groupByOp = (GroupByOperator) nd;
      float groupByMemoryUsage = context.getParseCtx().getConf().getFloatVar(
          HiveConf.ConfVars.HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY);
      groupByOp.getConf().setGroupByMemoryUsage(groupByMemoryUsage);
      return null;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver.LocalMapJoinProcCtx

   *
   */
  public static class LocalMapJoinProcessor implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("MAPJOIN")) {
        return null;
      }
      MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
      try {
        hasGroupBy(mapJoinOp, context);
      } catch (Exception e) {
        e.printStackTrace();
      }

      // mapjoin should not affected by join reordering
      mapJoinOp.getConf().resetOrder();

      HiveConf conf = context.getParseCtx().getConf();

      HashTableSinkDesc hashTableSinkDesc = new HashTableSinkDesc(mapJoinOp.getConf());
      HashTableSinkOperator hashTableSinkOp = (HashTableSinkOperator) OperatorFactory
          .get(hashTableSinkDesc);

      // set hashtable memory usage
      float hashtableMemoryUsage;
      if (context.isFollowedByGroupBy()) {
        hashtableMemoryUsage = conf.getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
      } else {
        hashtableMemoryUsage = conf.getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEMAXMEMORYUSAGE);
      }
      mapJoinOp.getConf().setHashTableMemoryUsage(hashtableMemoryUsage);
      LOG.info("Setting max memory usage to " + hashtableMemoryUsage + " for table sink "
          + (context.isFollowedByGroupBy() ? "" : "not") + " followed by group by");
      hashTableSinkOp.getConf().setHashtableMemoryUsage(hashtableMemoryUsage);

      // get the last operator for processing big tables
      int bigTable = mapJoinOp.getConf().getPosBigTable();
      Byte[] orders = mapJoinOp.getConf().getTagOrder();

      // todo: support tez/vectorization
      boolean useNontaged = conf.getBoolVar(
          HiveConf.ConfVars.HIVECONVERTJOINUSENONSTAGED) &&
          conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") &&
          !conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);

      // the parent ops for hashTableSinkOp
      List<Operator<? extends OperatorDesc>> smallTablesParentOp =
        new ArrayList<Operator<? extends OperatorDesc>>();
      List<Operator<? extends OperatorDesc>> dummyOperators =
        new ArrayList<Operator<? extends OperatorDesc>>();
      List<Operator<? extends OperatorDesc>> directOperators =
          new ArrayList<Operator<? extends OperatorDesc>>();
      // get all parents
      List<Operator<? extends OperatorDesc>> parentsOp = mapJoinOp.getParentOperators();
      for (int i = 0; i < parentsOp.size(); i++) {
        if (i == bigTable) {
          smallTablesParentOp.add(null);
          directOperators.add(null);
          continue;
        }
        Operator<? extends OperatorDesc> parent = parentsOp.get(i);
        boolean directFetchable = useNontaged &&
            (parent instanceof TableScanOperator || parent instanceof MapJoinOperator);
        if (directFetchable) {
          // no filter, no projection. no need to stage
          smallTablesParentOp.add(null);
          directOperators.add(parent);
          hashTableSinkDesc.getKeys().put(orders[i], null);
          hashTableSinkDesc.getExprs().put(orders[i], null);
          hashTableSinkDesc.getFilters().put(orders[i], null);
        } else {
          // keep the parent id correct
          smallTablesParentOp.add(parent);
          directOperators.add(null);
        }
        // let hashtable Op be the child of this parent
        parent.replaceChild(mapJoinOp, hashTableSinkOp);
        if (directFetchable) {
          parent.setChildOperators(null);
        }

        // create an new operator: HashTable DummyOpeator, which share the table desc
        HashTableDummyDesc desc = new HashTableDummyDesc();
        HashTableDummyOperator dummyOp = (HashTableDummyOperator) OperatorFactory.get(desc);
        TableDesc tbl;

        if (parent.getSchema() == null) {
          if (parent instanceof TableScanOperator) {
            tbl = ((TableScanOperator) parent).getTableDesc();
          } else {
            throw new SemanticException("Expected parent operator of type TableScanOperator." +
              "Found " + parent.getClass().getName() + " instead.");
          }
        } else {
          // get parent schema
          RowSchema rowSchema = parent.getSchema();
          tbl = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(
              rowSchema, ""));
        }
        dummyOp.getConf().setTbl(tbl);
        // let the dummy op be the parent of mapjoin op
        mapJoinOp.replaceParent(parent, dummyOp);
        List<Operator<? extends OperatorDesc>> dummyChildren =
          new ArrayList<Operator<? extends OperatorDesc>>();
        dummyChildren.add(mapJoinOp);
        dummyOp.setChildOperators(dummyChildren);
        // add this dummy op to the dummp operator list
        dummyOperators.add(dummyOp);
      }
      hashTableSinkOp.setParentOperators(smallTablesParentOp);
      for (Operator<? extends OperatorDesc> op : dummyOperators) {
        context.addDummyParentOp(op);
      }
      if (hasAnyDirectFetch(directOperators)) {
        context.addDirectWorks(mapJoinOp, directOperators);
      }
      return null;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver.LocalMapJoinProcCtx

   *
   */
  public static class MapJoinFollowedByGroupByProcessor implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("GBY")) {
        return null;
      }
      context.setFollowedByGroupBy(true);
      GroupByOperator groupByOp = (GroupByOperator) nd;
      float groupByMemoryUsage = context.getParseCtx().getConf().getFloatVar(
          HiveConf.ConfVars.HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY);
      groupByOp.getConf().setGroupByMemoryUsage(groupByMemoryUsage);
      return null;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver.LocalMapJoinProcCtx

   *
   */
  public static class LocalMapJoinProcessor implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("MAPJOIN")) {
        return null;
      }
      MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
      try {
        hasGroupBy(mapJoinOp, context);
      } catch (Exception e) {
        e.printStackTrace();
      }

      MapJoinDesc mapJoinDesc = mapJoinOp.getConf();

      // mapjoin should not affected by join reordering
      mapJoinDesc.resetOrder();

      HiveConf conf = context.getParseCtx().getConf();
      // set hashtable memory usage
      float hashtableMemoryUsage;
      if (context.isFollowedByGroupBy()) {
        hashtableMemoryUsage = conf.getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
      } else {
        hashtableMemoryUsage = conf.getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEMAXMEMORYUSAGE);
      }
      mapJoinDesc.setHashTableMemoryUsage(hashtableMemoryUsage);
      LOG.info("Setting max memory usage to " + hashtableMemoryUsage + " for table sink "
          + (context.isFollowedByGroupBy() ? "" : "not") + " followed by group by");

      HashTableSinkDesc hashTableSinkDesc = new HashTableSinkDesc(mapJoinDesc);
      HashTableSinkOperator hashTableSinkOp = (HashTableSinkOperator) OperatorFactory
          .get(hashTableSinkDesc);

      // get the last operator for processing big tables
      int bigTable = mapJoinDesc.getPosBigTable();

      // todo: support tez/vectorization
      boolean useNontaged = conf.getBoolVar(
          HiveConf.ConfVars.HIVECONVERTJOINUSENONSTAGED) &&
          conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") &&
          !conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);

      // the parent ops for hashTableSinkOp
      List<Operator<? extends OperatorDesc>> smallTablesParentOp =
        new ArrayList<Operator<? extends OperatorDesc>>();
      List<Operator<? extends OperatorDesc>> dummyOperators =
        new ArrayList<Operator<? extends OperatorDesc>>();
      List<Operator<? extends OperatorDesc>> directOperators =
          new ArrayList<Operator<? extends OperatorDesc>>();
      // get all parents
      List<Operator<? extends OperatorDesc>> parentsOp = mapJoinOp.getParentOperators();
      for (byte i = 0; i < parentsOp.size(); i++) {
        if (i == bigTable) {
          smallTablesParentOp.add(null);
          directOperators.add(null);
          continue;
        }
        Operator<? extends OperatorDesc> parent = parentsOp.get(i);
        boolean directFetchable = useNontaged &&
            (parent instanceof TableScanOperator || parent instanceof MapJoinOperator);
        if (directFetchable) {
          // no filter, no projection. no need to stage
          smallTablesParentOp.add(null);
          directOperators.add(parent);
          hashTableSinkDesc.getKeys().put(i, null);
          hashTableSinkDesc.getExprs().put(i, null);
          hashTableSinkDesc.getFilters().put(i, null);
        } else {
          // keep the parent id correct
          smallTablesParentOp.add(parent);
          directOperators.add(null);
          int[] valueIndex = mapJoinDesc.getValueIndex(i);
          if (valueIndex != null) {
            // remove values in key exprs
            // schema for value is already fixed in MapJoinProcessor#convertJoinOpMapJoinOp
            List<ExprNodeDesc> newValues = new ArrayList<ExprNodeDesc>();
            List<ExprNodeDesc> values = hashTableSinkDesc.getExprs().get(i);
            for (int index = 0; index < values.size(); index++) {
              if (valueIndex[index] < 0) {
                newValues.add(values.get(index));
              }
            }
            hashTableSinkDesc.getExprs().put(i, newValues);
          }
        }
        // let hashtable Op be the child of this parent
        parent.replaceChild(mapJoinOp, hashTableSinkOp);
        if (directFetchable) {
          parent.setChildOperators(null);
        }

        // create new operator: HashTable DummyOperator, which share the table desc
        HashTableDummyDesc desc = new HashTableDummyDesc();
        HashTableDummyOperator dummyOp = (HashTableDummyOperator) OperatorFactory.get(desc);
        TableDesc tbl;

        if (parent.getSchema() == null) {
          if (parent instanceof TableScanOperator) {
            tbl = ((TableScanOperator) parent).getTableDesc();
          } else {
            throw new SemanticException("Expected parent operator of type TableScanOperator." +
              "Found " + parent.getClass().getName() + " instead.");
          }
        } else {
          // get parent schema
          RowSchema rowSchema = parent.getSchema();
          tbl = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(
              rowSchema, ""));
        }
        dummyOp.getConf().setTbl(tbl);
        // let the dummy op be the parent of mapjoin op
        mapJoinOp.replaceParent(parent, dummyOp);
        List<Operator<? extends OperatorDesc>> dummyChildren =
          new ArrayList<Operator<? extends OperatorDesc>>();
        dummyChildren.add(mapJoinOp);
        dummyOp.setChildOperators(dummyChildren);
        // add this dummy op to the dummp operator list
        dummyOperators.add(dummyOp);
      }
      hashTableSinkOp.setParentOperators(smallTablesParentOp);
      for (Operator<? extends OperatorDesc> op : dummyOperators) {
        context.addDummyParentOp(op);
      }
      if (hasAnyDirectFetch(directOperators)) {
        context.addDirectWorks(mapJoinOp, directOperators);
      }
      return null;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver.LocalMapJoinProcCtx

   *
   */
  public static class MapJoinFollowedByGroupByProcessor implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("GBY")) {
        return null;
      }
      context.setFollowedByGroupBy(true);
      GroupByOperator groupByOp = (GroupByOperator) nd;
      float groupByMemoryUsage = context.getParseCtx().getConf().getFloatVar(
          HiveConf.ConfVars.HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY);
      groupByOp.getConf().setGroupByMemoryUsage(groupByMemoryUsage);
      return null;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver.LocalMapJoinProcCtx

   *
   */
  public static class LocalMapJoinProcessor implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
        throws SemanticException {
      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
      if (!nd.getName().equals("MAPJOIN")) {
        return null;
      }
      MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
      try {
        hasGroupBy(mapJoinOp, context);
      } catch (Exception e) {
        e.printStackTrace();
      }

      // mapjoin should not affected by join reordering
      mapJoinOp.getConf().resetOrder();

      HiveConf conf = context.getParseCtx().getConf();

      HashTableSinkDesc hashTableSinkDesc = new HashTableSinkDesc(mapJoinOp.getConf());
      HashTableSinkOperator hashTableSinkOp = (HashTableSinkOperator) OperatorFactory
          .get(hashTableSinkDesc);

      // set hashtable memory usage
      float hashtableMemoryUsage;
      if (context.isFollowedByGroupBy()) {
        hashtableMemoryUsage = conf.getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
      } else {
        hashtableMemoryUsage = conf.getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEMAXMEMORYUSAGE);
      }
      mapJoinOp.getConf().setHashTableMemoryUsage(hashtableMemoryUsage);
      LOG.info("Setting max memory usage to " + hashtableMemoryUsage + " for table sink "
          + (context.isFollowedByGroupBy() ? "" : "not") + " followed by group by");
      hashTableSinkOp.getConf().setHashtableMemoryUsage(hashtableMemoryUsage);

      // get the last operator for processing big tables
      int bigTable = mapJoinOp.getConf().getPosBigTable();
      Byte[] orders = mapJoinOp.getConf().getTagOrder();

      // todo: support tez/vectorization
      boolean useNontaged = conf.getBoolVar(
          HiveConf.ConfVars.HIVECONVERTJOINUSENONSTAGED) &&
          conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") &&
          !conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);

      // the parent ops for hashTableSinkOp
      List<Operator<? extends OperatorDesc>> smallTablesParentOp =
        new ArrayList<Operator<? extends OperatorDesc>>();
      List<Operator<? extends OperatorDesc>> dummyOperators =
        new ArrayList<Operator<? extends OperatorDesc>>();
      List<Operator<? extends OperatorDesc>> directOperators =
          new ArrayList<Operator<? extends OperatorDesc>>();
      // get all parents
      List<Operator<? extends OperatorDesc>> parentsOp = mapJoinOp.getParentOperators();
      for (int i = 0; i < parentsOp.size(); i++) {
        if (i == bigTable) {
          smallTablesParentOp.add(null);
          directOperators.add(null);
          continue;
        }
        Operator<? extends OperatorDesc> parent = parentsOp.get(i);
        boolean directFetchable = useNontaged &&
            (parent instanceof TableScanOperator || parent instanceof MapJoinOperator);
        if (directFetchable) {
          // no filter, no projection. no need to stage
          smallTablesParentOp.add(null);
          directOperators.add(parent);
          hashTableSinkDesc.getKeys().put(orders[i], null);
          hashTableSinkDesc.getExprs().put(orders[i], null);
          hashTableSinkDesc.getFilters().put(orders[i], null);
        } else {
          // keep the parent id correct
          smallTablesParentOp.add(parent);
          directOperators.add(null);
        }
        // let hashtable Op be the child of this parent
        parent.replaceChild(mapJoinOp, hashTableSinkOp);
        if (directFetchable) {
          parent.setChildOperators(null);
        }

        // create an new operator: HashTable DummyOpeator, which share the table desc
        HashTableDummyDesc desc = new HashTableDummyDesc();
        HashTableDummyOperator dummyOp = (HashTableDummyOperator) OperatorFactory.get(desc);
        TableDesc tbl;

        if (parent.getSchema() == null) {
          if (parent instanceof TableScanOperator) {
            tbl = ((TableScanOperator) parent).getTableDesc();
          } else {
            throw new SemanticException("Expected parent operator of type TableScanOperator." +
              "Found " + parent.getClass().getName() + " instead.");
          }
        } else {
          // get parent schema
          RowSchema rowSchema = parent.getSchema();
          tbl = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(
              rowSchema, ""));
        }
        dummyOp.getConf().setTbl(tbl);
        // let the dummy op be the parent of mapjoin op
        mapJoinOp.replaceParent(parent, dummyOp);
        List<Operator<? extends OperatorDesc>> dummyChildren =
          new ArrayList<Operator<? extends OperatorDesc>>();
        dummyChildren.add(mapJoinOp);
        dummyOp.setChildOperators(dummyChildren);
        // add this dummy op to the dummp operator list
        dummyOperators.add(dummyOp);
      }
      hashTableSinkOp.setParentOperators(smallTablesParentOp);
      for (Operator<? extends OperatorDesc> op : dummyOperators) {
        context.addDummyParentOp(op);
      }
      if (hasAnyDirectFetch(directOperators)) {
        context.addDirectWorks(mapJoinOp, directOperators);
      }
      return null;
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.