Package org.apache.hadoop.hive.ql.exec

Examples of org.apache.hadoop.hive.ql.exec.JoinOperator


      newPlan.getPathToPartitionInfo().put(bigKeyDirPath.toString(), part);
      newPlan.getAliasToPartnInfo().put(alias, part);

      Operator<? extends OperatorDesc> reducer = clonePlan.getReduceWork().getReducer();
      assert reducer instanceof JoinOperator;
      JoinOperator cloneJoinOp = (JoinOperator) reducer;

      String dumpFilePrefix = "mapfile"+PlanUtils.getCountForMapJoinDumpFilePrefix();
      MapJoinDesc mapJoinDescriptor = new MapJoinDesc(newJoinKeys, keyTblDesc,
          newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc,joinDescriptor
          .getOutputColumnNames(), i, joinDescriptor.getConds(),
          joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix);
      mapJoinDescriptor.setTagOrder(tags);
      mapJoinDescriptor.setHandleSkewJoin(false);
      mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes());

      MapredLocalWork localPlan = new MapredLocalWork(
          new LinkedHashMap<String, Operator<? extends OperatorDesc>>(),
          new LinkedHashMap<String, FetchWork>());
      Map<Byte, Path> smallTblDirs = smallKeysDirMap.get(src);

      for (int j = 0; j < numAliases; j++) {
        if (j == i) {
          continue;
        }
        Byte small_alias = tags[j];
        Operator<? extends OperatorDesc> tblScan_op2 = parentOps[j];
        localPlan.getAliasToWork().put(small_alias.toString(), tblScan_op2);
        Path tblDir = smallTblDirs.get(small_alias);
        localPlan.getAliasToFetchWork().put(small_alias.toString(),
            new FetchWork(tblDir, tableDescList.get(small_alias)));
      }

      newPlan.setMapLocalWork(localPlan);

      // construct a map join and set it as the child operator of tblScan_op
      MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory
          .getAndMakeChild(mapJoinDescriptor, (RowSchema) null, parentOps);
      // change the children of the original join operator to point to the map
      // join operator
      List<Operator<? extends OperatorDesc>> childOps = cloneJoinOp
          .getChildOperators();
      for (Operator<? extends OperatorDesc> childOp : childOps) {
        childOp.replaceParent(cloneJoinOp, mapJoinOp);
      }
      mapJoinOp.setChildOperators(childOps);
View Full Code Here


  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {

    JoinOperator joinOp = (JoinOperator) nd;
    SortBucketJoinProcCtx smbJoinContext = (SortBucketJoinProcCtx) procCtx;

    boolean convert =
        canConvertJoinToSMBJoin(
            joinOp, smbJoinContext, pGraphContext);
View Full Code Here

  public static class JoinRule implements NodeProcessor {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      JoinOperator joinOp = (JoinOperator)nd;
      List<List<String>> bucketColsList = new ArrayList<List<String>>();
      byte pos = 0;
      for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
        if (!(parentOp instanceof ReduceSinkOperator)) {
          // can be mux operator
          break;
        }
        ReduceSinkOperator rsOp = (ReduceSinkOperator)parentOp;
        if (rsOp.getOpTraits() == null) {
          ReduceSinkRule rsRule = new ReduceSinkRule();
          rsRule.process(rsOp, stack, procCtx, nodeOutputs);
        }
        bucketColsList.add(getOutputColNames(joinOp, rsOp, pos));
        pos++;
      }

      joinOp.setOpTraits(new OpTraits(bucketColsList, -1));
      return null;
    }
View Full Code Here

      // We are in the join operator now.

      SkewJoinOptProcCtx ctx = (SkewJoinOptProcCtx) procCtx;
      parseContext = ctx.getpGraphContext();

      JoinOperator joinOp = (JoinOperator)nd;
      // This join has already been processed
      if (ctx.getDoneJoins().contains(joinOp)) {
        return null;
      }

      ctx.getDoneJoins().add(joinOp);

      Operator<? extends OperatorDesc> currOp = joinOp;
      boolean processSelect = false;

      // Is there a select following
      // Clone the select also. It is useful for a follow-on optimization where the union
      // followed by a select star is completely removed.
      if ((joinOp.getChildOperators().size() == 1) &&
          (joinOp.getChildOperators().get(0) instanceof SelectOperator)) {
        currOp = joinOp.getChildOperators().get(0);
        processSelect = true;
      }

      List<TableScanOperator> tableScanOpsForJoin = new ArrayList<TableScanOperator>();
      if (!getTableScanOpsForJoin(joinOp, tableScanOpsForJoin)) {
        return null;
      }

      if ((tableScanOpsForJoin == null) || (tableScanOpsForJoin.isEmpty())) {
        return null;
      }

      // Get the skewed values in all the tables
      Map<List<ExprNodeDesc>, List<List<String>>> skewedValues =
        getSkewedValues(joinOp, tableScanOpsForJoin);

      // If there are no skewed values, nothing needs to be done
      if (skewedValues == null || skewedValues.size() == 0) {
        return null;
      }

      // After this optimization, the tree should be like:
      //  TS -> (FIL "skewed rows") * -> RS -
      //                                     \
      //                                       ->   JOIN
      //                                     /           \
      //  TS -> (FIL "skewed rows") * -> RS -             \
      //                                                   \
      //                                                     ->  UNION -> ..
      //                                                   /
      //  TS -> (FIL "no skewed rows") * -> RS -          /
      //                                        \        /
      //                                         -> JOIN
      //                                        /
      //  TS -> (FIL "no skewed rows") * -> RS -
      //

      // Create a clone of the operator
      Operator<? extends OperatorDesc> currOpClone;
      try {
        currOpClone = currOp.clone();
        insertRowResolvers(currOp, currOpClone, ctx);
      } catch (CloneNotSupportedException e) {
        LOG.debug("Operator tree could not be cloned");
        return null;
      }

      JoinOperator joinOpClone;
      if (processSelect) {
        joinOpClone = (JoinOperator)(currOpClone.getParentOperators().get(0));
      } else {
        joinOpClone = (JoinOperator)currOpClone;
      }
View Full Code Here

    return new NodeProcessor() {
      @Override
      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
        SortBucketJoinProcCtx smbJoinContext = (SortBucketJoinProcCtx)procCtx;
        JoinOperator joinOperator = (JoinOperator)nd;
        int size = stack.size();
        if (!(stack.get(size-1) instanceof JoinOperator) ||
            !(stack.get(size-2) instanceof ReduceSinkOperator)) {
          smbJoinContext.getRejectedJoinOps().add(joinOperator);
          return null;
View Full Code Here

      process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
          throws SemanticException {

    OptimizeTezProcContext context = (OptimizeTezProcContext) procCtx;

    JoinOperator joinOp = (JoinOperator) nd;

    TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf);
    if (!context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) {
      // we are just converting to a common merge join operator. The shuffle
      // join in map-reduce case.
      Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx);
      if (retval == null) {
        return retval;
      } else {
        int pos = 0; // it doesn't matter which position we use in this case.
        convertJoinSMBJoin(joinOp, context, pos, 0, false, false);
        return null;
      }
    }

    // if we have traits, and table info is present in the traits, we know the
    // exact number of buckets. Else choose the largest number of estimated
    // reducers from the parent operators.
    int numBuckets = -1;
    int estimatedBuckets = -1;
    if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
      for (Operator<? extends OperatorDesc>parentOp : joinOp.getParentOperators()) {
        if (parentOp.getOpTraits().getNumBuckets() > 0) {
          numBuckets = (numBuckets < parentOp.getOpTraits().getNumBuckets()) ?
              parentOp.getOpTraits().getNumBuckets() : numBuckets;
        }

        if (parentOp instanceof ReduceSinkOperator) {
          ReduceSinkOperator rs = (ReduceSinkOperator)parentOp;
          estimatedBuckets = (estimatedBuckets < rs.getConf().getNumReducers()) ?
              rs.getConf().getNumReducers() : estimatedBuckets;
        }
      }

      if (numBuckets <= 0) {
        numBuckets = estimatedBuckets;
        if (numBuckets <= 0) {
          numBuckets = 1;
        }
      }
    } else {
      numBuckets = 1;
    }
    LOG.info("Estimated number of buckets " + numBuckets);
    int mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, numBuckets);
    if (mapJoinConversionPos < 0) {
      Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx);
      if (retval == null) {
        return retval;
      } else {
          // only case is full outer join with SMB enabled which is not possible. Convert to regular
          // join.
          convertJoinSMBJoin(joinOp, context, 0, 0, false, false);
          return null;
      }
    }

    if (numBuckets > 1) {
      if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
        if (convertJoinBucketMapJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx)) {
          return null;
        }
      }
    }

    LOG.info("Convert to non-bucketed map join");
    // check if we can convert to map join no bucket scaling.
    mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1);
    if (mapJoinConversionPos < 0) {
      // we are just converting to a common merge join operator. The shuffle
      // join in map-reduce case.
      int pos = 0; // it doesn't matter which position we use in this case.
      convertJoinSMBJoin(joinOp, context, pos, 0, false, false);
      return null;
    }

    MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos);
    // map join operator by default has no bucket cols
    mapJoinOp.setOpTraits(new OpTraits(null, -1, null));
    mapJoinOp.setStatistics(joinOp.getStatistics());
    // propagate this change till the next RS
    for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) {
      setAllChildrenTraitsToNull(childOp);
    }
View Full Code Here

      // be called for leafs.
      assert(!stack.isEmpty());

      // LineageCtx
      LineageCtx lCtx = (LineageCtx) procCtx;
      JoinOperator op = (JoinOperator)nd;
      JoinDesc jd = op.getConf();

      // The input operator to the join is always a reduce sink operator
      ReduceSinkOperator inpOp = (ReduceSinkOperator)getParent(stack);
      ReduceSinkDesc rd = inpOp.getConf();
      int tag = rd.getTag();

      // Iterate over the outputs of the join operator and merge the
      // dependencies of the columns that corresponding to the tag.
      int cnt = 0;
      List<ExprNodeDesc> exprs = jd.getExprs().get((byte)tag);
      for(ColumnInfo ci : op.getSchema().getSignature()) {
        if (jd.getReversedExprs().get(ci.getInternalName()) != tag) {
          continue;
        }

        // Otherwise look up the expression corresponding to this ci
View Full Code Here

   */
  public static class ColumnPrunerJoinProc implements NodeProcessor {
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      JoinOperator op = (JoinOperator) nd;
      pruneJoinOperator(ctx, op, op.getConf(), op.getColumnExprMap(), null,
          false);
      return null;
    }
View Full Code Here

      OpWalkerInfo owi = (OpWalkerInfo) procCtx;
      if (operator.getNumChild() == 1 &&
          operator.getChildOperators().get(0) instanceof JoinOperator) {
        if (HiveConf.getBoolVar(owi.getParseContext().getConf(),
            HiveConf.ConfVars.HIVEPPDRECOGNIZETRANSITIVITY)) {
          JoinOperator child = (JoinOperator) operator.getChildOperators().get(0);
          int targetPos = child.getParentOperators().indexOf(operator);
          applyFilterTransitivity(child, targetPos, owi);
        }
      }
      return null;
    }
View Full Code Here

        return null;
      }

      if (op.getChildOperators().size() == 1
          && op.getChildOperators().get(0) instanceof JoinOperator) {
        JoinOperator joinOp = (JoinOperator) op.getChildOperators().get(0);
        if (skipFolding(joinOp.getConf(), rsDesc.getTag())) {
          LOG.debug("Skip folding in outer join " + op);
          cppCtx.getOpToConstantExprs().put(op, new HashMap<ColumnInfo, ExprNodeDesc>());
          return null;
        }
      }
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.ql.exec.JoinOperator

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.