Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.MapJoinDesc
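
MapJoinDesc is the descriptor object for Hive's map-side join: it carries the join keys, value expressions, table descriptors, the position of the big (streamed) table, and tuning flags that MapJoinOperator and HashTableSinkOperator read at runtime. The excerpts below all follow the same pattern: build a MapJoinDesc from an existing join descriptor, then adjust it with setters before materializing the operator. A minimal sketch of that pattern, restricted to the no-arg constructor and setters that appear in the excerpts (all values are placeholders):

    // Minimal sketch; values are placeholders, not recommendations.
    MapJoinDesc desc = new MapJoinDesc();
    desc.setTagOrder(new Byte[] {(byte) 0, (byte) 1}); // tag order of the joined inputs
    desc.setHandleSkewJoin(false);                     // map joins bypass skew-join handling
    desc.setHashTableMemoryUsage(0.9f);                // heap fraction for the small-table hash table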


      Operator<? extends OperatorDesc> reducer = clonePlan.getReduceWork().getReducer();
      assert reducer instanceof JoinOperator;
      JoinOperator cloneJoinOp = (JoinOperator) reducer;

      String dumpFilePrefix = "mapfile"+PlanUtils.getCountForMapJoinDumpFilePrefix();
      MapJoinDesc mapJoinDescriptor = new MapJoinDesc(newJoinKeys, keyTblDesc,
          newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc,joinDescriptor
          .getOutputColumnNames(), i, joinDescriptor.getConds(),
          joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix);
      mapJoinDescriptor.setTagOrder(tags);
      mapJoinDescriptor.setHandleSkewJoin(false);
      mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes());

      MapredLocalWork localPlan = new MapredLocalWork(
          new LinkedHashMap<String, Operator<? extends OperatorDesc>>(),
          new LinkedHashMap<String, FetchWork>());
      Map<Byte, Path> smallTblDirs = smallKeysDirMap.get(src);
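
This excerpt is from skew-join handling: each group of skewed keys gets its own map-join task, and PlanUtils.getCountForMapJoinDumpFilePrefix() returns a fresh counter value each time, so every converted join writes its small-table dump files under a distinct prefix (mapfile1, mapfile2, and so on).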


    TableScanDesc desc = op.getConf();
    return !desc.isGatherStats();
  }

  private boolean validateMapJoinOperator(MapJoinOperator op) {
    MapJoinDesc desc = op.getConf();
    return validateMapJoinDesc(desc);
  }
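
The body of validateMapJoinDesc is not part of this excerpt (the surrounding code appears to be the vectorization validator). A hypothetical sketch of such a check, restricted to MapJoinDesc getters that appear elsewhere on this page; the real implementation performs vectorization-specific checks on the key and value expressions:

  // Hypothetical sketch only, not the actual Hive implementation.
  private boolean validateMapJoinDesc(MapJoinDesc desc) {
    Map<Byte, List<ExprNodeDesc>> keys = desc.getKeys();
    if (keys == null || keys.isEmpty()) {
      return false; // no key expressions to build a hash table on
    }
    // The big-table position must point at one of the join's inputs.
    int bigTablePos = desc.getPosBigTable();
    return bigTablePos >= 0 && bigTablePos < keys.size();
  }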

  // Convert a common (reduce-side) join operator into a map join operator.
  static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf,
      LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtxMap,
      JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin)
      throws SemanticException {

    MapJoinDesc mapJoinDescriptor =
        getMapJoinDesc(hconf, opParseCtxMap, op, joinTree, mapJoinPos, noCheckOuterJoin);

    // reduce sink row resolver used to generate map join op
    RowResolver outputRS = opParseCtxMap.get(op).getRowResolver();
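
For orientation, a hedged sketch of a call site inside the same package; every argument is assumed to come from the enclosing semantic-analysis state:

    // Hypothetical invocation; hconf, opParseCtxMap, joinOp and joinTree
    // are assumptions taken from the surrounding ParseContext.
    MapJoinOperator mapJoinOp = MapJoinProcessor.convertJoinOpMapJoinOp(
        hconf, opParseCtxMap, joinOp, joinTree,
        0 /* mapJoinPos: which input is the big table */,
        false /* noCheckOuterJoin: keep the outer-join safety check */);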

    // Create a new map join operator
    SMBJoinDesc smbJoinDesc = smbJoinOp.getConf();
    List<ExprNodeDesc> keyCols = smbJoinDesc.getKeys().get(Byte.valueOf((byte) 0));
    TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(hconf, PlanUtils
        .getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX));
    MapJoinDesc mapJoinDesc = new MapJoinDesc(smbJoinDesc.getKeys(),
        keyTableDesc, smbJoinDesc.getExprs(),
        smbJoinDesc.getValueTblDescs(), smbJoinDesc.getValueTblDescs(),
        smbJoinDesc.getOutputColumnNames(),
        bigTablePos, smbJoinDesc.getConds(),
        smbJoinDesc.getFilters(), smbJoinDesc.isNoOuterJoin(), smbJoinDesc.getDumpFilePrefix());

    mapJoinDesc.setStatistics(smbJoinDesc.getStatistics());

    RowResolver joinRS = opParseCtxMap.get(smbJoinOp).getRowResolver();
    // The mapjoin has the same schema as the join operator
    MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
        mapJoinDesc, joinRS.getRowSchema(),

    List<String> outputColumnNames = op.getConf().getOutputColumnNames();
    TableDesc keyTableDesc =
        PlanUtils.getMapJoinKeyTableDesc(hconf,
            PlanUtils.getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX));
    JoinCondDesc[] joinCondns = op.getConf().getConds();
    MapJoinDesc mapJoinDescriptor =
        new MapJoinDesc(keyExprMap, keyTableDesc, newValueExprs, valueTableDescs,
            valueFilteredTableDescs, outputColumnNames, mapJoinPos, joinCondns,
            filters, op.getConf().getNoOuterJoin(), dumpFilePrefix);
    mapJoinDescriptor.setStatistics(op.getConf().getStatistics());
    mapJoinDescriptor.setTagOrder(tagOrder);
    mapJoinDescriptor.setNullSafes(desc.getNullSafes());
    mapJoinDescriptor.setFilterMap(desc.getFilterMap());
    if (!valueIndices.isEmpty()) {
      mapJoinDescriptor.setValueIndices(valueIndices);
    }

    return mapJoinDescriptor;
  }
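
A note on the last two setters: nullSafes is a per-key-position boolean array recording whether the key comparison used the null-safe operator (<=>), and filterMap (roughly) records which join inputs carry residual filter expressions. Both are copied unchanged from the original join descriptor so the converted map join preserves the join's semantics.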

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      @SuppressWarnings("unchecked")
      AbstractMapJoinOperator<? extends MapJoinDesc> mjOp = (AbstractMapJoinOperator<? extends MapJoinDesc>) nd;
      MapJoinDesc mjDesc = mjOp.getConf();

      String bigTableAlias = mjDesc.getBigTableAlias();
      if (bigTableAlias == null) {
        Operator<? extends OperatorDesc> parent = null;
        for (Operator<? extends OperatorDesc> op : mjOp.getParentOperators()) {
          if (op instanceof TableScanOperator) {
            parent = op;
          }
        }
        if (parent != null) {
          TableScanDesc tDesc = ((TableScanOperator) parent).getConf();
          bigTableAlias = tDesc.getAlias();
        }
      }
      bigTableAlias = bigTableAlias == null ? "?" : bigTableAlias;

      List<ExprNodeDesc> joinExprs = mjDesc.getKeys().values().iterator().next();

      if (joinExprs.isEmpty()) {
        warnings.add(
            String.format("Map Join %s[bigTable=%s] in task '%s' is a cross product",
                mjOp.toString(), bigTableAlias, taskName));
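
With an empty key list, the check above reports the join as a cross product. The emitted warning has this shape (operator, alias, and task names are illustrative):

    Map Join MAPJOIN[23][bigTable=a] in task 'Stage-2:MAPRED' is a cross product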

        hasGroupBy(mapJoinOp, context);
      } catch (Exception e) {
        e.printStackTrace();
      }

      MapJoinDesc mapJoinDesc = mapJoinOp.getConf();

      // a map join should not be affected by join reordering
      mapJoinDesc.resetOrder();

      HiveConf conf = context.getParseCtx().getConf();
      // set hashtable memory usage
      float hashtableMemoryUsage;
      if (context.isFollowedByGroupBy()) {
        hashtableMemoryUsage = conf.getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
      } else {
        hashtableMemoryUsage = conf.getFloatVar(
            HiveConf.ConfVars.HIVEHASHTABLEMAXMEMORYUSAGE);
      }
      mapJoinDesc.setHashTableMemoryUsage(hashtableMemoryUsage);
      LOG.info("Setting max memory usage to " + hashtableMemoryUsage + " for table sink "
          + (context.isFollowedByGroupBy() ? "" : "not") + " followed by group by");

      HashTableSinkDesc hashTableSinkDesc = new HashTableSinkDesc(mapJoinDesc);
      HashTableSinkOperator hashTableSinkOp = (HashTableSinkOperator) OperatorFactory
          .get(hashTableSinkDesc);

      // position of the big table among the map join's parent operators
      int bigTable = mapJoinDesc.getPosBigTable();

      // todo: support tez/vectorization
      boolean useNonstaged = conf.getBoolVar(
          HiveConf.ConfVars.HIVECONVERTJOINUSENONSTAGED) &&
          conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") &&
          !conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);

      // the parent ops for hashTableSinkOp
      List<Operator<? extends OperatorDesc>> smallTablesParentOp =
        new ArrayList<Operator<? extends OperatorDesc>>();
      List<Operator<? extends OperatorDesc>> dummyOperators =
        new ArrayList<Operator<? extends OperatorDesc>>();
      List<Operator<? extends OperatorDesc>> directOperators =
          new ArrayList<Operator<? extends OperatorDesc>>();
      // get all parents
      List<Operator<? extends OperatorDesc>> parentsOp = mapJoinOp.getParentOperators();
      for (byte i = 0; i < parentsOp.size(); i++) {
        if (i == bigTable) {
          smallTablesParentOp.add(null);
          directOperators.add(null);
          continue;
        }
        Operator<? extends OperatorDesc> parent = parentsOp.get(i);
        boolean directFetchable = useNonstaged &&
            (parent instanceof TableScanOperator || parent instanceof MapJoinOperator);
        if (directFetchable) {
          // No filter and no projection, so the small table need not be staged.
          smallTablesParentOp.add(null);
          directOperators.add(parent);
          hashTableSinkDesc.getKeys().put(i, null);
          hashTableSinkDesc.getExprs().put(i, null);
          hashTableSinkDesc.getFilters().put(i, null);
        } else {
          // keep the parent id correct
          smallTablesParentOp.add(parent);
          directOperators.add(null);
          int[] valueIndex = mapJoinDesc.getValueIndex(i);
          if (valueIndex != null) {
            // remove values in key exprs
            // schema for value is already fixed in MapJoinProcessor#convertJoinOpMapJoinOp
            List<ExprNodeDesc> newValues = new ArrayList<ExprNodeDesc>();
            List<ExprNodeDesc> values = hashTableSinkDesc.getExprs().get(i);
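
Both memory thresholds come from HiveConf. A hedged sketch of setting them programmatically before planning; the 0.55 and 0.90 values mirror common defaults but should be treated as assumptions for your Hive version:

      HiveConf conf = new HiveConf();
      // Cap for the local hash table when the map join feeds a group by.
      conf.setFloatVar(HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE, 0.55f);
      // Cap for the local hash table in the general case.
      conf.setFloatVar(HiveConf.ConfVars.HIVEHASHTABLEMAXMEMORYUSAGE, 0.90f);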

      return false;
    }

    MapJoinOperator mapJoinOp =
      convertJoinMapJoin(joinOp, context, bigTablePosition);
    MapJoinDesc joinDesc = mapJoinOp.getConf();
    joinDesc.setBucketMapJoin(true);

    // we can set the traits for this join operator
    OpTraits opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(),
        tezBucketJoinProcCtx.getNumBuckets());
    mapJoinOp.setOpTraits(opTraits);
    setNumberOfBucketsOnChildren(mapJoinOp);

    // Once the conversion is done, we can set the partitioner to bucket cols on the small table.
    Map<String, Integer> bigTableBucketNumMapping = new HashMap<String, Integer>();
    bigTableBucketNumMapping.put(joinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets());
    joinDesc.setBigTableBucketNumMapping(bigTableBucketNumMapping);
    LOG.info("Setting legacy map join to " + (!tezBucketJoinProcCtx.isSubQuery()));
    joinDesc.setCustomBucketMapJoin(!tezBucketJoinProcCtx.isSubQuery());

    return true;
  }
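
Whether this bucket-map-join conversion runs at all is gated by configuration. As an assumption (verify the exact ConfVars constant against your Hive version), enabling it on Tez looks roughly like:

    // Assumed constant name for hive.convert.join.bucket.mapjoin.tez.
    conf.setBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ, true);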

    List<String> outputColumnNames = op.getConf().getOutputColumnNames();
    TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(hconf,
        PlanUtils.getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX));
    JoinCondDesc[] joinCondns = op.getConf().getConds();
    MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, newValueExprs,
        valueTableDescs, valueFilteredTableDescs, outputColumnNames, mapJoinPos, joinCondns,
        filters, op.getConf().getNoOuterJoin(), dumpFilePrefix);
    mapJoinDescriptor.setStatistics(op.getConf().getStatistics());
    mapJoinDescriptor.setTagOrder(tagOrder);
    mapJoinDescriptor.setNullSafes(desc.getNullSafes());
    mapJoinDescriptor.setFilterMap(desc.getFilterMap());

    // reduce sink row resolver used to generate map join op
    RowResolver outputRS = opParseCtxMap.get(op).getRowResolver();

    MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
