Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.JoinDesc


    JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin,
    boolean validateMapJoinTree)
    throws SemanticException {

    // outer join cannot be performed on a table which is being cached
    JoinDesc desc = op.getConf();
    JoinCondDesc[] condns = desc.getConds();
    Byte[] tagOrder = desc.getTagOrder();

    if (!noCheckOuterJoin) {
      if (checkMapJoin(mapJoinPos, condns) < 0) {
        throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
      }
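The truncated check above guards map-join conversion: the streamed big table must sit on the preserved side of every outer join, which is what MapJoinProcessor.checkMapJoin verifies. A simplified sketch of that rule under the same JoinDesc API (the helper name isValidBigTablePos is hypothetical):

    // Simplified sketch, not the full checkMapJoin logic: a big-table
    // candidate is invalid if any outer join would force a preserved side
    // into the in-memory hash table.
    static boolean isValidBigTablePos(JoinDesc desc, int mapJoinPos) {
      for (JoinCondDesc cond : desc.getConds()) {
        if (cond.getType() == JoinDesc.FULL_OUTER_JOIN) {
          return false; // neither side of a full outer join can be hashed
        }
        if (cond.getType() == JoinDesc.LEFT_OUTER_JOIN && cond.getLeft() != mapJoinPos) {
          return false; // the preserved (left) side must be the streamed table
        }
        if (cond.getType() == JoinDesc.RIGHT_OUTER_JOIN && cond.getRight() != mapJoinPos) {
          return false; // the preserved (right) side must be the streamed table
        }
      }
      return true;
    }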



  public static MapJoinDesc getMapJoinDesc(HiveConf hconf,
      LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtxMap,
      JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin) throws SemanticException {
    JoinDesc desc = op.getConf();
    JoinCondDesc[] condns = desc.getConds();
    Byte[] tagOrder = desc.getTagOrder();

    // outer join cannot be performed on a table which is being cached
    if (!noCheckOuterJoin) {
      if (checkMapJoin(mapJoinPos, condns) < 0) {
        throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
      }
    }

    // Walk over all the sources (which are guaranteed to be reduce sink
    // operators).
    // The join outputs a concatenation of all the inputs.
    QBJoinTree leftSrc = joinTree.getJoinSrc();
    List<ReduceSinkOperator> oldReduceSinkParentOps =
        new ArrayList<ReduceSinkOperator>(op.getNumParent());
    if (leftSrc != null) {
      // assert mapJoinPos == 0;
      Operator<? extends OperatorDesc> parentOp = op.getParentOperators().get(0);
      assert parentOp.getParentOperators().size() == 1;
      oldReduceSinkParentOps.add((ReduceSinkOperator) parentOp);
    }

    byte pos = 0;
    for (String src : joinTree.getBaseSrc()) {
      if (src != null) {
        Operator<? extends OperatorDesc> parentOp = op.getParentOperators().get(pos);
        assert parentOp.getParentOperators().size() == 1;
        oldReduceSinkParentOps.add((ReduceSinkOperator) parentOp);
      }
      pos++;
    }

    Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
    List<ColumnInfo> schema = new ArrayList<ColumnInfo>(op.getSchema().getSignature());
    Map<Byte, List<ExprNodeDesc>> valueExprs = op.getConf().getExprs();
    Map<Byte, List<ExprNodeDesc>> newValueExprs = new HashMap<Byte, List<ExprNodeDesc>>();
    for (Map.Entry<Byte, List<ExprNodeDesc>> entry : valueExprs.entrySet()) {
      byte tag = entry.getKey();
      Operator<?> terminal = oldReduceSinkParentOps.get(tag);

      List<ExprNodeDesc> values = entry.getValue();
      List<ExprNodeDesc> newValues = ExprNodeDescUtils.backtrack(values, op, terminal);
      newValueExprs.put(tag, newValues);
      for (int i = 0; i < schema.size(); i++) {
        ColumnInfo column = schema.get(i);
        if (column == null) {
          continue;
        }
        ExprNodeDesc expr = colExprMap.get(column.getInternalName());
        int index = ExprNodeDescUtils.indexOf(expr, values);
        if (index >= 0) {
          colExprMap.put(column.getInternalName(), newValues.get(index));
          schema.set(i, null);
        }
      }
    }

    // rewrite value index for mapjoin
    Map<Byte, int[]> valueIndices = new HashMap<Byte, int[]>();

    // get the join keys from old parent ReduceSink operators
    Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();

    // construct valueTableDescs and valueFilteredTableDescs
    List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
    List<TableDesc> valueFilteredTableDescs = new ArrayList<TableDesc>();
    int[][] filterMap = desc.getFilterMap();
    for (pos = 0; pos < op.getParentOperators().size(); pos++) {
      ReduceSinkOperator inputRS = oldReduceSinkParentOps.get(pos);
      List<ExprNodeDesc> keyCols = inputRS.getConf().getKeyCols();
      List<ExprNodeDesc> valueCols = newValueExprs.get(pos);
      if (pos != mapJoinPos) {
        // drop value expressions that duplicate key expressions from the
        // value table schema; the hash sink's value expressions are
        // rewritten later in LocalMapJoinProcessor
        int[] valueIndex = new int[valueCols.size()];
        List<ExprNodeDesc> valueColsInValueExpr = new ArrayList<ExprNodeDesc>();
        for (int i = 0; i < valueIndex.length; i++) {
          ExprNodeDesc expr = valueCols.get(i);
          int kindex = ExprNodeDescUtils.indexOf(expr, keyCols);
          if (kindex >= 0) {
            valueIndex[i] = kindex;
          } else {
            valueIndex[i] = -valueColsInValueExpr.size() - 1;
            valueColsInValueExpr.add(expr);
          }
        }
        if (needValueIndex(valueIndex)) {
          valueIndices.put(pos, valueIndex);
        }
        valueCols = valueColsInValueExpr;
      }
      // deep copy expr node desc
      List<ExprNodeDesc> valueFilteredCols = ExprNodeDescUtils.clone(valueCols);
      if (filterMap != null && filterMap[pos] != null && pos != mapJoinPos) {
        ExprNodeColumnDesc isFilterDesc =
            new ExprNodeColumnDesc(
                TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME), "filter",
                "filter", false);
        valueFilteredCols.add(isFilterDesc);
      }

      TableDesc valueTableDesc =
          PlanUtils.getMapJoinValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(valueCols,
              "mapjoinvalue"));
      TableDesc valueFilteredTableDesc =
          PlanUtils.getMapJoinValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(
              valueFilteredCols, "mapjoinvalue"));

      valueTableDescs.add(valueTableDesc);
      valueFilteredTableDescs.add(valueFilteredTableDesc);

      keyExprMap.put(pos, keyCols);
    }

    Map<Byte, List<ExprNodeDesc>> filters = desc.getFilters();
    Map<Byte, List<ExprNodeDesc>> newFilters = new HashMap<Byte, List<ExprNodeDesc>>();
    for (Map.Entry<Byte, List<ExprNodeDesc>> entry : filters.entrySet()) {
      byte srcTag = entry.getKey();
      List<ExprNodeDesc> filter = entry.getValue();

      Operator<?> terminal = oldReduceSinkParentOps.get(srcTag);
      newFilters.put(srcTag, ExprNodeDescUtils.backtrack(filter, op, terminal));
    }
    desc.setFilters(filters = newFilters);

    // create dumpfile prefix needed to create descriptor
    String dumpFilePrefix = "";
    if (joinTree.getMapAliases() != null) {
      for (String mapAlias : joinTree.getMapAliases()) {
        dumpFilePrefix = dumpFilePrefix + mapAlias;
      }
      dumpFilePrefix = dumpFilePrefix + "-" + PlanUtils.getCountForMapJoinDumpFilePrefix();
    } else {
      dumpFilePrefix = "mapfile" + PlanUtils.getCountForMapJoinDumpFilePrefix();
    }

    List<ExprNodeDesc> keyCols = keyExprMap.get((byte) mapJoinPos);

    List<String> outputColumnNames = op.getConf().getOutputColumnNames();
    TableDesc keyTableDesc =
        PlanUtils.getMapJoinKeyTableDesc(hconf,
            PlanUtils.getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX));
    JoinCondDesc[] joinCondns = op.getConf().getConds();
    MapJoinDesc mapJoinDescriptor =
        new MapJoinDesc(keyExprMap, keyTableDesc, newValueExprs, valueTableDescs,
            valueFilteredTableDescs, outputColumnNames, mapJoinPos, joinCondns, filters, op
                .getConf().getNoOuterJoin(), dumpFilePrefix);
    mapJoinDescriptor.setStatistics(op.getConf().getStatistics());
    mapJoinDescriptor.setTagOrder(tagOrder);
    mapJoinDescriptor.setNullSafes(desc.getNullSafes());
    mapJoinDescriptor.setFilterMap(desc.getFilterMap());
    if (!valueIndices.isEmpty()) {
      mapJoinDescriptor.setValueIndices(valueIndices);
    }

    return mapJoinDescriptor;
  }
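A hedged usage sketch for the factory method above, assuming hconf, opParseCtxMap, joinOp and joinTree are already in scope; parent position 0 is chosen as the streamed big table:

    // Build the MapJoinDesc with parent 0 as the big (streamed) table;
    // outer-join validation stays enabled.
    MapJoinDesc mjDesc = MapJoinProcessor.getMapJoinDesc(hconf, opParseCtxMap,
        joinOp, joinTree, /* mapJoinPos */ 0, /* noCheckOuterJoin */ false);
    // Tag order, null-safes and the filter map are carried over from the
    // original JoinDesc, so the rest of the plan sees consistent metadata.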

        LOG.info("Cannot guess if CommonJoinOperator will optimize " +
            joinOp.getName() + " " + joinOp.getIdentifier());
        continue;
      }

      JoinDesc joinDesc = joinOp.getConf();
      Byte[] order = joinDesc.getTagOrder();
      int numAliases = order.length;
      Set<Integer> bigTableCandidates =
          MapJoinProcessor.getBigTableCandidates(joinDesc.getConds());
      if (bigTableCandidates.isEmpty()) {
        continue;
      }

      long thresholdOfSmallTblSizeSum = HiveConf.getLongVar(pCtx.getConf(),
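The candidate set above feeds the small-table size threshold check; a minimal sketch using only the accessors shown in this snippet:

      // Which parent positions may legally become the streamed big table?
      JoinDesc joinDesc = joinOp.getConf();
      Set<Integer> candidates =
          MapJoinProcessor.getBigTableCandidates(joinDesc.getConds());
      for (int pos : candidates) {
        Byte tag = joinDesc.getTagOrder()[pos]; // tag of that input branch
        // the remaining (hashed) inputs are then checked against the
        // small-table size threshold
      }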

    for (int i = 0; i < join.getJoinCond().length; i++) {
      JoinCond condn = join.getJoinCond()[i];
      joinCondns[i] = new JoinCondDesc(condn);
    }

    JoinDesc desc = new JoinDesc(exprMap, outputColumnNames,
        join.getNoOuterJoin(), joinCondns, filterMap);
    desc.setReversedExprs(reversedExprs);
    desc.setFilterMap(join.getFilterMap());

    JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(desc,
        new RowSchema(outputRR.getColumnInfos()), rightOps);
    joinOp.setColumnExprMap(colExprMap);
    joinOp.setPosToAliasMap(posToAliasMap);

    if (join.getNullSafes() != null) {
      boolean[] nullsafes = new boolean[join.getNullSafes().size()];
      for (int i = 0; i < nullsafes.length; i++) {
        nullsafes[i] = join.getNullSafes().get(i);
      }
      desc.setNullSafes(nullsafes);
    }
    return putOpInsertMap(joinOp, outputRR);
  }
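Everything the snippet stores ends up on the operator's JoinDesc; a small sketch of reading it back (the variable name built is illustrative):

    // Read back what the constructor and setters above stored.
    JoinDesc built = joinOp.getConf();
    JoinCondDesc[] conds = built.getConds();          // one entry per join clause
    List<String> outputs = built.getOutputColumnNames();
    boolean[] nullSafes = built.getNullSafes();       // null unless <=> keys were used
    boolean noOuter = built.getNoOuterJoin();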

    noOuterJoin = joinOp.noOuterJoin;
  }

  public void initiliaze(Configuration hconf) {
    this.hconf = hconf;
    JoinDesc desc = joinOp.getConf();
    skewKeyDefinition = desc.getSkewKeyDefinition();
    skewKeysTableObjectInspector = new HashMap<Byte, StructObjectInspector>(
        numAliases);
    tblDesc = desc.getSkewKeysValuesTables();
    tblSerializers = new HashMap<Byte, SerDe>(numAliases);
    bigKeysExistingMap = new HashMap<Byte, Boolean>(numAliases);
    taskId = Utilities.getTaskId(hconf);

    int[][] filterMap = desc.getFilterMap();
    for (int i = 0; i < numAliases; i++) {
      Byte alias = conf.getTagOrder()[i];
      List<ObjectInspector> skewTableKeyInspectors = new ArrayList<ObjectInspector>();
      StructObjectInspector soi = (StructObjectInspector) joinOp.inputObjInspectors[alias];
      StructField sf = soi.getStructFieldRef(Utilities.ReduceField.KEY
          .toString());
      List<? extends StructField> keyFields = ((StructObjectInspector) sf
          .getFieldObjectInspector()).getAllStructFieldRefs();
      int keyFieldSize = keyFields.size();
      for (int k = 0; k < keyFieldSize; k++) {
        skewTableKeyInspectors.add(keyFields.get(k).getFieldObjectInspector());
      }
      TableDesc joinKeyDesc = desc.getKeyTableDesc();
      List<String> keyColNames = Utilities.getColumnNames(joinKeyDesc
          .getProperties());
      StructObjectInspector structTblKeyInpector = ObjectInspectorFactory
          .getStandardStructObjectInspector(keyColNames, skewTableKeyInspectors);
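The skew-join state initialized above comes straight off the JoinDesc; a minimal sketch, assuming the Map<Byte, TableDesc> shape this snippet assigns to tblDesc:

    // Per-alias descriptors used when spilling skewed keys to disk.
    JoinDesc desc = joinOp.getConf();
    int skewKeyThreshold = desc.getSkewKeyDefinition(); // rows per key before it counts as skewed
    Map<Byte, TableDesc> skewTables = desc.getSkewKeysValuesTables();
    TableDesc joinKeyDesc = desc.getKeyTableDesc();     // key schema shared across aliases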


  public static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf,
      LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtxMap,
      JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin)
      throws SemanticException {

    JoinDesc desc = op.getConf();
    JoinCondDesc[] condns = desc.getConds();
    Byte[] tagOrder = desc.getTagOrder();

    // outer join cannot be performed on a table which is being cached
    if (!noCheckOuterJoin) {
      if (checkMapJoin(mapJoinPos, condns) < 0) {
        throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
      }
    }

    Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();

    // Walk over all the sources (which are guaranteed to be reduce sink
    // operators).
    // The join outputs a concatenation of all the inputs.
    QBJoinTree leftSrc = joinTree.getJoinSrc();
    List<Operator<? extends OperatorDesc>> oldReduceSinkParentOps =
        new ArrayList<Operator<? extends OperatorDesc>>();
    if (leftSrc != null) {
      // assert mapJoinPos == 0;
      Operator<? extends OperatorDesc> parentOp = op.getParentOperators().get(0);
      assert parentOp.getParentOperators().size() == 1;
      oldReduceSinkParentOps.add(parentOp);
    }

    byte pos = 0;
    for (String src : joinTree.getBaseSrc()) {
      if (src != null) {
        Operator<? extends OperatorDesc> parentOp = op.getParentOperators().get(pos);
        assert parentOp.getParentOperators().size() == 1;
        oldReduceSinkParentOps.add(parentOp);
      }
      pos++;
    }

    // get the join keys from old parent ReduceSink operators
    for (pos = 0; pos < op.getParentOperators().size(); pos++) {
      ReduceSinkOperator parent = (ReduceSinkOperator) oldReduceSinkParentOps.get(pos);
      ReduceSinkDesc rsconf = parent.getConf();
      List<ExprNodeDesc> keys = rsconf.getKeyCols();
      keyExprMap.put(pos, keys);
    }

    List<ExprNodeDesc> keyCols = keyExprMap.get(Byte.valueOf((byte) 0));
    StringBuilder keyOrder = new StringBuilder();
    for (int i = 0; i < keyCols.size(); i++) {
      keyOrder.append("+");
    }

    Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
    List<ColumnInfo> schema = new ArrayList<ColumnInfo>(op.getSchema().getSignature());
    Map<Byte, List<ExprNodeDesc>> valueExprs = op.getConf().getExprs();
    Map<Byte, List<ExprNodeDesc>> newValueExprs = new HashMap<Byte, List<ExprNodeDesc>>();
    for (Map.Entry<Byte, List<ExprNodeDesc>> entry : valueExprs.entrySet()) {
      byte tag = entry.getKey();
      Operator<?> terminal = oldReduceSinkParentOps.get(tag);

      List<ExprNodeDesc> values = entry.getValue();
      List<ExprNodeDesc> newValues = ExprNodeDescUtils.backtrack(values, op, terminal);
      newValueExprs.put(tag, newValues);
      for (int i = 0; i < schema.size(); i++) {
        ColumnInfo column = schema.get(i);
        if (column == null) {
          continue;
        }
        ExprNodeDesc expr = colExprMap.get(column.getInternalName());
        int index = ExprNodeDescUtils.indexOf(expr, values);
        if (index >= 0) {
          colExprMap.put(column.getInternalName(), newValues.get(index));
          schema.set(i, null);
        }
      }
    }

    // construct valueTableDescs and valueFilteredTableDescs
    List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
    List<TableDesc> valueFilteredTableDescs = new ArrayList<TableDesc>();
    int[][] filterMap = desc.getFilterMap();
    for (pos = 0; pos < op.getParentOperators().size(); pos++) {
      List<ExprNodeDesc> valueCols = newValueExprs.get(Byte.valueOf((byte) pos));
      int length = valueCols.size();
      List<ExprNodeDesc> valueFilteredCols = new ArrayList<ExprNodeDesc>(length);
      // deep copy expr node desc
      for (int i = 0; i < length; i++) {
        valueFilteredCols.add(valueCols.get(i).clone());
      }
      if (filterMap != null && filterMap[pos] != null && pos != mapJoinPos) {
        ExprNodeColumnDesc isFilterDesc = new ExprNodeColumnDesc(TypeInfoFactory
            .getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME), "filter", "filter", false);
        valueFilteredCols.add(isFilterDesc);
      }

      keyOrder = new StringBuilder();
      for (int i = 0; i < valueCols.size(); i++) {
        keyOrder.append("+");
      }

      TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils
          .getFieldSchemasFromColumnList(valueCols, "mapjoinvalue"));
      TableDesc valueFilteredTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils
          .getFieldSchemasFromColumnList(valueFilteredCols, "mapjoinvalue"));

      valueTableDescs.add(valueTableDesc);
      valueFilteredTableDescs.add(valueFilteredTableDesc);
    }

    Map<Byte, List<ExprNodeDesc>> filters = desc.getFilters();
    Map<Byte, List<ExprNodeDesc>> newFilters = new HashMap<Byte, List<ExprNodeDesc>>();
    for (Map.Entry<Byte, List<ExprNodeDesc>> entry : filters.entrySet()) {
      byte srcTag = entry.getKey();
      List<ExprNodeDesc> filter = entry.getValue();

      Operator<?> terminal = op.getParentOperators().get(srcTag);
      newFilters.put(srcTag, ExprNodeDescUtils.backtrack(filter, op, terminal));
    }
    desc.setFilters(filters = newFilters);

    // create dumpfile prefix needed to create descriptor
    String dumpFilePrefix = "";
    if (joinTree.getMapAliases() != null) {
      for (String mapAlias : joinTree.getMapAliases()) {
        dumpFilePrefix = dumpFilePrefix + mapAlias;
      }
      dumpFilePrefix = dumpFilePrefix + "-" + PlanUtils.getCountForMapJoinDumpFilePrefix();
    } else {
      dumpFilePrefix = "mapfile" + PlanUtils.getCountForMapJoinDumpFilePrefix();
    }
    }

    List<String> outputColumnNames = op.getConf().getOutputColumnNames();
    TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(hconf,
        PlanUtils.getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX));
    JoinCondDesc[] joinCondns = op.getConf().getConds();
    MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, newValueExprs,
        valueTableDescs, valueFilteredTableDescs, outputColumnNames, mapJoinPos, joinCondns,
        filters, op.getConf().getNoOuterJoin(), dumpFilePrefix);
    mapJoinDescriptor.setStatistics(op.getConf().getStatistics());
    mapJoinDescriptor.setTagOrder(tagOrder);
    mapJoinDescriptor.setNullSafes(desc.getNullSafes());
    mapJoinDescriptor.setFilterMap(desc.getFilterMap());

    // reduce sink row resolver used to generate map join op
    RowResolver outputRS = opParseCtxMap.get(op).getRowResolver();

    MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
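A hedged usage sketch for the conversion entry point above, mirroring the earlier getMapJoinDesc example; the surrounding variables are assumed to be in scope:

    // Swap the shuffle join for a map join in the operator tree,
    // streaming parent 0 as the big table.
    MapJoinOperator mjOp = MapJoinProcessor.convertJoinOpMapJoinOp(hconf,
        opParseCtxMap, joinOp, joinTree, /* mapJoinPos */ 0,
        /* noCheckOuterJoin */ false);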

      assert(!stack.isEmpty());

      // LineageCtx
      LineageCtx lCtx = (LineageCtx) procCtx;
      JoinOperator op = (JoinOperator)nd;
      JoinDesc jd = op.getConf();

      // The input operator to the join is always a reduce sink operator
      ReduceSinkOperator inpOp = (ReduceSinkOperator)getParent(stack);
      ReduceSinkDesc rd = inpOp.getConf();
      int tag = rd.getTag();

      // Iterate over the outputs of the join operator and merge the
      // dependencies of the columns that correspond to the tag.
      int cnt = 0;
      List<ExprNodeDesc> exprs = jd.getExprs().get((byte)tag);
      for(ColumnInfo ci : op.getSchema().getSignature()) {
        if (jd.getReversedExprs().get(ci.getInternalName()) != tag) {
          continue;
        }

        // Otherwise look up the expression corresponding to this ci
        ExprNodeDesc expr = exprs.get(cnt++);
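The tag bookkeeping above reduces to two JoinDesc lookups: getReversedExprs() maps each output column to the input tag that produced it, and getExprs() holds that tag's expression list. A short sketch:

      // Trace one output column of the join back to its source expression.
      JoinDesc jd = joinOp.getConf();
      ColumnInfo ci = joinOp.getSchema().getSignature().get(0);
      Byte srcTag = jd.getReversedExprs().get(ci.getInternalName());
      List<ExprNodeDesc> srcExprs = jd.getExprs().get(srcTag);
      // srcExprs holds, in order, the values emitted for input srcTag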

      throw new SemanticException(e.getMessage());
    }

    BigTableSelectorForAutoSMJ bigTableMatcher =
      (BigTableSelectorForAutoSMJ) ReflectionUtils.newInstance(bigTableMatcherClass, null);
    JoinDesc joinDesc = joinOp.getConf();
    JoinCondDesc[] joinCondns = joinDesc.getConds();
    Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns);
    if (joinCandidates.isEmpty()) {
      // This is a full outer join. This can never be a map-join
      // of any type. So return false.
      return false;
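The early exit above is the same test the map-join path applies; a compact sketch:

    // A full outer join yields no big-table candidates, so neither a map
    // join nor an auto sort-merge join conversion is possible.
    Set<Integer> candidates =
        MapJoinProcessor.getBigTableCandidates(joinOp.getConf().getConds());
    if (candidates.isEmpty()) {
      return false; // keep the common (shuffle) join
    }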
