Examples of RowResolver


Examples of org.apache.hadoop.hive.ql.parse.RowResolver

    parentOpList.add(parent);
    fs_op.setParentOperators(parentOpList);

    // create a dummy tableScan operator on top of op
    // TableScanOperator is implicitly created here for each MapOperator
    RowResolver rowResolver = opProcCtx.getParseCtx().getOpParseCtx().get(parent).getRowResolver();
    Operator<? extends Serializable> ts_op = putOpInsertMap(OperatorFactory
        .get(TableScanDesc.class, parent.getSchema()), rowResolver, parseCtx);

    childOpList = new ArrayList<Operator<? extends Serializable>>();
    childOpList.add(op);
View Full Code Here
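
All of the examples on this page lean on the same handful of RowResolver calls: put registers a (table alias, column alias) pair against a ColumnInfo, reverseLookup maps an internal column name back to that pair, get resolves the pair to its ColumnInfo, and getColumnInfos / getRowSchema supply the row schema for a newly created operator. The following is a minimal, self-contained sketch of that pattern; the aliases "src"/"key" and the internal name "_col0" are invented for illustration, and the signatures are assumed to match the Hive version these snippets were taken from.

    import org.apache.hadoop.hive.ql.exec.ColumnInfo;
    import org.apache.hadoop.hive.ql.exec.RowSchema;
    import org.apache.hadoop.hive.ql.parse.RowResolver;
    import org.apache.hadoop.hive.ql.parse.SemanticException;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class RowResolverSketch {
      public static void main(String[] args) throws SemanticException {
        RowResolver rr = new RowResolver();

        // Register a column: (table alias, column alias) -> ColumnInfo.
        // The names here are made up for illustration only.
        rr.put("src", "key",
            new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "src", false));

        // reverseLookup maps an internal name back to {table alias, column alias}.
        String[] nm = rr.reverseLookup("_col0");

        // get resolves the (table alias, column alias) pair to its ColumnInfo.
        ColumnInfo ci = rr.get(nm[0], nm[1]);

        // The resolver's ColumnInfos typically back the RowSchema of a newly
        // created operator, which is how the snippets on this page wire
        // RowResolvers into the operator plan.
        RowSchema schema = new RowSchema(rr.getColumnInfos());
        System.out.println(ci.getInternalName() + " -> " + schema);
      }
    }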

Examples of org.apache.hadoop.hive.ql.parse.RowResolver

    OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge);
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsConf = fsOp.getConf();

    // Add the extract operator to get the value fields
    RowResolver out_rwsch = new RowResolver();
    RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver();
    Integer pos = Integer.valueOf(0);
    for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
      String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
      out_rwsch.put(info[0], info[1], new ColumnInfo(pos.toString(), colInfo
          .getType(), info[0], colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()));
      pos = Integer.valueOf(pos.intValue() + 1);
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.parse.RowResolver

   */
  @SuppressWarnings("nls")
  private Operator genSelectPlan(Operator input, List<String> colNames)
    throws SemanticException {

    RowResolver inputRR  = pGraphContext.getOpParseCtx().get(input).getRR();
    RowResolver outputRR = new RowResolver();
    ArrayList<exprNodeDesc> col_list = new ArrayList<exprNodeDesc>();
   
    // Iterate over the selects
    for (int pos = 0; pos < colNames.size(); pos++) {
      String   internalName = colNames.get(pos);
      String[] colName      = inputRR.reverseLookup(internalName);
      ColumnInfo in = inputRR.get(colName[0], colName[1]);
      outputRR.put(colName[0], colName[1],
                   new ColumnInfo((Integer.valueOf(pos)).toString(), in.getType()));
      col_list.add(new exprNodeColumnDesc(in.getType(), internalName));
    }

    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new selectDesc(col_list), new RowSchema(outputRR.getColumnInfos()), input), outputRR);

    return output;
  }
View Full Code Here
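
The genSelectPlan example above, like the first snippet on this page, ends by calling a putOpInsertMap helper that is not shown. In Hive it records the new operator's RowResolver in the operator-to-OpParseContext map so later passes can resolve that operator's columns. The sketch below is a hypothetical standalone version of that bookkeeping, modeled on the three-argument form used in the first snippet; the two-argument call in genSelectPlan is assumed to be the analyzer's instance-method variant of the same idea, writing into its own map.

    import java.io.Serializable;

    import org.apache.hadoop.hive.ql.exec.Operator;
    import org.apache.hadoop.hive.ql.parse.OpParseContext;
    import org.apache.hadoop.hive.ql.parse.ParseContext;
    import org.apache.hadoop.hive.ql.parse.RowResolver;

    public class OpInsertMapSketch {
      // Hypothetical stand-in for Hive's putOpInsertMap helpers: wrap the
      // RowResolver in an OpParseContext, remember the operator -> context
      // mapping in the ParseContext, and hand the operator back to the caller.
      static Operator<? extends Serializable> putOpInsertMap(
          Operator<? extends Serializable> op, RowResolver rr, ParseContext parseCtx) {
        OpParseContext ctx = new OpParseContext(rr);
        parseCtx.getOpParseCtx().put(op, ctx);
        return op;
      }
    }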

Examples of org.apache.hadoop.hive.ql.parse.RowResolver

    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException {
      ReduceSinkOperator op = (ReduceSinkOperator)nd;
      ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx)ctx;
      HashMap<Operator<? extends Serializable>, OpParseContext> opToParseCtxMap =
          cppCtx.getOpToParseCtxMap();
      RowResolver redSinkRR = opToParseCtxMap.get(op).getRR();
      reduceSinkDesc conf = op.getConf();
      List<Operator<? extends Serializable>> childOperators = op.getChildOperators();
      List<Operator<? extends Serializable>> parentOperators = op.getParentOperators();
      List<String> childColLists = new ArrayList<String>();

      for(Operator<? extends Serializable> child: childOperators)
        childColLists = Utilities.mergeUniqElems(childColLists, cppCtx.getPrunedColLists().get(child));

      List<String> colLists = new ArrayList<String>();
      ArrayList<exprNodeDesc> keys = conf.getKeyCols();
      for (exprNodeDesc key : keys)
        colLists = Utilities.mergeUniqElems(colLists, key.getCols());

      if ((childOperators.size() == 1) && (childOperators.get(0) instanceof JoinOperator)) {
        assert parentOperators.size() == 1;
        Operator<? extends Serializable> par = parentOperators.get(0);
        RowResolver parRR = opToParseCtxMap.get(par).getRR();
        RowResolver childRR = opToParseCtxMap.get(childOperators.get(0)).getRR();

        for (String childCol : childColLists) {
          String [] nm = childRR.reverseLookup(childCol);
          ColumnInfo cInfo = redSinkRR.get(nm[0],nm[1]);
          if (cInfo != null) {
            cInfo = parRR.get(nm[0], nm[1]);
            if (!colLists.contains(cInfo.getInternalName()))
              colLists.add(cInfo.getInternalName());
View Full Code Here

Examples of org.apache.hadoop.hive.ql.parse.RowResolver

      if (checkMapJoin(mapJoinPos, condns) < 0) {
        throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
      }
    }

    RowResolver outputRS = opParseCtxMap.get(op).getRowResolver();
    Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();

    // Walk over all the sources (which are guaranteed to be reduce sink
    // operators).
    // The join outputs a concatenation of all the inputs.
    QBJoinTree leftSrc = joinTree.getJoinSrc();

    List<Operator<? extends OperatorDesc>> parentOps = op.getParentOperators();
    List<Operator<? extends OperatorDesc>> newParentOps =
      new ArrayList<Operator<? extends OperatorDesc>>();
    List<Operator<? extends OperatorDesc>> oldReduceSinkParentOps =
       new ArrayList<Operator<? extends OperatorDesc>>();

    // found a source which is not to be stored in memory
    if (leftSrc != null) {
      // assert mapJoinPos == 0;
      Operator<? extends OperatorDesc> parentOp = parentOps.get(0);
      assert parentOp.getParentOperators().size() == 1;
      Operator<? extends OperatorDesc> grandParentOp =
        parentOp.getParentOperators().get(0);
      oldReduceSinkParentOps.add(parentOp);
      newParentOps.add(grandParentOp);
    }

    byte pos = 0;
    // Remove parent reduce-sink operators
    for (String src : joinTree.getBaseSrc()) {
      if (src != null) {
        Operator<? extends OperatorDesc> parentOp = parentOps.get(pos);
        assert parentOp.getParentOperators().size() == 1;
        Operator<? extends OperatorDesc> grandParentOp =
          parentOp.getParentOperators().get(0);

        oldReduceSinkParentOps.add(parentOp);
        newParentOps.add(grandParentOp);
      }
      pos++;
    }

    // get the join keys from old parent ReduceSink operators
    for (pos = 0; pos < newParentOps.size(); pos++) {
      ReduceSinkOperator oldPar = (ReduceSinkOperator) oldReduceSinkParentOps.get(pos);
      ReduceSinkDesc rsconf = oldPar.getConf();
      List<ExprNodeDesc> keys = rsconf.getKeyCols();
      keyExprMap.put(pos, keys);
    }

    // removing RS, only ExprNodeDesc is changed (key/value/filter exprs and colExprMap)
    // others (output column-name, RR, schema) remain intact
    Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
    List<String> outputColumnNames = op.getConf().getOutputColumnNames();

    List<ColumnInfo> schema = new ArrayList<ColumnInfo>(op.getSchema().getSignature());

    Map<Byte, List<ExprNodeDesc>> valueExprs = op.getConf().getExprs();
    Map<Byte, List<ExprNodeDesc>> newValueExprs = new HashMap<Byte, List<ExprNodeDesc>>();
    for (Map.Entry<Byte, List<ExprNodeDesc>> entry : valueExprs.entrySet()) {
      byte tag = entry.getKey();
      Operator<?> terminal = oldReduceSinkParentOps.get(tag);

      List<ExprNodeDesc> values = entry.getValue();
      List<ExprNodeDesc> newValues = ExprNodeDescUtils.backtrack(values, op, terminal);
      newValueExprs.put(tag, newValues);
      for (int i = 0; i < schema.size(); i++) {
        ColumnInfo column = schema.get(i);
        if (column == null) {
          continue;
        }
        ExprNodeDesc expr = colExprMap.get(column.getInternalName());
        int index = ExprNodeDescUtils.indexOf(expr, values);
        if (index >= 0) {
          colExprMap.put(column.getInternalName(), newValues.get(index));
          schema.set(i, null);
        }
      }
    }

    Map<Byte, List<ExprNodeDesc>> filters = desc.getFilters();
    Map<Byte, List<ExprNodeDesc>> newFilters = new HashMap<Byte, List<ExprNodeDesc>>();
    for (Map.Entry<Byte, List<ExprNodeDesc>> entry : filters.entrySet()) {
      byte srcTag = entry.getKey();
      List<ExprNodeDesc> filter = entry.getValue();

      Operator<?> terminal = oldReduceSinkParentOps.get(srcTag);
      newFilters.put(srcTag, ExprNodeDescUtils.backtrack(filter, op, terminal));
    }
    desc.setFilters(filters = newFilters);

    // remove old parents
    for (pos = 0; pos < newParentOps.size(); pos++) {
      newParentOps.get(pos).removeChild(oldReduceSinkParentOps.get(pos));
    }

    JoinCondDesc[] joinCondns = op.getConf().getConds();

    Operator[] newPar = new Operator[newParentOps.size()];
    pos = 0;
    for (Operator<? extends OperatorDesc> o : newParentOps) {
      newPar[pos++] = o;
    }

    List<ExprNodeDesc> keyCols = keyExprMap.get(Byte.valueOf((byte) 0));
    StringBuilder keyOrder = new StringBuilder();
    for (int i = 0; i < keyCols.size(); i++) {
      keyOrder.append("+");
    }

    TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(PlanUtils
        .getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX));

    List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
    List<TableDesc> valueFiltedTableDescs = new ArrayList<TableDesc>();

    int[][] filterMap = desc.getFilterMap();
    for (pos = 0; pos < newParentOps.size(); pos++) {
      List<ExprNodeDesc> valueCols = newValueExprs.get(pos);
      int length = valueCols.size();
      List<ExprNodeDesc> valueFilteredCols = new ArrayList<ExprNodeDesc>(length);
      // deep copy expr node desc
      for (int i = 0; i < length; i++) {
        valueFilteredCols.add(valueCols.get(i).clone());
      }
      if (filterMap != null && filterMap[pos] != null && pos != mapJoinPos) {
        ExprNodeColumnDesc isFilterDesc = new ExprNodeColumnDesc(TypeInfoFactory
            .getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME), "filter", "filter", false);
        valueFilteredCols.add(isFilterDesc);
      }


      keyOrder = new StringBuilder();
      for (int i = 0; i < valueCols.size(); i++) {
        keyOrder.append("+");
      }

      TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils
          .getFieldSchemasFromColumnList(valueCols, "mapjoinvalue"));
      TableDesc valueFilteredTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils
          .getFieldSchemasFromColumnList(valueFilteredCols, "mapjoinvalue"));

      valueTableDescs.add(valueTableDesc);
      valueFiltedTableDescs.add(valueFilteredTableDesc);
    }
    String dumpFilePrefix = "";
    if( joinTree.getMapAliases() != null ) {
      for(String mapAlias : joinTree.getMapAliases()) {
        dumpFilePrefix = dumpFilePrefix + mapAlias;
      }
      dumpFilePrefix = dumpFilePrefix+"-"+PlanUtils.getCountForMapJoinDumpFilePrefix();
    } else {
      dumpFilePrefix = "mapfile"+PlanUtils.getCountForMapJoinDumpFilePrefix();
    }
    MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, newValueExprs,
        valueTableDescs, valueFiltedTableDescs, outputColumnNames, mapJoinPos, joinCondns,
        filters, op.getConf().getNoOuterJoin(), dumpFilePrefix);
    mapJoinDescriptor.setTagOrder(tagOrder);
    mapJoinDescriptor.setNullSafes(desc.getNullSafes());
    mapJoinDescriptor.setFilterMap(desc.getFilterMap());

    MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
        mapJoinDescriptor, new RowSchema(outputRS.getColumnInfos()), newPar);

    OpParseContext ctx = new OpParseContext(outputRS);
    opParseCtxMap.put(mapJoinOp, ctx);

    mapJoinOp.getConf().setReversedExprs(op.getConf().getReversedExprs());
View Full Code Here

Examples of org.apache.hadoop.hive.ql.parse.RowResolver

        smbJoinDesc.getValueTblDescs(), smbJoinDesc.getValueTblDescs(),
        smbJoinDesc.getOutputColumnNames(),
        bigTablePos, smbJoinDesc.getConds(),
        smbJoinDesc.getFilters(), smbJoinDesc.isNoOuterJoin(), smbJoinDesc.getDumpFilePrefix());

    RowResolver joinRS = opParseCtxMap.get(smbJoinOp).getRowResolver();
    // The mapjoin has the same schema as the join operator
    MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
        mapJoinDesc, joinRS.getRowSchema(),
        new ArrayList<Operator<? extends OperatorDesc>>());

    OpParseContext ctx = new OpParseContext(joinRS);
    opParseCtxMap.put(mapJoinOp, ctx);
View Full Code Here

Examples of org.apache.hadoop.hive.ql.parse.RowResolver

    List<Operator<? extends OperatorDesc>> childOps = input.getChildOperators();
    input.setChildOperators(null);

    // create a dummy select - This select is needed by the walker to split the
    // mapJoin later on
    RowResolver inputRR = pctx.getOpParseCtx().get(input).getRowResolver();

    ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputs = new ArrayList<String>();
    List<String> outputCols = input.getConf().getOutputColumnNames();
    RowResolver outputRS = new RowResolver();

    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();

    for (int i = 0; i < outputCols.size(); i++) {
      String internalName = outputCols.get(i);
      String[] nm = inputRR.reverseLookup(internalName);
      ColumnInfo valueInfo = inputRR.get(nm[0], nm[1]);
      ExprNodeDesc colDesc = new ExprNodeColumnDesc(valueInfo.getType(), valueInfo
          .getInternalName(), nm[0], valueInfo.getIsVirtualCol());
      exprs.add(colDesc);
      outputs.add(internalName);
      outputRS.put(nm[0], nm[1], new ColumnInfo(internalName, valueInfo.getType(), nm[0], valueInfo
          .getIsVirtualCol(), valueInfo.isHiddenVirtualCol()));
      colExprMap.put(internalName, colDesc);
    }

    SelectDesc select = new SelectDesc(exprs, outputs, false);
View Full Code Here

Examples of org.apache.hadoop.hive.ql.parse.RowResolver

      List<String> prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0));
      //we create a copy of prunedCols to create a list of pruned columns for PTFOperator
      prunedCols = new ArrayList<String>(prunedCols);
      prunedColumnsList(prunedCols, def);
      RowResolver oldRR = cppCtx.getOpToParseCtxMap().get(op).getRowResolver();
      RowResolver newRR = buildPrunedRR(prunedCols, oldRR, sig);
      cppCtx.getPrunedColLists().put(op, prunedInputList(prunedCols, def));
      cppCtx.getOpToParseCtxMap().get(op).setRowResolver(newRR);
      op.getSchema().setSignature(sig);
      return null;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.parse.RowResolver

      return null;
    }

    private static RowResolver buildPrunedRR(List<String> prunedCols,
        RowResolver oldRR, ArrayList<ColumnInfo> sig) throws SemanticException{
      RowResolver newRR = new RowResolver();
      HashSet<String> prunedColsSet = new HashSet<String>(prunedCols);
      for(ColumnInfo cInfo : oldRR.getRowSchema().getSignature()) {
        if ( prunedColsSet.contains(cInfo.getInternalName())) {
          String[] nm = oldRR.reverseLookup(cInfo.getInternalName());
          newRR.put(nm[0], nm[1], cInfo);
          sig.add(cInfo);
        }
      }
      return newRR;
    }
View Full Code Here

Examples of org.apache.hadoop.hive.ql.parse.RowResolver

          .genColLists((Operator<? extends OperatorDesc>) nd);
      cppCtx.getPrunedColLists().put((Operator<? extends OperatorDesc>) nd,
          cols);
      ArrayList<Integer> needed_columns = new ArrayList<Integer>();
      List<String> neededColumnNames = new ArrayList<String>();
      RowResolver inputRR = cppCtx.getOpToParseCtxMap().get(scanOp).getRowResolver();
      TableScanDesc desc = scanOp.getConf();
      List<VirtualColumn> virtualCols = desc.getVirtualCols();
      List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();

      // add virtual columns for ANALYZE TABLE
      if(scanOp.getConf().isGatherStats()) {
        cols.add(VirtualColumn.RAWDATASIZE.getName());
      }

      for (int i = 0; i < cols.size(); i++) {
        String[] tabCol = inputRR.reverseLookup(cols.get(i));
        if(tabCol == null) {
          continue;
        }
        ColumnInfo colInfo = inputRR.get(tabCol[0], tabCol[1]);
        if (colInfo.getIsVirtualCol()) {
          // part is also a virtual column, but part col should not in this
          // list.
          for (int j = 0; j < virtualCols.size(); j++) {
            VirtualColumn vc = virtualCols.get(j);
            if (vc.getName().equals(colInfo.getInternalName())) {
              newVirtualCols.add(vc);
            }
          }
          //no need to pass virtual columns to reader.
          continue;
        }
        int position = inputRR.getPosition(cols.get(i));
        if (position >=0) {
          // get the needed columns by id and name
          needed_columns.add(position);
          neededColumnNames.add(cols.get(i));
        }
View Full Code Here