Examples of fileSinkDesc
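
org.apache.hadoop.hive.ql.plan.FileSinkDesc is the descriptor behind Hive's FileSinkOperator: it carries the output directory, the output TableDesc, and a compression flag, and later overloads add multi-file-spray and dynamic-partition settings. Before the collected snippets below, here is a minimal sketch of the three-argument form they all use; the scratch directory is an assumption, and Utilities.defaultTd is the test-only default table descriptor from the same codebase:

    import org.apache.hadoop.hive.ql.exec.Operator;
    import org.apache.hadoop.hive.ql.exec.OperatorFactory;
    import org.apache.hadoop.hive.ql.exec.Utilities;
    import org.apache.hadoop.hive.ql.plan.FileSinkDesc;

    public class FileSinkDescSketch {
      public static void main(String[] args) {
        String tmpdir = "/tmp/hive-scratch/"; // assumption: any writable scratch dir

        // dirName, tableInfo, compressed -- the three-argument constructor
        // used throughout the snippets below
        FileSinkDesc desc = new FileSinkDesc(tmpdir + "plan.out",
            Utilities.defaultTd, false);

        // OperatorFactory.get instantiates the operator matching the descriptor
        Operator<FileSinkDesc> sink = OperatorFactory.get(desc);
        System.out.println(sink.getClass().getSimpleName()); // FileSinkOperator
      }
    }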


Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
    mr.setReduceWork(rWork);

    // reduce side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapredplan1.out", Utilities.defaultTd, false));

    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
        getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
    mr.setReduceWork(rWork);

    // reduce side work
    Operator<FileSinkDesc> op4 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapredplan2.out", Utilities.defaultTd, false));

    Operator<FilterDesc> op3 = OperatorFactory.get(getTestFilterDesc("0"), op4);

    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
        getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

    mr.setReduceWork(rWork);
    rWork.getTagToValueDesc().add(op2.getConf().getValueSerializeInfo());

    // reduce side work
    Operator<FileSinkDesc> op4 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapredplan3.out", Utilities.defaultTd, false));

    Operator<SelectDesc> op5 = OperatorFactory.get(new SelectDesc(Utilities
        .makeList(new ExprNodeFieldDesc(TypeInfoFactory.stringTypeInfo,
        new ExprNodeColumnDesc(TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo),

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
    rWork.setNumReduceTasks(Integer.valueOf(1));
    mr.setReduceWork(rWork);

    // reduce side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapredplan4.out", Utilities.defaultTd, false));

    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
        getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

    rWork.setNumReduceTasks(Integer.valueOf(1));
    rWork.setKeyDesc(op0.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op0.getConf().getValueSerializeInfo());

    // reduce side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapredplan5.out", Utilities.defaultTd, false));

    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
        getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

    rWork.setNumReduceTasks(Integer.valueOf(1));
    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());

    // reduce side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapredplan6.out", Utilities.defaultTd, false));

    Operator<FilterDesc> op2 = OperatorFactory.get(getTestFilterDesc("0"), op3);

    Operator<ExtractDesc> op5 = OperatorFactory.get(new ExtractDesc(
        getStringColumn(Utilities.ReduceField.VALUE.toString())), op2);

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

      throw new SemanticException(e.getMessage());
    }

    RowSchema fsRS = new RowSchema(vecCol);

    FileSinkDesc fileSinkDesc = new FileSinkDesc(
      queryTmpdir,
      table_desc,
      conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT),
      currentTableId,
      rsCtx.isMultiFileSpray(),
      rsCtx.getNumFiles(),
      rsCtx.getTotalFiles(),
      rsCtx.getPartnCols(),
      dpCtx);

    // Set the stats publishing/aggregating key prefix to the directory name.
    // The directory name can be changed by the optimizer, but the key should not be:
    // it must remain the same as the MoveWork's sourceDir.
    fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName());

    if (dest_part != null) {
      try {
        String staticSpec = Warehouse.makePartPath(dest_part.getSpec());
        fileSinkDesc.setStaticSpec(staticSpec);
      } catch (MetaException e) {
        throw new SemanticException(e);
      }
    } else if (dpCtx != null) {
      fileSinkDesc.setStaticSpec(dpCtx.getSPPath());
    }

    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(fileSinkDesc,
            fsRS, input), inputRR);
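
For context on setStaticSpec above: Warehouse.makePartPath renders a partition spec map as a path fragment, which is what gets stored on the FileSinkDesc for a statically specified destination partition. A small stand-alone illustration, with hypothetical partition values:

    import java.util.LinkedHashMap;
    import java.util.Map;
    import org.apache.hadoop.hive.metastore.Warehouse;

    public class MakePartPathDemo {
      public static void main(String[] args) throws Exception {
        Map<String, String> spec = new LinkedHashMap<String, String>();
        spec.put("ds", "2008-04-08"); // hypothetical partition values
        spec.put("hr", "11");
        // expected to print "ds=2008-04-08/hr=11"
        System.out.println(Warehouse.makePartPath(spec));
      }
    }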

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

          while (!opList.isEmpty()) {
            Operator<? extends Serializable> op = opList.remove(0);

            if (op instanceof FileSinkOperator) {
              FileSinkDesc fdesc = ((FileSinkOperator) op).getConf();
              String s = fdesc.getDirName();
              if ((s != null) && ctx.isMRTmpFileURI(s)) {
                fdesc.setDirName(ctx.localizeMRTmpFileURI(s));
              }
              ((FileSinkOperator) op).setConf(fdesc);
            }

            if (op.getChildOperators() != null) {
              opList.addAll(op.getChildOperators());
            }
          }
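
The loop above is a plain breadth-first walk: pop an operator, rewrite its FileSinkDesc output directory if it points at an MR scratch URI, then enqueue its children. Below is a stand-alone sketch of the same pattern; the Op type and the two URI helpers are hypothetical stubs standing in for Operator and the Context methods used in the snippet:

    import java.util.ArrayList;
    import java.util.List;

    public class FileSinkDirRewriteSketch {
      static class Op {                    // stand-in for Operator<?>
        String dirName;                    // only FileSink-like ops set this
        List<Op> children = new ArrayList<Op>();
      }

      static boolean isMRTmpFileURI(String s) {      // hypothetical stub
        return s != null && s.startsWith("hdfs:/tmp/hive/");
      }

      static String localizeMRTmpFileURI(String s) { // hypothetical stub
        return s.replace("hdfs:/tmp/hive/", "file:/tmp/hive-local/");
      }

      public static void main(String[] args) {
        Op sink = new Op();
        sink.dirName = "hdfs:/tmp/hive/-mr-10002";
        Op root = new Op();
        root.children.add(sink);

        List<Op> opList = new ArrayList<Op>();
        opList.add(root);
        while (!opList.isEmpty()) {        // same BFS shape as the snippet
          Op op = opList.remove(0);
          if (op.dirName != null && isMRTmpFileURI(op.dirName)) {
            op.dirName = localizeMRTmpFileURI(op.dirName);
          }
          opList.addAll(op.children);
        }
        System.out.println(sink.dirName);  // file:/tmp/hive-local/-mr-10002
      }
    }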

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(
        new ArrayList<ExprNodeDesc>(), valueCols, outputColumns, false, -1, -1,
        -1);
    OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge);
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsConf = fsOp.getConf();

    // Add the extract operator to get the value fields
    RowResolver out_rwsch = new RowResolver();
    RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver();
    Integer pos = Integer.valueOf(0);
    for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
      String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
      out_rwsch.put(info[0], info[1], new ColumnInfo(pos.toString(), colInfo
          .getType(), info[0], colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()));
      pos = Integer.valueOf(pos.intValue() + 1);
    }

    Operator<ExtractDesc> extract = OperatorFactory.getAndMakeChild(new ExtractDesc(
        new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
            Utilities.ReduceField.VALUE.toString(), "", false)),
            new RowSchema(out_rwsch.getColumnInfos()));

    TableDesc ts = (TableDesc) fsConf.getTableInfo().clone();
    fsConf.getTableInfo().getProperties().remove(
        org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);

    FileSinkDesc newFSD = new FileSinkDesc(finalName, ts, parseCtx.getConf()
        .getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator newOutput = (FileSinkOperator) OperatorFactory.
      getAndMakeChild(newFSD, inputRS, extract);

    HiveConf conf = parseCtx.getConf();
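
Both merge-related snippets decide whether the new FileSinkDesc compresses its output by reading HiveConf.ConfVars.COMPRESSRESULT (hive.exec.compress.output). A minimal sketch of that lookup, assuming a default HiveConf is constructible on the classpath:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class CompressResultDemo {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        boolean compress = conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT);
        System.out.println("compress result: " + compress);
      }
    }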

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

    //
    // 1. create the operator tree
    //
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsInputDesc = fsInput.getConf();

    // Create a TableScan operator
    RowSchema inputRS = fsInput.getSchema();
    Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);

    // Create a FileSink operator
    TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
    FileSinkDesc fsOutputDesc =  new FileSinkDesc(finalName, ts,
        parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(
        fsOutputDesc,  inputRS, tsMerge);

    // If the input FileSinkOperator has dynamic partitioning enabled, the tsMerge input schema
    // needs to include the partition columns, and the fsOutput should have
    // a DynamicPartitionCtx to indicate that its output needs to be dynamically partitioned.
    DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
    if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
      // adding DP ColumnInfo to the RowSchema signature
      ArrayList<ColumnInfo> signature = inputRS.getSignature();
      String tblAlias = fsInputDesc.getTableInfo().getTableName();
      LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>();
      StringBuilder partCols = new StringBuilder();
      for (String dpCol: dpCtx.getDPColNames()) {
        ColumnInfo colInfo = new ColumnInfo(dpCol,
            TypeInfoFactory.stringTypeInfo, // all partition column types should be string
            tblAlias, true); // partition columns are virtual columns
        signature.add(colInfo);
        colMap.put(dpCol, dpCol); // input and output have the same column name
        partCols.append(dpCol).append('/');
      }
      partCols.setLength(partCols.length()-1); // remove the last '/'
      inputRS.setSignature(signature);

      // create another DynamicPartitionCtx, which has a different input-to-DP column mapping
      DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
      dpCtx2.setInputToDPCols(colMap);
      fsOutputDesc.setDynPartCtx(dpCtx2);

      // update the FileSinkOperator to include partition columns
      fsInputDesc.getTableInfo().getProperties().setProperty(
        org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS,
        partCols.toString()); // list of dynamic partition column names
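
The loop above also accumulates the dynamic partition column names into a '/'-separated list before storing it under META_TABLE_PARTITION_COLUMNS. A stand-alone sketch of just that string-building step, with hypothetical column names:

    import java.util.Arrays;
    import java.util.List;

    public class PartColsJoinDemo {
      public static void main(String[] args) {
        List<String> dpColNames = Arrays.asList("ds", "hr"); // hypothetical DP columns
        StringBuilder partCols = new StringBuilder();
        for (String dpCol : dpColNames) {
          partCols.append(dpCol).append('/');
        }
        partCols.setLength(partCols.length() - 1); // drop the trailing '/'
        System.out.println(partCols); // ds/hr
      }
    }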