Examples of FileSinkDesc


Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

        .getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));

    // Create a file sink operator for this file name
    boolean compressIntermediate = parseCtx.getConf().getBoolVar(
        HiveConf.ConfVars.COMPRESSINTERMEDIATE);
    FileSinkDesc desc = new FileSinkDesc(taskTmpDir, tt_desc,
        compressIntermediate);
    if (compressIntermediate) {
      desc.setCompressCodec(parseCtx.getConf().getVar(
          HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC));
      desc.setCompressType(parseCtx.getConf().getVar(
          HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE));
    }
    Operator<? extends Serializable> fs_op = putOpInsertMap(OperatorFactory
        .get(desc, parent.getSchema()), null, parseCtx);
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(
        new ArrayList<ExprNodeDesc>(), valueCols, outputColumns, false, -1, -1,
        -1);
    OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge);
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsConf = fsOp.getConf();

    // Add the extract operator to get the value fields
    RowResolver out_rwsch = new RowResolver();
    RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver();
    Integer pos = Integer.valueOf(0);
    for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
      String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
      out_rwsch.put(info[0], info[1], new ColumnInfo(pos.toString(), colInfo
          .getType(), info[0], colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()));
      pos = Integer.valueOf(pos.intValue() + 1);
    }

    Operator<ExtractDesc> extract = OperatorFactory.getAndMakeChild(new ExtractDesc(
        new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
            Utilities.ReduceField.VALUE.toString(), "", false)),
            new RowSchema(out_rwsch.getColumnInfos()));

    TableDesc ts = (TableDesc) fsConf.getTableInfo().clone();
    fsConf.getTableInfo().getProperties().remove(
        org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);

    FileSinkDesc newFSD = new FileSinkDesc(finalName, ts, parseCtx.getConf()
        .getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator newOutput = (FileSinkOperator) OperatorFactory.
      getAndMakeChild(newFSD, inputRS, extract);

    HiveConf conf = parseCtx.getConf();
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

    //
    // 1. create the operator tree
    //
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsInputDesc = fsInput.getConf();

    // Create a TableScan operator
    RowSchema inputRS = fsInput.getSchema();
    Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);

    // Create a FileSink operator
    TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
    FileSinkDesc fsOutputDesc =  new FileSinkDesc(finalName, ts,
        parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(
        fsOutputDesc,  inputRS, tsMerge);

    // If the input FileSinkOperator is dynamic-partition enabled, the tsMerge input schema
    // needs to include the partition columns, and fsOutput should have
    // a DynamicPartitionCtx to indicate that it needs to be dynamically partitioned.
    DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
    if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
      // adding DP ColumnInfo to the RowSchema signature
      ArrayList<ColumnInfo> signature = inputRS.getSignature();
      String tblAlias = fsInputDesc.getTableInfo().getTableName();
      LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>();
      StringBuilder partCols = new StringBuilder();
      for (String dpCol: dpCtx.getDPColNames()) {
        ColumnInfo colInfo = new ColumnInfo(dpCol,
            TypeInfoFactory.stringTypeInfo, // all partition column type should be string
            tblAlias, true); // partition column is virtual column
        signature.add(colInfo);
        colMap.put(dpCol, dpCol); // input and output have the same column name
        partCols.append(dpCol).append('/');
      }
      partCols.setLength(partCols.length()-1); // remove the last '/'
      inputRS.setSignature(signature);

      // create another DynamicPartitionCtx, which has a different input-to-DP column mapping
      DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
      dpCtx2.setInputToDPCols(colMap);
      fsOutputDesc.setDynPartCtx(dpCtx2);

      // update the FileSinkOperator to include partition columns
      fsInputDesc.getTableInfo().getProperties().setProperty(
        org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS,
        partCols.toString()); // list of dynamic partition column names
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

  @SuppressWarnings("unchecked")
  private void populateMapPlan1(Table src) {
    mr.setNumReduceTasks(Integer.valueOf(0));

    Operator<FileSinkDesc> op2 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapplan1.out", Utilities.defaultTd, true));
    Operator<FilterDesc> op1 = OperatorFactory.get(getTestFilterDesc("key"),
        op2);

    Utilities.addMapWork(mr, src, "a", op1);
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

  @SuppressWarnings("unchecked")
  private void populateMapPlan2(Table src) {
    mr.setNumReduceTasks(Integer.valueOf(0));

    Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapplan2.out", Utilities.defaultTd, false));

    Operator<ScriptDesc> op2 = OperatorFactory.get(new ScriptDesc("/bin/cat",
        PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key,value"),
        TextRecordWriter.class, PlanUtils.getDefaultTableDesc(""
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

    Utilities.addMapWork(mr, src, "a", op1);
    mr.setKeyDesc(op1.getConf().getKeySerializeInfo());
    mr.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());

    // reduce side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapredplan1.out", Utilities.defaultTd, false));

    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
        getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

    Utilities.addMapWork(mr, src, "a", op1);
    mr.setKeyDesc(op1.getConf().getKeySerializeInfo());
    mr.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());

    // reduce side work
    Operator<FileSinkDesc> op4 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapredplan2.out", Utilities.defaultTd, false));

    Operator<FilterDesc> op3 = OperatorFactory.get(getTestFilterDesc("0"), op4);

    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

    Utilities.addMapWork(mr, src2, "b", op2);
    mr.getTagToValueDesc().add(op2.getConf().getValueSerializeInfo());

    // reduce side work
    Operator<FileSinkDesc> op4 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapredplan3.out", Utilities.defaultTd, false));

    Operator<SelectDesc> op5 = OperatorFactory.get(new SelectDesc(Utilities
        .makeList(getStringColumn(Utilities.ReduceField.ALIAS.toString()),
        new ExprNodeFieldDesc(TypeInfoFactory.stringTypeInfo,
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

    Utilities.addMapWork(mr, src, "a", op4);
    mr.setKeyDesc(op1.getConf().getKeySerializeInfo());
    mr.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());

    // reduce side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapredplan4.out", Utilities.defaultTd, false));

    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
        getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);
View Full Code Here

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc

    Utilities.addMapWork(mr, src, "a", op4);
    mr.setKeyDesc(op0.getConf().getKeySerializeInfo());
    mr.getTagToValueDesc().add(op0.getConf().getValueSerializeInfo());

    // reduce side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapredplan5.out", Utilities.defaultTd, false));

    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
        getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.