Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc
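FileSinkDesc is the plan descriptor that configures a FileSinkOperator: it carries the output directory, the TableDesc of the rows being written, the compression flag, and (for inserts) partitioning and stats-collection settings. As a minimal sketch of the simplest usage, mirroring the three-argument constructor and the OperatorFactory calls in the snippets below (the output path and the parent operator here are hypothetical placeholders):

    // Build a descriptor that writes uncompressed rows, described by the
    // default TableDesc, to a (hypothetical) temporary output directory,
    // then create the corresponding FileSinkOperator under "parent".
    FileSinkDesc sinkDesc = new FileSinkDesc(tmpdir + "example.out",
        Utilities.defaultTd,  // TableDesc describing the output rows
        false);               // compressed = false
    Operator<FileSinkDesc> sink =
        OperatorFactory.get(sinkDesc, parent.getSchema());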


    Utilities.addMapWork(mr, src, "a", op4);
    mr.setKeyDesc(op1.getConf().getKeySerializeInfo());
    mr.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());

    // reduce side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapredplan6.out", Utilities.defaultTd, false));

    Operator<FilterDesc> op2 = OperatorFactory.get(getTestFilterDesc("0"), op3);

    Operator<ExtractDesc> op5 = OperatorFactory.get(new ExtractDesc(
View Full Code Here


      throw new SemanticException(e.getMessage());
    }

    RowSchema fsRS = new RowSchema(vecCol);

    FileSinkDesc fileSinkDesc = new FileSinkDesc(
      queryTmpdir,
      table_desc,
      conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT),
      currentTableId,
      rsCtx.isMultiFileSpray(),
      rsCtx.getNumFiles(),
      rsCtx.getTotalFiles(),
      rsCtx.getPartnCols(),
      dpCtx);

    // Set the stats publishing/aggregating key prefix to the same value as the
    // directory name. The directory name can be changed in the optimizer,
    // but the key should not be changed: it must stay the same as the
    // MoveWork's sourceDir.
    fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName());

    if (dest_part != null) {
      try {
        String staticSpec = Warehouse.makePartPath(dest_part.getSpec());
        fileSinkDesc.setStaticSpec(staticSpec);
      } catch (MetaException e) {
        throw new SemanticException(e);
      }
    } else if (dpCtx != null) {
      fileSinkDesc.setStaticSpec(dpCtx.getSPPath());
    }

    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(fileSinkDesc,
            fsRS, input), inputRR);
View Full Code Here

    while (!ops.isEmpty()) {
      Operator<? extends OperatorDesc> op = ops.remove(0);

      if (op instanceof FileSinkOperator) {
        FileSinkDesc fdesc = ((FileSinkOperator) op).getConf();
        String tempDir = fdesc.getDirName();

        if (tempDir != null) {
          Path tempPath = Utilities.toTempPath(new Path(tempDir));
          FileSystem fs = tempPath.getFileSystem(conf);
          fs.mkdirs(tempPath);
View Full Code Here

      // If the underlying data is RCFile, an RCFileBlockMerge task would be created.
      LOG.info("using CombineHiveInputformat for the merge job");
      createMRWorkForMergingFiles(fsOp, ctx, finalName);
    }

    FileSinkDesc fileSinkDesc = fsOp.getConf();
    if (fileSinkDesc.isLinkedFileSink()) {
      Map<FileSinkDesc, Task<? extends Serializable>> linkedFileDescTasks =
        ctx.getLinkedFileDescTasks();
      if (linkedFileDescTasks == null) {
        linkedFileDescTasks = new HashMap<FileSinkDesc, Task<? extends Serializable>>();
        ctx.setLinkedFileDescTasks(linkedFileDescTasks);
      }

      // The child tasks may be null in case of a select
      if ((currTask.getChildTasks() != null) &&
        (currTask.getChildTasks().size() == 1)) {
        for (FileSinkDesc fileDesc : fileSinkDesc.getLinkedFileSinkDesc()) {
          linkedFileDescTasks.put(fileDesc, currTask.getChildTasks().get(0));
        }
      }
    }
View Full Code Here

    //
    // 1. create the operator tree
    //
    HiveConf conf = ctx.getParseCtx().getConf();
    FileSinkDesc fsInputDesc = fsInput.getConf();

    // Create a TableScan operator
    RowSchema inputRS = fsInput.getSchema();
    Operator<? extends OperatorDesc> tsMerge =
        OperatorFactory.get(TableScanDesc.class, inputRS);

    // Create a FileSink operator
    TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
    FileSinkDesc fsOutputDesc = new FileSinkDesc(finalName, ts,
      conf.getBoolVar(ConfVars.COMPRESSRESULT));
    FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(
      fsOutputDesc, inputRS, tsMerge);

    // If the input FileSinkOperator is dynamic-partition enabled, the tsMerge input schema
    // needs to include the partition columns, and the fsOutput should have
    // a DynamicPartitionCtx to indicate that it needs to be dynamically partitioned.
    DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
    if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
      // adding DP ColumnInfo to the RowSchema signature
      ArrayList<ColumnInfo> signature = inputRS.getSignature();
      String tblAlias = fsInputDesc.getTableInfo().getTableName();
      LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>();
      StringBuilder partCols = new StringBuilder();
      for (String dpCol : dpCtx.getDPColNames()) {
        ColumnInfo colInfo = new ColumnInfo(dpCol,
            TypeInfoFactory.stringTypeInfo, // all partition column types should be string
            tblAlias, true); // partition columns are virtual columns
        signature.add(colInfo);
        colMap.put(dpCol, dpCol); // input and output have the same column name
        partCols.append(dpCol).append('/');
      }
      partCols.setLength(partCols.length() - 1); // remove the last '/'
      inputRS.setSignature(signature);

      // create another DynamicPartitionCtx, which has a different input-to-DP column mapping
      DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
      dpCtx2.setInputToDPCols(colMap);
      fsOutputDesc.setDynPartCtx(dpCtx2);

      // update the FileSinkOperator to include partition columns
      fsInputDesc.getTableInfo().getProperties().setProperty(
        org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS,
        partCols.toString()); // list of dynamic partition column names
View Full Code Here

      // it must be on the same file system as the current destination
      ParseContext parseCtx = ctx.getParseCtx();
      Context baseCtx = parseCtx.getContext();
      String tmpDir = baseCtx.getExternalTmpFileURI((new Path(dest)).toUri());

      FileSinkDesc fileSinkDesc = fsOp.getConf();
      // Change all the linked file sink descriptors
      if (fileSinkDesc.isLinkedFileSink()) {
        for (FileSinkDesc fsConf:fileSinkDesc.getLinkedFileSinkDesc()) {
          String fileName = Utilities.getFileNameFromDirName(fsConf.getDirName());
          fsConf.setParentDir(tmpDir);
          fsConf.setDirName(tmpDir + Path.SEPARATOR + fileName);
        }
      } else {
        fileSinkDesc.setDirName(tmpDir);
      }
    }

    Task<MoveWork> mvTask = null;
View Full Code Here

        .getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));

    // Create a file sink operator for this file name
    boolean compressIntermediate = parseCtx.getConf().getBoolVar(
        HiveConf.ConfVars.COMPRESSINTERMEDIATE);
    FileSinkDesc desc = new FileSinkDesc(taskTmpDir, tt_desc,
        compressIntermediate);
    if (compressIntermediate) {
      desc.setCompressCodec(parseCtx.getConf().getVar(
          HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC));
      desc.setCompressType(parseCtx.getConf().getVar(
          HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE));
    }
    Operator<? extends OperatorDesc> fs_op = putOpInsertMap(OperatorFactory
        .get(desc, parent.getSchema()), null, parseCtx);
View Full Code Here

    boolean canBeMerged = (dest_tab == null || !((dest_tab.getNumBuckets() > 0 &&
        conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCEBUCKETING)) ||
        (dest_tab.getSortCols() != null && dest_tab.getSortCols().size() > 0 &&
        conf.getBoolVar(HiveConf.ConfVars.HIVEENFORCESORTING))));

    FileSinkDesc fileSinkDesc = new FileSinkDesc(
      queryTmpdir,
      table_desc,
      conf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT),
      currentTableId,
      rsCtx.isMultiFileSpray(),
      canBeMerged,
      rsCtx.getNumFiles(),
      rsCtx.getTotalFiles(),
      rsCtx.getPartnCols(),
      dpCtx);

    /* Set List Bucketing context. */
    if (lbCtx != null) {
      lbCtx.processRowSkewedIndex(fsRS);
      lbCtx.calculateSkewedValueSubDirList();
    }
    fileSinkDesc.setLbCtx(lbCtx);

    // set it in the plan instead of at runtime in the FileSinkOperator
    fileSinkDesc.setStatsCollectRawDataSize(HiveConf.getBoolVar(conf,
        HiveConf.ConfVars.HIVE_STATS_COLLECT_RAWDATASIZE));

    // Set the stats publishing/aggregating key prefix to the same value as the
    // directory name. The directory name can be changed in the optimizer,
    // but the key should not be changed: it must stay the same as the
    // MoveWork's sourceDir.
    fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName());

    if (dest_part != null) {
      try {
        String staticSpec = Warehouse.makePartPath(dest_part.getSpec());
        fileSinkDesc.setStaticSpec(staticSpec);
      } catch (MetaException e) {
        throw new SemanticException(e);
      }
    } else if (dpCtx != null) {
      fileSinkDesc.setStaticSpec(dpCtx.getSPPath());
    }

    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(fileSinkDesc,
        fsRS, input), inputRR);

View Full Code Here

  }

  @SuppressWarnings("unchecked")
  private void populateMapPlan1(Table src) {

    Operator<FileSinkDesc> op2 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapplan1.out", Utilities.defaultTd, true));
    Operator<FilterDesc> op1 = OperatorFactory.get(getTestFilterDesc("key"),
        op2);

    addMapWork(mr, src, "a", op1);
View Full Code Here

  }

  @SuppressWarnings("unchecked")
  private void populateMapPlan2(Table src) {

    Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(tmpdir
        + "mapplan2.out", Utilities.defaultTd, false));

    Operator<ScriptDesc> op2 = OperatorFactory.get(new ScriptDesc("cat",
        PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key,value"),
        TextRecordWriter.class, PlanUtils.getDefaultTableDesc(""
View Full Code Here
