Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.FileSinkDesc
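FileSinkDesc is the plan descriptor behind a FileSinkOperator: it carries the
output directory name, the TableDesc describing the on-disk format, and the
compression settings for the sink. The excerpts below, taken from the Hive
query planner, show the descriptor being created, cloned, localized, and
re-pointed as the plan is split into tasks.

As a minimal sketch of the recurring pattern (not code from the excerpts;
tmpDir, tableDesc, schema, and conf are assumed to be supplied by the caller,
and the Hive ql classes are assumed to be on the classpath):

    Operator<? extends OperatorDesc> makeFileSink(String tmpDir,
        TableDesc tableDesc, RowSchema schema, HiveConf conf) {
      boolean compress = conf.getBoolVar(HiveConf.ConfVars.COMPRESSINTERMEDIATE);
      FileSinkDesc desc = new FileSinkDesc(tmpDir, tableDesc, compress);
      if (compress) {
        // codec and type only matter when intermediate compression is on
        desc.setCompressCodec(conf.getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC));
        desc.setCompressType(conf.getVar(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE));
      }
      // wire up an operator of the type matching this descriptor
      return OperatorFactory.get(desc, schema);
    }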


Walking an operator list to pre-create the temporary output directory named by
each FileSinkDesc before the job is submitted:

          while (!opList.isEmpty()) {
            Operator<? extends OperatorDesc> op = opList.remove(0);

            if (op instanceof FileSinkOperator) {
              FileSinkDesc fdesc = ((FileSinkOperator) op).getConf();
              String tempDir = fdesc.getDirName();

              if (tempDir != null) {
                Path tempPath = Utilities.toTempPath(new Path(tempDir));
                LOG.info("Making Temp Directory: " + tempDir);
                FileSystem fs = tempPath.getFileSystem(job);
                fs.mkdirs(tempPath); // create the directory (completes the truncated excerpt)
              }
            }
            // the full source also queues op's children onto opList here
          }


Localizing each FileSinkDesc's directory to an MR temp file URI while
traversing the tree:

          while (!opList.isEmpty()) {
            Operator<? extends OperatorDesc> op = opList.remove(0);

            if (op instanceof FileSinkOperator) {
              FileSinkDesc fdesc = ((FileSinkOperator) op).getConf();
              String s = fdesc.getDirName();
              if ((s != null) && ctx.isMRTmpFileURI(s)) {
                fdesc.setDirName(ctx.localizeMRTmpFileURI(s));
              }
              ((FileSinkOperator) op).setConf(fdesc);
            }

            if (op.getChildOperators() != null) {
              opList.addAll(op.getChildOperators()); // queue children (completes the truncated excerpt)
            }
          }

Splicing a FileSinkOperator in as the sole child of a map-join, so the join's
output is staged in a task temp directory:

      sel.setParentOperators(null);

      // Create a file sink operator for this file name
      Operator<? extends OperatorDesc> fs_op = OperatorFactory.get(
          new FileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar(
              HiveConf.ConfVars.COMPRESSINTERMEDIATE)), mapJoin.getSchema());

      assert mapJoin.getChildOperators().size() == 1;
      mapJoin.getChildOperators().set(0, fs_op);

The same pattern for an arbitrary parent operator, with the temp URI obtained
from the planning Context:

    Context baseCtx = parseCtx.getContext();
    String taskTmpDir = baseCtx.getMRTmpFileURI();

    // Create a file sink operator for this file name
    Operator<? extends OperatorDesc> fs_op = OperatorFactory.get(
        new FileSinkDesc(taskTmpDir, tt_desc, parseCtx.getConf().getBoolVar(
            HiveConf.ConfVars.COMPRESSINTERMEDIATE)), parent.getSchema());

    assert parent.getChildOperators().size() == 1;
    parent.getChildOperators().set(0, fs_op);

Cloning the final sink's FileSinkDesc so that each parent operator gets its
own linked sink writing under a shared parent directory:

        // Clone the fileSinkDesc of the final fileSink and create similar fileSinks at
        // each parent
        List<FileSinkDesc> fileDescLists = new ArrayList<FileSinkDesc>();

        for (Operator<? extends OperatorDesc> parent : parents) {
          FileSinkDesc fileSinkDesc = (FileSinkDesc) fileSinkOp.getConf().clone();

          String dirName = parentDirName + Path.SEPARATOR + parent.getIdentifier();
          fileSinkDesc.setDirName(dirName);
          fileSinkDesc.setLinkedFileSink(true);
          fileSinkDesc.setParentDir(parentDirName);
          parent.setChildOperators(null);
          Operator<? extends OperatorDesc> tmpFileSinkOp =
            OperatorFactory.getAndMakeChild(fileSinkDesc, parent.getSchema(), parent);
          tmpFileSinkOp.setChildOperators(null);
          fileDescLists.add(fileSinkDesc);
        }
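Reading the fields used above (an inference from the code, not something this
page states): each clone writes to its own subdirectory named after the parent
operator's identifier, so two parents with identifiers 0 and 1 would produce

    parentDirName/0    <- linked sink for parent 0
    parentDirName/1    <- linked sink for parent 1

and setLinkedFileSink(true) plus setParentDir(...) let later planning steps
recover the whole group via getLinkedFileSinkDesc(), as the excerpts further
down do when they re-point every linked descriptor at a new directory.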

Creating an intermediate-file sink and, when hive.exec.compress.intermediate
is enabled, setting the codec and compression type explicitly:

    // (the excerpt begins mid-statement; the enclosing call builds the
    // intermediate file's TableDesc from the parent's row schema)
    TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils
        .getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));

    // Create a file sink operator for this file name
    boolean compressIntermediate = parseCtx.getConf().getBoolVar(
        HiveConf.ConfVars.COMPRESSINTERMEDIATE);
    FileSinkDesc desc = new FileSinkDesc(taskTmpDir, tt_desc,
        compressIntermediate);
    if (compressIntermediate) {
      desc.setCompressCodec(parseCtx.getConf().getVar(
          HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC));
      desc.setCompressType(parseCtx.getConf().getVar(
          HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE));
    }
    Operator<? extends OperatorDesc> fs_op = putOpInsertMap(OperatorFactory
        .get(desc, parent.getSchema()), null, parseCtx);

Wiring move/merge tasks after plan generation: if the sink is linked, each
linked FileSinkDesc is mapped to the single child task that will consume its
output:

    // need to merge the files in the destination table/partitions
    if (chDir && (finalName != null)) {
      createMergeJob((FileSinkOperator) nd, ctx, finalName);
    }

    FileSinkDesc fileSinkDesc = fsOp.getConf();
    if (fileSinkDesc.isLinkedFileSink()) {
      Map<FileSinkDesc, Task<? extends Serializable>> linkedFileDescTasks =
        ctx.getLinkedFileDescTasks();
      if (linkedFileDescTasks == null) {
        linkedFileDescTasks = new HashMap<FileSinkDesc, Task<? extends Serializable>>();
        ctx.setLinkedFileDescTasks(linkedFileDescTasks);
      }

      // The child tasks may be null in case of a select
      if ((currTask.getChildTasks() != null) &&
        (currTask.getChildTasks().size() == 1)) {
        for (FileSinkDesc fileDesc : fileSinkDesc.getLinkedFileSinkDesc()) {
          linkedFileDescTasks.put(fileDesc, currTask.getChildTasks().get(0));
        }
      }
    }

Building the merge leg itself: rows from the input sink are shuffled through a
key-less ReduceSink, re-extracted from the VALUE field, and written by a new
FileSinkDesc to the final location:

    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(
        new ArrayList<ExprNodeDesc>(), valueCols, outputColumns, false, -1, -1,
        -1);
    OperatorFactory.getAndMakeChild(rsDesc, inputRS, tsMerge);
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsConf = fsOp.getConf();

    // Add the extract operator to get the value fields
    RowResolver out_rwsch = new RowResolver();
    RowResolver interim_rwsch = ctx.getParseCtx().getOpParseCtx().get(fsOp).getRowResolver();
    Integer pos = Integer.valueOf(0);
    for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
      String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
      out_rwsch.put(info[0], info[1], new ColumnInfo(pos.toString(),
          colInfo.getType(), info[0], colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()));
      pos = Integer.valueOf(pos.intValue() + 1);
    }

    Operator<ExtractDesc> extract = OperatorFactory.getAndMakeChild(new ExtractDesc(
        new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
            Utilities.ReduceField.VALUE.toString(), "", false)),
            new RowSchema(out_rwsch.getColumnInfos()));

    TableDesc ts = (TableDesc) fsConf.getTableInfo().clone();
    fsConf.getTableInfo().getProperties().remove(
        org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);

    FileSinkDesc newFSD = new FileSinkDesc(finalName, ts, parseCtx.getConf()
        .getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator newOutput = (FileSinkOperator) OperatorFactory.
      getAndMakeChild(newFSD, inputRS, extract);

    HiveConf conf = parseCtx.getConf();
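The plan assembled above (reading the calls, not stated on this page) is
TS -> RS -> EX -> FS: the merge job scans the sink's temporary output,
shuffles it through a ReduceSink with an empty key list so reducers simply
concatenate rows, re-extracts the columns from the VALUE field, and writes
them with the new FileSinkDesc to finalName, honoring
hive.exec.compress.output via HiveConf.ConfVars.COMPRESSRESULT.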

Merging small files for a dynamically partitioned sink: a TableScan reads the
input sink's output, and a second FileSinkDesc writes the merged result to
finalName:

    //
    // 1. create the operator tree
    //
    ParseContext parseCtx = ctx.getParseCtx();
    FileSinkDesc fsInputDesc = fsInput.getConf();

    // Create a TableScan operator
    RowSchema inputRS = fsInput.getSchema();
    Operator<? extends OperatorDesc> tsMerge =
      OperatorFactory.get(TableScanDesc.class, inputRS);

    // Create a FileSink operator
    TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
    FileSinkDesc fsOutputDesc =  new FileSinkDesc(finalName, ts,
        parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(
        fsOutputDesc,  inputRS, tsMerge);

    // If the input FileSinkOperator is dynamic-partition enabled, the tsMerge input schema
    // needs to include the partition columns, and fsOutput needs a
    // DynamicPartitionCtx to indicate that its output is dynamically partitioned.
    DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
    if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
      // adding DP ColumnInfo to the RowSchema signature
      ArrayList<ColumnInfo> signature = inputRS.getSignature();
      String tblAlias = fsInputDesc.getTableInfo().getTableName();
      LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>();
      StringBuilder partCols = new StringBuilder();
      for (String dpCol: dpCtx.getDPColNames()) {
        ColumnInfo colInfo = new ColumnInfo(dpCol,
            TypeInfoFactory.stringTypeInfo, // all partition column type should be string
            tblAlias, true); // partition column is virtual column
        signature.add(colInfo);
        colMap.put(dpCol, dpCol); // input and output have the same column name
        partCols.append(dpCol).append('/');
      }
      partCols.setLength(partCols.length()-1); // remove the last '/'
      inputRS.setSignature(signature);

      // create another DynamicPartitionCtx, which has a different input-to-DP column mapping
      DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
      dpCtx2.setInputToDPCols(colMap);
      fsOutputDesc.setDynPartCtx(dpCtx2);

      // update the FileSinkOperator to include partition columns
      fsInputDesc.getTableInfo().getProperties().setProperty(
        org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS,
        partCols.toString()); // list of dynamic partition column names
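A note on the property written above: partCols joins the dynamic partition
column names with '/', so for DP columns ds and hr (hypothetical names, used
only for illustration) the TableDesc property META_TABLE_PARTITION_COLUMNS
becomes "ds/hr". The cloned DynamicPartitionCtx gets an identity input-to-DP
mapping (colMap) because the merge job's input already carries the partition
columns under their final names.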

Re-pointing a sink (and all of its linked descriptors) at an external temp
directory on the destination's file system:

      // the temp directory must be on the same file system as the current destination
      ParseContext parseCtx = ctx.getParseCtx();
      Context baseCtx = parseCtx.getContext();
      String tmpDir = baseCtx.getExternalTmpFileURI((new Path(dest)).toUri());

      FileSinkDesc fileSinkDesc = fsOp.getConf();
      // Change all the linked file sink descriptors
      if (fileSinkDesc.isLinkedFileSink()) {
        for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) {
          String fileName = Utilities.getFileNameFromDirName(fsConf.getDirName());
          fsConf.setParentDir(tmpDir);
          fsConf.setDirName(tmpDir + Path.SEPARATOR + fileName);
        }
      } else {
        fileSinkDesc.setDirName(tmpDir);
      }
    }

    Task<MoveWork> mvTask = null;
