Package org.apache.hadoop.hive.ql.io.merge

Examples of org.apache.hadoop.hive.ql.io.merge.MergeFileWork

The excerpts below are drawn from Apache Hive's query-execution code and show how a MergeFileWork is constructed and wired into a task for fast file merging of RCFile and ORC data.


    ListBucketingCtx lbCtx = mergeFilesDesc.getLbCtx();
    // lbatc: whether this is an ALTER TABLE ... CONCATENATE on a list-bucketed
    // (skewed, stored-as-directories) table; lbd: the list-bucketing depth
    boolean lbatc = lbCtx != null && lbCtx.isSkewedStoredAsDir();
    int lbd = lbCtx == null ? 0 : lbCtx.calculateListBucketingLevel();

    // merge work only needs input and output.
    MergeFileWork mergeWork = new MergeFileWork(mergeFilesDesc.getInputDir(),
        mergeFilesDesc.getOutputDir(), mergeFilesDesc.getInputFormatClass().getName());
    mergeWork.setListBucketingCtx(lbCtx);
    mergeWork.resolveConcatenateMerge(db.getConf());
    mergeWork.setMapperCannotSpanPartns(true);
    mergeWork.setSourceTableInputFormat(mergeFilesDesc.getInputFormatClass().getName());
    final FileMergeDesc fmd;
    if (mergeFilesDesc.getInputFormatClass().equals(RCFileInputFormat.class)) {
      fmd = new RCFileMergeDesc();
    } else {
      // safe to assume else is ORC as semantic analyzer will check for RC/ORC
      fmd = new OrcFileMergeDesc();
    }

    fmd.setDpCtx(null);
    fmd.setHasDynamicPartitions(false);
    fmd.setListBucketingAlterTableConcatenate(lbatc);
    fmd.setListBucketingDepth(lbd);
    fmd.setOutputPath(mergeFilesDesc.getOutputDir());

    Operator<? extends OperatorDesc> mergeOp = OperatorFactory.get(fmd);

    LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork =
        new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
    aliasToWork.put(mergeFilesDesc.getInputDir().toString(), mergeOp);
    mergeWork.setAliasToWork(aliasToWork);
    DriverContext driverCxt = new DriverContext();
    MergeFileTask taskExec = new MergeFileTask();
    taskExec.initialize(db.getConf(), null, driverCxt);
    taskExec.setWork(mergeWork);
    taskExec.setQueryPlan(this.getQueryPlan());
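
This first excerpt appears to come from Hive's DDL handling of ALTER TABLE ... CONCATENATE: it builds a MergeFileWork that only describes input and output directories, picks an RCFile or ORC FileMergeDesc, and hands the resulting operator to a MergeFileTask. As a minimal sketch of what a caller might do next, assuming the pre-2.0 Task API in which execute(DriverContext) returns a process-style exit code; the error handling below is illustrative, not part of the excerpt:

    // Hedged sketch: driving the MergeFileTask assembled above. Assumes
    // Task.execute(DriverContext) returns 0 on success and that failures
    // are recorded via Task.getException().
    int ret = taskExec.execute(driverCxt);
    if (taskExec.getException() != null) {
      // surface the underlying cause rather than only the exit code
      throw new HiveException("ALTER TABLE ... CONCATENATE merge failed",
          taskExec.getException());
    }
    return ret;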


    conf.set(TEZ_TMP_DIR_KEY, context.getMRTmpPath().toUri().toString());
    conf.set("mapred.mapper.class", ExecMapper.class.getName());
    conf.set("mapred.input.format.class", inpFormat);

    if (mapWork instanceof MergeFileWork) {
      MergeFileWork mfWork = (MergeFileWork) mapWork;
      // This mapper class is used for serialization/deserialization of merge
      // file work.
      conf.set("mapred.mapper.class", MergeFileMapper.class.getName());
      conf.set("mapred.input.format.class", mfWork.getInputformat());
      conf.setClass("mapred.output.format.class", MergeFileOutputFormat.class,
          FileOutputFormat.class);
    }

    return conf;
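
The second excerpt has the shape of Tez vertex configuration (Hive's DagUtils): ordinary map work runs ExecMapper, but when the work is a MergeFileWork the mapper, input format, and output format are all swapped for their merge-specific counterparts. A hedged sketch of what that swap means to a caller; `initializeVertexConf` (the assumed name of the enclosing method) and the two work objects are hypothetical, while the property keys come straight from the excerpt:

    // Hedged sketch: verifying the mapper swap for merge work.
    JobConf mergeConf = initializeVertexConf(hiveConf, context, mergeFileWork);
    // a merge vertex runs MergeFileMapper with the merge input format
    assert MergeFileMapper.class.getName()
        .equals(mergeConf.get("mapred.mapper.class"));

    JobConf plainConf = initializeVertexConf(hiveConf, context, plainMapWork);
    // any other MapWork keeps ExecMapper
    assert ExecMapper.class.getName()
        .equals(plainConf.get("mapred.mapper.class"));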

    // choose the internal input format used to generate merge splits;
    // fast file merging is only supported for RCFile and ORC
    // (the if/else around this throw is reconstructed from context:
    // internalIFClass is required further down)
    final Class<? extends InputFormat> internalIFClass;
    if (tblDesc.getInputFileFormatClass().equals(RCFileInputFormat.class)) {
      internalIFClass = RCFileBlockMergeInputFormat.class;
    } else if (tblDesc.getInputFileFormatClass().equals(OrcInputFormat.class)) {
      internalIFClass = OrcFileStripeMergeInputFormat.class;
    } else {
      throw new SemanticException("createMergeTask called on a table with file"
          + " format other than RCFile or ORCFile");
    }

    // create the merge file work
    MergeFileWork work = new MergeFileWork(inputDirs, finalName,
        hasDynamicPartitions, tblDesc.getInputFileFormatClass().getName());
    LinkedHashMap<String, ArrayList<String>> pathToAliases =
        new LinkedHashMap<String, ArrayList<String>>();
    pathToAliases.put(inputDir.toString(), inputDirstr);
    work.setMapperCannotSpanPartns(true);
    work.setPathToAliases(pathToAliases);
    PartitionDesc pDesc = new PartitionDesc(tblDesc, null);
    pDesc.setInputFileFormatClass(internalIFClass);
    work.getPathToPartitionInfo().put(inputDir.toString(), pDesc);
    work.setListBucketingCtx(fsInputDesc.getLbCtx());

    // create alias to work which contains the merge operator
    LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork =
        new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
    final FileMergeDesc fmd;
    if (tblDesc.getInputFileFormatClass().equals(RCFileInputFormat.class)) {
      fmd = new RCFileMergeDesc();
    } else {
      // the format guard above guarantees ORC at this point
      fmd = new OrcFileMergeDesc();
    }
    fmd.setDpCtx(fsInputDesc.getDynPartCtx());
    fmd.setOutputPath(finalName);
    fmd.setHasDynamicPartitions(work.hasDynamicPartitions());
    fmd.setListBucketingAlterTableConcatenate(work.isListBucketingAlterTableConcatenate());
    int lbLevel = work.getListBucketingCtx() == null ? 0 :
      work.getListBucketingCtx().calculateListBucketingLevel();
    fmd.setListBucketingDepth(lbLevel);
    Operator<? extends OperatorDesc> mergeOp = OperatorFactory.get(fmd);
    aliasToWork.put(inputDir.toString(), mergeOp);
    work.setAliasToWork(aliasToWork);

    return work;
  }
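
The third excerpt matches the shape of GenMapRedUtils.createMergeTask: it assembles a complete MergeFileWork, including the path-to-alias and path-to-partition mappings, and returns it as map-only work. A hedged sketch of how a caller might turn the result into a runnable task, loosely following the pattern Hive uses when merging small files on the MapReduce engine; the local variable names and the exact createMergeTask signature are assumptions:

    // Hedged sketch: on the MR engine, the map-only merge work is wrapped
    // in a MapredWork before a task is created; TaskFactory.get(work, conf)
    // is the usual Hive idiom for turning work into an executable task.
    MapWork mergeWork = createMergeTask(fsInputDesc, finalName, hasDynamicPartitions);
    MapredWork mrWork = new MapredWork();
    mrWork.setMapWork(mergeWork);
    Task<? extends Serializable> mergeTask = TaskFactory.get(mrWork, conf);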
