Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.MapWork
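The snippets below, collected from the Hive codebase, show how org.apache.hadoop.hive.ql.plan.MapWork instances are created, configured, and consumed.

In this first example, apparently taken from the index optimizer, a map-reduce task is rewritten to read through a chosen index: the MapWork gets the index's input format and intermediate file, and the semantic inputs are extended accordingly.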


      // modify the parse context to use indexing
      // we need to delay this until we choose one index so that we don't attempt to modify pctx multiple times
      HiveIndexQueryContext queryContext = queryContexts.get(chosenIndex);

      // prepare the map reduce job to use indexing
      MapWork work = currentTask.getWork().getMapWork();
      work.setInputformat(queryContext.getIndexInputFormat());
      work.addIndexIntermediateFile(queryContext.getIndexIntermediateFile());
      // modify inputs based on index query
      Set<ReadEntity> inputs = pctx.getSemanticInputs();
      inputs.addAll(queryContext.getAdditionalSemanticInputs());
      List<Task<?>> chosenRewrite = queryContext.getQueryTasks();
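Next, a test fixture builds an empty TezWork graph and registers two MapWork and two ReduceWork vertices on it: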


          }
        });

    work = new TezWork("");

    mws = new MapWork[] { new MapWork(), new MapWork() };
    rws = new ReduceWork[] { new ReduceWork(), new ReduceWork() };

    work.addAll(mws);
    work.addAll(rws);
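The following fragment, apparently from an ORC input-format test, assembles a complete map plan by hand: a TableDesc over OrcInputFormat/OrcOutputFormat, alias and partition maps keyed by partition path, and empty vectorization maps; the plan is then written out as map.xml: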

    tblProps.put("columns", columnNames.toString());
    tblProps.put("columns.types", columnTypes.toString());
    TableDesc tbl = new TableDesc(OrcInputFormat.class, OrcOutputFormat.class,
        tblProps);

    MapWork mapWork = new MapWork();
    mapWork.setVectorMode(isVectorized);
    mapWork.setUseBucketizedHiveInputFormat(false);
    LinkedHashMap<String, ArrayList<String>> aliasMap =
        new LinkedHashMap<String, ArrayList<String>>();
    ArrayList<String> aliases = new ArrayList<String>();
    aliases.add(tableName);
    LinkedHashMap<String, PartitionDesc> partMap =
        new LinkedHashMap<String, PartitionDesc>();
    for (int p = 0; p < partitions; ++p) {
      aliasMap.put(partPath[p], aliases);
      LinkedHashMap<String, String> partSpec =
          new LinkedHashMap<String, String>();
      PartitionDesc part = new PartitionDesc(tbl, partSpec);
      partMap.put(partPath[p], part);
    }
    mapWork.setPathToAliases(aliasMap);
    mapWork.setPathToPartitionInfo(partMap);
    mapWork.setAllColumnVectorMaps(new HashMap<String, Map<String, Integer>>());
    mapWork.setAllScratchColumnVectorTypeMaps(new HashMap<String,
        Map<Integer, String>>());

    // write the plan out
    FileSystem localFs = FileSystem.getLocal(conf).getRaw();
    Path mapXml = new Path(workDir, "map.xml");
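During Tez planning, each MapWork vertex starts at a TableScanOperator. The helper below creates the vertex, naming it with a running sequence number: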

  }

  public MapWork createMapWork(GenTezProcContext context, Operator<?> root,
      TezWork tezWork, PrunedPartitionList partitions) throws SemanticException {
    assert root.getParentOperators().isEmpty();
    MapWork mapWork = new MapWork("Map " + (++sequenceNumber));
    LOG.debug("Adding map work (" + mapWork.getName() + ") for " + root);

    // map work starts with table scan operators
    assert root instanceof TableScanOperator;
    String alias = ((TableScanOperator)root).getConf().getAlias();
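Here, stats collection inspects a finished MapRedTask: the operators of the map side (and the reducer, if present) are gathered, and NodeUtils.iterate visits every FileSinkOperator among them: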

    if (statsTasks.isEmpty() || !(runner.getTask() instanceof MapRedTask)) {
      return;
    }
    MapRedTask mapredTask = (MapRedTask) runner.getTask();

    MapWork mapWork = mapredTask.getWork().getMapWork();
    ReduceWork reduceWork = mapredTask.getWork().getReduceWork();
    List<Operator> operators = new ArrayList<Operator>(mapWork.getAliasToWork().values());
    if (reduceWork != null) {
      operators.add(reduceWork.getReducer());
    }
    final List<String> statKeys = new ArrayList<String>(1);
    NodeUtils.iterate(operators, FileSinkOperator.class, new Function<FileSinkOperator>() {
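When tasks are chained through temporary directories, the directory path does triple duty in the downstream MapWork: it is the path key, its own alias, and the key under which the top operator is registered: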

        MapredWork plan = (MapredWork) currTask.getWork();
        for (int pos = 0; pos < size; pos++) {
          String taskTmpDir = taskTmpDirLst.get(pos);
          TableDesc tt_desc = tt_descLst.get(pos);
          MapWork mWork = plan.getMapWork();
          if (mWork.getPathToAliases().get(taskTmpDir) == null) {
            mWork.getPathToAliases().put(taskTmpDir,
                new ArrayList<String>());
            mWork.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
            mWork.getPathToPartitionInfo().put(taskTmpDir,
                new PartitionDesc(tt_desc, null));
            mWork.getAliasToWork().put(taskTmpDir, topOperators.get(pos));
          }
        }
      }
    }
  }
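A skeleton plan can also be derived directly from the configuration. Note that the path, partition, and alias maps are initialized empty, so callers are expected to populate them afterwards: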

   *
   * @return the new plan
   */
  public static MapredWork getMapRedWorkFromConf(HiveConf conf) {
    MapredWork mrWork = new MapredWork();
    MapWork work = mrWork.getMapWork();

    boolean mapperCannotSpanPartns =
        conf.getBoolVar(
            HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
    work.setMapperCannotSpanPartns(mapperCannotSpanPartns);
    work.setPathToAliases(new LinkedHashMap<String, ArrayList<String>>());
    work.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>());
    work.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>());
    work.setHadoopSupportsSplittable(
        conf.getBoolVar(HiveConf.ConfVars.HIVE_COMBINE_INPUT_FORMAT_SUPPORTS_SPLITTABLE));
    return mrWork;
  }
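File-merge jobs are constructed conditionally: a block-level RCFile merge when the input format supports it, otherwise a generic merge MapWork wrapped in either a TezWork or a MapredWork, depending on the execution engine: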

    //
    // 2. Constructing a conditional task consisting of a move task and a map reduce task
    //
    MoveWork dummyMv = new MoveWork(null, null, null,
         new LoadFileDesc(fsInputDesc.getFinalDirName(), finalName, true, null, null), false);
    MapWork cplan;
    Serializable work;

    if (conf.getBoolVar(ConfVars.HIVEMERGERCFILEBLOCKLEVEL) &&
        fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) {

      // Check if InputFormatClass is valid
      String inputFormatClass = conf.getVar(ConfVars.HIVEMERGEINPUTFORMATBLOCKLEVEL);
      try {
        // load the class only to validate that it is available
        Class<?> c = Class.forName(inputFormatClass);

        LOG.info("RCFile format- Using block level merge");
        cplan = GenMapRedUtils.createRCFileMergeTask(fsInputDesc, finalName,
            dpCtx != null && dpCtx.getNumDPCols() > 0);
        work = cplan;
      } catch (ClassNotFoundException e) {
        String msg = "Illegal input format class: " + inputFormatClass;
        throw new SemanticException(msg);
      }

    } else {
      cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc);
      if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
        work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
        cplan.setName("Merge");
        ((TezWork)work).add(cplan);
      } else {
        work = new MapredWork();
        ((MapredWork)work).setMapWork(cplan);
      }
    }
    // use CombineHiveInputFormat for map-only merging
    cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");
    // NOTE: stats should be gathered in MR1 rather than in the merge job (MR2),
    // since we don't know at compile time whether the merge MR2 will be triggered
    ConditionalTask cndTsk = GenMapRedUtils.createCondTask(conf, currTask, dummyMv, work,
        fsInputDesc.getFinalDirName().toString());
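The generic merge plan itself is a single-alias, map-only job built on the skeleton factory shown above, keyed entirely by the input directory: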

    TableDesc tblDesc = fsDesc.getTableInfo();
    aliases.add(inputDir); // dummy alias: just use the input path

    // constructing the default MapredWork
    MapredWork cMrPlan = GenMapRedUtils.getMapRedWorkFromConf(conf);
    MapWork cplan = cMrPlan.getMapWork();
    cplan.getPathToAliases().put(inputDir, aliases);
    cplan.getPathToPartitionInfo().put(inputDir, new PartitionDesc(tblDesc, null));
    cplan.getAliasToWork().put(inputDir, topOp);
    cplan.setMapperCannotSpanPartns(true);

    return cplan;
  }
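Finally, MapWork is also read back at runtime. Inside an MR task, the Avro SerDe fetches the deserialized MapWork from the job configuration and scans its path-to-partition map for the partition containing the current split: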

   */
  private Schema getSchema(JobConf job, FileSplit split) throws AvroSerdeException, IOException {
    FileSystem fs = split.getPath().getFileSystem(job);
    // Inside of a MR job, we can pull out the actual properties
    if (AvroSerdeUtils.insideMRJob(job)) {
      MapWork mapWork = Utilities.getMapWork(job);

      // Iterate over the Path -> Partition descriptions to find the partition
      // that matches our input split.
      for (Map.Entry<String, PartitionDesc> pathsAndParts : mapWork.getPathToPartitionInfo().entrySet()) {
        String partitionPath = pathsAndParts.getKey();
        if (pathIsInPartition(split.getPath(), partitionPath)) {
          if (LOG.isInfoEnabled()) {
            LOG.info("Matching partition " + partitionPath +
                " with input split " + split);
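Putting the pieces together, the sketch below (not one of the excerpts above) publishes a MapWork plan on the driver side and resolves it again on the task side. It assumes the Utilities.setMapWork/getMapWork helpers of this Hive version, as used in the Avro example; the scratch-directory path and class name are hypothetical.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.mapred.JobConf;

public class MapWorkRoundTrip {
  public static void main(String[] args) throws Exception {
    HiveConf conf = new HiveConf();

    // driver side: build a (here empty) map plan and publish it under the
    // scratch directory; the plan path is recorded in the configuration
    MapWork mapWork = new MapWork("Map 1");
    Path scratchDir = new Path("/tmp/hive-scratch"); // hypothetical location
    Utilities.setMapWork(conf, mapWork, scratchDir, true);

    // task side: resolve the plan from the configuration, as the Avro
    // example above does with Utilities.getMapWork(job)
    MapWork restored = Utilities.getMapWork(new JobConf(conf));
    System.out.println("restored plan: " + restored.getName());
  }
}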
