Examples of org.apache.hadoop.hive.ql.exec.MapRedTask

org.apache.hadoop.hive.ql.exec.MapRedTask
Extension of ExecDriver that (1) can optionally spawn a map-reduce task from a separate JVM, and (2) makes last-minute adjustments to map-reduce job parameters, namely: estimating the number of reducers, and estimating whether the job should run locally.

          // create map join task and set big table as i
          // deep copy a new mapred work from xml
          InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8"));
          MapredWork newWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf());
          // create a mapred task for this work
          MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext
              .getParseContext().getConf());
          JoinOperator newJoinOp = getJoinOp(newTask);


          // optimize this newWork and assume big table position is i
          bigTableAlias = MapJoinProcessor.genMapJoinOpAndLocalWork(newWork, newJoinOp, i);


          // add into conditional task
          listWorks.add(newWork);
          listTasks.add(newTask);
          newTask.setTaskTag(Task.CONVERTED_MAPJOIN);


          //set up backup task
          newTask.setBackupTask(currTask);
          newTask.setBackupChildrenTasks(currTask.getChildTasks());


          // put the mapping alias to task
          aliasToTask.put(bigTableAlias, newTask);


          // set alias to path

View Full Code Here

    }


    //
    // 2. Constructing a conditional task consisting of a move task and a map reduce task
    //
    MapRedTask currTask = (MapRedTask) ctx.getCurrTask();
    MoveWork dummyMv = new MoveWork(null, null, null,
        new LoadFileDesc(fsInputDesc.getDirName(), finalName, true, null, null), false);
    MapredWork cplan = createMergeTask(ctx.getConf(), tsMerge, fsInputDesc);
    // use CombineHiveInputFormat for map-only merging
    cplan.setInputformat("org.apache.hadoop.hive.ql.io.CombineHiveInputFormat");

View Full Code Here

    }


    //
    // 2. Constructing a conditional task consisting of a move task and a map reduce task
    //
    MapRedTask currTask = (MapRedTask) ctx.getCurrTask();
    MoveWork dummyMv = new MoveWork(null, null, null,
        new LoadFileDesc(fsInputDesc.getDirName(), finalName, true, null, null), false);
    MapredWork cplan;


    if(parseCtx.getConf().getBoolVar(HiveConf.ConfVars.

View Full Code Here


    // we can only process MapReduce tasks to check input size
    if (!context.getCurrentTask().isMapRedTask()) {
      return null;
    }
    MapRedTask currentTask = (MapRedTask) context.getCurrentTask();


    // get potential reentrant index queries from each index
    Map<Index, HiveIndexQueryContext> queryContexts = new HashMap<Index, HiveIndexQueryContext>();
    Collection<List<Index>> tableIndexes = indexes.values();
    for (List<Index> indexesOnTable : tableIndexes) {
      List<List<Index>> indexesByType = new ArrayList<List<Index>>();
      for (Index index : indexesOnTable) {
        boolean added = false;
        for (List<Index> indexType : indexesByType) {
          if (indexType.isEmpty()) {
            indexType.add(index);
            added = true;
          } else if (indexType.get(0).getIndexHandlerClass().equals(
                index.getIndexHandlerClass())) {
            indexType.add(index);
            added = true;
            break;
          }
        }
        if (!added) {
          List<Index> newType = new ArrayList<Index>();
          newType.add(index);
          indexesByType.add(newType);
        }
      }


      // choose index type with most indexes of the same type on the table
      // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
      List<Index> bestIndexes = indexesByType.get(0);
      for (List<Index> indexTypes : indexesByType) {
        if (bestIndexes.size() < indexTypes.size()) {
          bestIndexes = indexTypes;
        }
      }


      // rewrite index queries for the chosen index type
      HiveIndexQueryContext queryContext = new HiveIndexQueryContext();
      queryContext.setQueryPartitions(queryPartitions);
      rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, queryContext);
      List<Task<?>> indexTasks = queryContext.getQueryTasks();


      if (indexTasks != null && indexTasks.size() > 0) {
        queryContexts.put(bestIndexes.get(0), queryContext);
      }
    }
    // choose an index rewrite to use
    if (queryContexts.size() > 0) {
      // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
      Index chosenIndex = queryContexts.keySet().iterator().next();


      // modify the parse context to use indexing
      // we need to delay this until we choose one index so that we don't attempt to modify pctx multiple times
      HiveIndexQueryContext queryContext = queryContexts.get(chosenIndex);


      // prepare the map reduce job to use indexing
      MapredWork work = currentTask.getWork();
      work.setInputformat(queryContext.getIndexInputFormat());
      work.setIndexIntermediateFile(queryContext.getIndexIntermediateFile());


      // modify inputs based on index query
      Set<ReadEntity> inputs = pctx.getSemanticInputs();

View Full Code Here

    }


    //
    // 2. Constructing a conditional task consisting of a move task and a map reduce task
    //
    MapRedTask currTask = (MapRedTask) ctx.getCurrTask();
    MoveWork dummyMv = new MoveWork(null, null, null,
        new LoadFileDesc(fsInputDesc.getFinalDirName(), finalName, true, null, null), false);
    MapredWork cplan;


    if(parseCtx.getConf().getBoolVar(HiveConf.ConfVars.

View Full Code Here

          // create map join task and set big table as i
          // deep copy a new mapred work from xml
          InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8"));
          MapredWork newWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf());
          // create a mapred task for this work
          MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext
              .getParseContext().getConf());
          JoinOperator newJoinOp = getJoinOp(newTask);


          // optimize this newWork and assume big table position is i
          bigTableAlias = MapJoinProcessor.genMapJoinOpAndLocalWork(newWork, newJoinOp, i);


          Long aliasKnownSize = aliasToSize.get(bigTableAlias);
          if (aliasKnownSize != null && aliasKnownSize.longValue() > 0) {
            long smallTblTotalKnownSize = aliasTotalKnownInputSize
                - aliasKnownSize.longValue();
            if(smallTblTotalKnownSize > ThresholdOfSmallTblSizeSum) {
              //this table is not good to be a big table.
              continue;
            }
          }


          // add into conditional task
          listWorks.add(newWork);
          listTasks.add(newTask);
          newTask.setTaskTag(Task.CONVERTED_MAPJOIN);


          //set up backup task
          newTask.setBackupTask(currTask);
          newTask.setBackupChildrenTasks(currTask.getChildTasks());


          // put the mapping alias to task
          aliasToTask.put(bigTableAlias, newTask);
        }
      } catch (Exception e) {

View Full Code Here

          // create map join task and set big table as i
          // deep copy a new mapred work from xml
          InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8"));
          MapredWork newWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf());
          // create a mapred task for this work
          MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext
              .getParseContext().getConf());
          JoinOperator newJoinOp = getJoinOp(newTask);


          // optimize this newWork and assume big table position is i
          bigTableAlias = MapJoinProcessor.genMapJoinOpAndLocalWork(newWork, newJoinOp, i);


          Long aliasKnownSize = aliasToSize.get(bigTableAlias);
          if (aliasKnownSize != null && aliasKnownSize.longValue() > 0) {
            long smallTblTotalKnownSize = aliasTotalKnownInputSize
                - aliasKnownSize.longValue();
            if(smallTblTotalKnownSize > ThresholdOfSmallTblSizeSum) {
              //this table is not good to be a big table.
              continue;
            }
          }
          
          // add into conditional task
          listWorks.add(newWork);
          listTasks.add(newTask);
          newTask.setTaskTag(Task.CONVERTED_MAPJOIN);


          //set up backup task
          newTask.setBackupTask(currTask);
          newTask.setBackupChildrenTasks(currTask.getChildTasks());


          // put the mapping alias to task
          aliasToTask.put(bigTableAlias, newTask);
        }
      } catch (Exception e) {

View Full Code Here

    }


    //
    // 2. Constructing a conditional task consisting of a move task and a map reduce task
    //
    MapRedTask currTask = (MapRedTask) ctx.getCurrTask();
    MoveWork dummyMv = new MoveWork(null, null, null,
        new LoadFileDesc(fsInputDesc.getDirName(), finalName, true, null, null), false);
    MapredWork cplan;


    if(parseCtx.getConf().getBoolVar(HiveConf.ConfVars.

View Full Code Here

    }


    //
    // 2. Constructing a conditional task consisting of a move task and a map reduce task
    //
    MapRedTask currTask = (MapRedTask) ctx.getCurrTask();
    MoveWork dummyMv = new MoveWork(null, null, null,
        new LoadFileDesc(fsInputDesc.getDirName(), finalName, true, null, null), false);
    MapredWork cplan;


    if(parseCtx.getConf().getBoolVar(HiveConf.ConfVars.

View Full Code Here


    // we can only process MapReduce tasks to check input size
    if (!context.getCurrentTask().isMapRedTask()) {
      return null;
    }
    MapRedTask currentTask = (MapRedTask) context.getCurrentTask();


    // get potential reentrant index queries from each index
    Map<Index, HiveIndexQueryContext> queryContexts = new HashMap<Index, HiveIndexQueryContext>();
    // make sure we have an index on the table being scanned
    TableDesc tblDesc = operator.getTableDesc();
    Table srcTable = pctx.getTopToTable().get(operator);
    if (indexes == null || indexes.get(srcTable) == null) {
      return null;
    }


    List<Index> tableIndexes = indexes.get(srcTable);
    Map<String, List<Index>> indexesByType = new HashMap<String, List<Index>>();
    for (Index indexOnTable : tableIndexes) {
      if (indexesByType.get(indexOnTable.getIndexHandlerClass()) == null) {
        List<Index> newType = new ArrayList<Index>();
        newType.add(indexOnTable);
        indexesByType.put(indexOnTable.getIndexHandlerClass(), newType);
      } else {
        indexesByType.get(indexOnTable.getIndexHandlerClass()).add(indexOnTable);
      }
    }


    // choose index type with most indexes of the same type on the table
    // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
    List<Index> bestIndexes = indexesByType.values().iterator().next();
    for (List<Index> indexTypes : indexesByType.values()) {
      if (bestIndexes.size() < indexTypes.size()) {
        bestIndexes = indexTypes;
      }
    }


    // rewrite index queries for the chosen index type
    HiveIndexQueryContext tmpQueryContext = new HiveIndexQueryContext();
    tmpQueryContext.setQueryPartitions(queryPartitions);
    rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, tmpQueryContext);
    List<Task<?>> indexTasks = tmpQueryContext.getQueryTasks();


    if (indexTasks != null && indexTasks.size() > 0) {
      queryContexts.put(bestIndexes.get(0), tmpQueryContext);
    }
    // choose an index rewrite to use
    if (queryContexts.size() > 0) {
      // TODO HIVE-2130 This would be a good place for some sort of cost based choice?
      Index chosenIndex = queryContexts.keySet().iterator().next();


      // modify the parse context to use indexing
      // we need to delay this until we choose one index so that we don't attempt to modify pctx multiple times
      HiveIndexQueryContext queryContext = queryContexts.get(chosenIndex);


      // prepare the map reduce job to use indexing
      MapredWork work = currentTask.getWork();
      work.setInputformat(queryContext.getIndexInputFormat());
      work.addIndexIntermediateFile(queryContext.getIndexIntermediateFile());
      // modify inputs based on index query
      Set<ReadEntity> inputs = pctx.getSemanticInputs();
      inputs.addAll(queryContext.getAdditionalSemanticInputs());

View Full Code Here

0 1

TOP

Related Classes of org.apache.hadoop.hive.ql.exec.MapRedTask

com.sap.hadoop.windowing.runtime2.mr.MRExecutor

org.apache.hadoop.fs.Path

org.apache.hadoop.hive.common.io.CachingPrintStream

org.apache.hadoop.hive.ql.Context

org.apache.hadoop.hive.ql.exec.Utilities.StreamPrinter

org.apache.hadoop.hive.ql.io.TestSymlinkTextInputFormat

org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1

org.apache.hadoop.hive.ql.optimizer.physical.CommonJoinResolver$CommonJoinTaskDispatcher

org.apache.hadoop.hive.ql.optimizer.physical.index.IndexWhereProcessor

org.apache.hadoop.hive.ql.plan.mapredWork

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.