Package org.apache.hadoop.hive.ql.plan

Examples of org.apache.hadoop.hive.ql.plan.TezWork$Dependency


        for (Operator<? extends OperatorDesc> op : opMap.values()) {
          setKeyAndValueDesc(work.getReduceWork(), op);
        }
      }
    } else if (task != null && (task.getWork() instanceof TezWork)) {
      TezWork work = (TezWork)task.getWork();
      for (BaseWork w : work.getAllWorkUnsorted()) {
        if (w instanceof MapWork) {
          ((MapWork)w).deriveExplainAttributes();
        }
      }
    }
View Full Code Here


      }

    } else {
      cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc);
      if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
        work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
        cplan.setName("Merge");
        ((TezWork)work).add(cplan);
      } else {
        work = new MapredWork();
        ((MapredWork)work).setMapWork(cplan);
View Full Code Here

      mrWork.getMapWork().setGatheringStats(true);
      if (mrWork.getReduceWork() != null) {
        mrWork.getReduceWork().setGatheringStats(true);
      }
    } else {
      TezWork work = (TezWork) currTask.getWork();
      for (BaseWork w: work.getAllWork()) {
        w.setGatheringStats(true);
      }
    }

    // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix
View Full Code Here

      // if we're visiting a terminal we've created ourselves,
      // just skip and keep going
      return null;
    }

    TezWork tezWork = context.currentTask.getWork();

    // Right now the work graph is pretty simple. If there is no
    // Preceding work we have a root and will generate a map
    // vertex. If there is a preceding work we will generate
    // a reduce vertex
    BaseWork work;
    if (context.rootToWorkMap.containsKey(root)) {
      // having seen the root operator before means there was a branch in the
      // operator graph. There's typically two reasons for that: a) mux/demux
      // b) multi insert. Mux/Demux will hit the same leaf again, multi insert
      // will result into a vertex with multiple FS or RS operators.

      // At this point we don't have to do anything special in this case. Just
      // run through the regular paces w/o creating a new task.
      work = context.rootToWorkMap.get(root);
    } else {
      // create a new vertex
      if (context.preceedingWork == null) {
        work = utils.createMapWork(context, root, tezWork, null);
      } else {
        work = utils.createReduceWork(context, root, tezWork);
      }
      context.rootToWorkMap.put(root, work);
    }

    if (!context.childToWorkMap.containsKey(operator)) {
      List<BaseWork> workItems = new LinkedList<BaseWork>();
      workItems.add(work);
      context.childToWorkMap.put(operator, workItems);
    } else {
      context.childToWorkMap.get(operator).add(work);
    }

    // remember which mapjoin operator links with which work
    if (!context.currentMapJoinOperators.isEmpty()) {
      for (MapJoinOperator mj: context.currentMapJoinOperators) {
        LOG.debug("Processing map join: " + mj);
        // remember the mapping in case we scan another branch of the
        // mapjoin later
        if (!context.mapJoinWorkMap.containsKey(mj)) {
          List<BaseWork> workItems = new LinkedList<BaseWork>();
          workItems.add(work);
          context.mapJoinWorkMap.put(mj, workItems);
        } else {
          context.mapJoinWorkMap.get(mj).add(work);
        }

        /*
         * this happens in case of map join operations.
         * The tree looks like this:
         *
         *        RS <--- we are here perhaps
         *        |
         *     MapJoin
         *     /     \
         *   RS       TS
         *  /
         * TS
         *
         * If we are at the RS pointed above, and we may have already visited the
         * RS following the TS, we have already generated work for the TS-RS.
         * We need to hook the current work to this generated work.
         */
        if (context.linkOpWithWorkMap.containsKey(mj)) {
          Map<BaseWork,TezEdgeProperty> linkWorkMap = context.linkOpWithWorkMap.get(mj);
          if (linkWorkMap != null) {
            if (context.linkChildOpWithDummyOp.containsKey(mj)) {
              for (Operator<?> dummy: context.linkChildOpWithDummyOp.get(mj)) {
                work.addDummyOp((HashTableDummyOperator) dummy);
              }
            }
            for (Entry<BaseWork,TezEdgeProperty> parentWorkMap : linkWorkMap.entrySet()) {
              BaseWork parentWork = parentWorkMap.getKey();
              LOG.debug("connecting "+parentWork.getName()+" with "+work.getName());
              TezEdgeProperty edgeProp = parentWorkMap.getValue();
              tezWork.connect(parentWork, work, edgeProp);
             
              // need to set up output name for reduce sink now that we know the name
              // of the downstream work
              for (ReduceSinkOperator r:
                     context.linkWorkWithReduceSinkMap.get(parentWork)) {
                if (r.getConf().getOutputName() != null) {
                  LOG.debug("Cloning reduce sink for multi-child broadcast edge");
                  // we've already set this one up. Need to clone for the next work.
                  r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
                      (ReduceSinkDesc)r.getConf().clone(), r.getParentOperators());
                  context.clonedReduceSinks.add(r);
                }
                r.getConf().setOutputName(work.getName());
                context.connectedReduceSinks.add(r);
              }
            }
          }
        }
      }
      // clear out the set. we don't need it anymore.
      context.currentMapJoinOperators.clear();
    }

    // This is where we cut the tree as described above. We also remember that
    // we might have to connect parent work with this work later.
    for (Operator<?> parent: new ArrayList<Operator<?>>(root.getParentOperators())) {
      context.leafOperatorToFollowingWork.put(parent, work);
      LOG.debug("Removing " + parent + " as parent from " + root);
      root.removeParent(parent);
    }

    if (!context.currentUnionOperators.isEmpty()) {     
      // if there are union all operators we need to add the work to the set
      // of union operators.

      UnionWork unionWork;
      if (context.unionWorkMap.containsKey(operator)) {
        // we've seen this terminal before and have created a union work object.
        // just need to add this work to it. There will be no children of this one
        // since we've passed this operator before.
        assert operator.getChildOperators().isEmpty();
        unionWork = (UnionWork) context.unionWorkMap.get(operator);

      } else {
        // first time through. we need to create a union work object and add this
        // work to it. Subsequent work should reference the union and not the actual
        // work.
        unionWork = utils.createUnionWork(context, operator, tezWork);
      }

      // finally hook everything up
      LOG.debug("Connecting union work ("+unionWork+") with work ("+work+")");
      TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.CONTAINS);
      tezWork.connect(unionWork, work, edgeProp);
      unionWork.addUnionOperators(context.currentUnionOperators);
      context.currentUnionOperators.clear();
      context.workWithUnionOperators.add(work);
      work = unionWork;
    }

    // We're scanning a tree from roots to leaf (this is not technically
    // correct, demux and mux operators might form a diamond shape, but
    // we will only scan one path and ignore the others, because the
    // diamond shape is always contained in a single vertex). The scan
    // is depth first and because we remove parents when we pack a pipeline
    // into a vertex we will never visit any node twice. But because of that
    // we might have a situation where we need to connect 'work' that comes after
    // the 'work' we're currently looking at.
    //
    // Also note: the concept of leaf and root is reversed in hive for historical
    // reasons. Roots are data sources, leaves are data sinks. I know.
    if (context.leafOperatorToFollowingWork.containsKey(operator)) {

      BaseWork followingWork = context.leafOperatorToFollowingWork.get(operator);

      LOG.debug("Second pass. Leaf operator: "+operator
        +" has common downstream work:"+followingWork);

      // need to add this branch to the key + value info
      assert operator instanceof ReduceSinkOperator
        && followingWork instanceof ReduceWork;
      ReduceSinkOperator rs = (ReduceSinkOperator) operator;
      ReduceWork rWork = (ReduceWork) followingWork;
      GenMapRedUtils.setKeyAndValueDesc(rWork, rs);

      // remember which parent belongs to which tag
      rWork.getTagToInput().put(rs.getConf().getTag(), work.getName());

      // remember the output name of the reduce sink
      rs.getConf().setOutputName(rWork.getName());

      if (!context.connectedReduceSinks.contains(rs)) {
        // add dependency between the two work items
        TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
        tezWork.connect(work, rWork, edgeProp);
        context.connectedReduceSinks.add(rs);
      }
    } else {
      LOG.debug("First pass. Leaf operator: "+operator);
    }
View Full Code Here

    TezEdgeProperty edgeProp = new TezEdgeProperty(null, edgeType, numBuckets);

    if (mapJoinWork != null) {
      for (BaseWork myWork: mapJoinWork) {
        // link the work with the work associated with the reduce sink that triggered this rule
        TezWork tezWork = context.currentTask.getWork();
        LOG.debug("connecting "+parentWork.getName()+" with "+myWork.getName());
        tezWork.connect(parentWork, myWork, edgeProp);
       
        ReduceSinkOperator r = null;
        if (parentRS.getConf().getOutputName() != null) {
          LOG.debug("Cloning reduce sink for multi-child broadcast edge");
          // we've already set this one up. Need to clone for the next work.
View Full Code Here

        dispatch(tsk, stack, nodeOutputs);
      }

    } else if (currTask instanceof TezTask) {
      TezTask tzTask = (TezTask) currTask;
      TezWork tzWrk = tzTask.getWork();
      checkMapJoins(tzWrk);
      checkTezReducer(tzWrk);
    }
    return null;
  }
View Full Code Here

        throws SemanticException {
      Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;
      if (currTask instanceof MapRedTask) {
        convertMapWork(((MapRedTask) currTask).getWork().getMapWork());
      } else if (currTask instanceof TezTask) {
        TezWork work = ((TezTask) currTask).getWork();
        for (BaseWork w: work.getAllWork()) {
          if (w instanceof MapWork) {
            convertMapWork((MapWork)w);
          }
        }
      }
View Full Code Here

  }

  @Override
  protected void setInputFormat(Task<? extends Serializable> task) {
    if (task instanceof TezTask) {
      TezWork work = ((TezTask)task).getWork();
      List<BaseWork> all = work.getAllWork();
      for (BaseWork w: all) {
        if (w instanceof MapWork) {
          MapWork mapWork = (MapWork) w;
          HashMap<String, Operator<? extends OperatorDesc>> opMap = mapWork.getAliasToWork();
          if (!opMap.isEmpty()) {
View Full Code Here

    this.moveTask = moveTask;
    this.rootTasks = rootTasks;
    this.inputs = inputs;
    this.outputs = outputs;
    this.currentTask = (TezTask) TaskFactory.get(
         new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID)), conf);
    this.leafOperatorToFollowingWork = new HashMap<Operator<?>, BaseWork>();
    this.linkOpWithWorkMap = new HashMap<Operator<?>, Map<BaseWork, TezEdgeProperty>>();
    this.linkWorkWithReduceSinkMap = new HashMap<BaseWork, List<ReduceSinkOperator>>();
    this.mapJoinWorkMap = new HashMap<MapJoinOperator, List<BaseWork>>();
    this.rootToWorkMap = new HashMap<Operator<?>, BaseWork>();
View Full Code Here

        }
      }

      assert alias != null;

      TezWork tezWork = context.currentTask.getWork();
      boolean partialScan = parseInfo.isPartialScanAnalyzeCommand();
      boolean noScan = parseInfo.isNoScanAnalyzeCommand();
      if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) {

        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.ql.plan.TezWork$Dependency

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.