Examples of org.apache.hadoop.hive.ql.plan.UnionWork

org.apache.hadoop.hive.ql.plan.UnionWork
Simple wrapper for union all cases. All contributing work for a union all is collected here. Downstream work will connect to the union not the individual work.

  public void resetSequenceNumber() {
    sequenceNumber = 0;
  }


  public UnionWork createUnionWork(GenTezProcContext context, Operator<?> operator, TezWork tezWork) {
    UnionWork unionWork = new UnionWork("Union "+ (++sequenceNumber));
    context.unionWorkMap.put(operator, unionWork);
    tezWork.add(unionWork);
    return unionWork;
  }

View Full Code Here


    if (!context.currentUnionOperators.isEmpty()) {      
      // if there are union all operators we need to add the work to the set
      // of union operators.


      UnionWork unionWork;
      if (context.unionWorkMap.containsKey(operator)) {
        // we've seen this terminal before and have created a union work object.
        // just need to add this work to it. There will be no children of this one
        // since we've passed this operator before.
        assert operator.getChildOperators().isEmpty();
        unionWork = (UnionWork) context.unionWorkMap.get(operator);


      } else {
        // first time through. we need to create a union work object and add this
        // work to it. Subsequent work should reference the union and not the actual
        // work.
        unionWork = utils.createUnionWork(context, operator, tezWork);
      }


      // finally hook everything up
      LOG.debug("Connecting union work ("+unionWork+") with work ("+work+")");
      TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.CONTAINS);
      tezWork.connect(unionWork, work, edgeProp);
      unionWork.addUnionOperators(context.currentUnionOperators);
      context.currentUnionOperators.clear();
      context.workWithUnionOperators.add(work);
      work = unionWork;
    }

View Full Code Here

  public void resetSequenceNumber() {
    sequenceNumber = 0;
  }


  public UnionWork createUnionWork(GenTezProcContext context, Operator<?> operator, TezWork tezWork) {
    UnionWork unionWork = new UnionWork("Union "+ (++sequenceNumber));
    context.unionWorkMap.put(operator, unionWork);
    tezWork.add(unionWork);
    return unionWork;
  }

View Full Code Here


    if (!context.currentUnionOperators.isEmpty()) {
      // if there are union all operators we need to add the work to the set
      // of union operators.


      UnionWork unionWork;
      if (context.unionWorkMap.containsKey(operator)) {
        // we've seen this terminal before and have created a union work object.
        // just need to add this work to it. There will be no children of this one
        // since we've passed this operator before.
        assert operator.getChildOperators().isEmpty();
        unionWork = (UnionWork) context.unionWorkMap.get(operator);


      } else {
        // first time through. we need to create a union work object and add this
        // work to it. Subsequent work should reference the union and not the actual
        // work.
        unionWork = utils.createUnionWork(context, operator, tezWork);
      }


      // finally hook everything up
      LOG.debug("Connecting union work ("+unionWork+") with work ("+work+")");
      TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.CONTAINS);
      tezWork.connect(unionWork, work, edgeProp);
      unionWork.addUnionOperators(context.currentUnionOperators);
      context.currentUnionOperators.clear();
      context.workWithUnionOperators.add(work);
      work = unionWork;
    }




    // This is where we cut the tree as described above. We also remember that
    // we might have to connect parent work with this work later.
    boolean removeParents = false;
    for (Operator<?> parent: new ArrayList<Operator<?>>(root.getParentOperators())) {
      removeParents = true;
      context.leafOperatorToFollowingWork.put(parent, work);
      LOG.debug("Removing " + parent + " as parent from " + root);
    }
    if (removeParents) {
      for (Operator<?> parent : new ArrayList<Operator<?>>(root.getParentOperators())) {
        root.removeParent(parent);
      }
    }


    // We're scanning a tree from roots to leaf (this is not technically
    // correct, demux and mux operators might form a diamond shape, but
    // we will only scan one path and ignore the others, because the
    // diamond shape is always contained in a single vertex). The scan
    // is depth first and because we remove parents when we pack a pipeline
    // into a vertex we will never visit any node twice. But because of that
    // we might have a situation where we need to connect 'work' that comes after
    // the 'work' we're currently looking at.
    //
    // Also note: the concept of leaf and root is reversed in hive for historical
    // reasons. Roots are data sources, leaves are data sinks. I know.
    if (context.leafOperatorToFollowingWork.containsKey(operator)) {


      BaseWork followingWork = context.leafOperatorToFollowingWork.get(operator);
      long bytesPerReducer = context.conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER);


      LOG.debug("Second pass. Leaf operator: "+operator
        +" has common downstream work:"+followingWork);


      if (operator instanceof DummyStoreOperator) {
        // this is the small table side.
        assert (followingWork instanceof MergeJoinWork);
        MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
        CommonMergeJoinOperator mergeJoinOp = mergeJoinWork.getMergeJoinOperator();
        work.setTag(mergeJoinOp.getTagForOperator(operator));
        mergeJoinWork.addMergedWork(null, work);
        tezWork.setVertexType(mergeJoinWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
        for (BaseWork parentWork : tezWork.getParents(work)) {
          TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work);
          tezWork.disconnect(parentWork, work);
          tezWork.connect(parentWork, mergeJoinWork, edgeProp);
        }
        work = mergeJoinWork;
      } else {
        // need to add this branch to the key + value info
        assert operator instanceof ReduceSinkOperator
            && ((followingWork instanceof ReduceWork) || (followingWork instanceof MergeJoinWork)
                || followingWork instanceof UnionWork);
        ReduceSinkOperator rs = (ReduceSinkOperator) operator;
        ReduceWork rWork = null;
        if (followingWork instanceof MergeJoinWork) {
          MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
          rWork = (ReduceWork) mergeJoinWork.getMainWork();
        } else if (followingWork instanceof UnionWork) {
          // this can only be possible if there is merge work followed by the union
          UnionWork unionWork = (UnionWork) followingWork;
          int index = getMergeIndex(tezWork, unionWork, rs);
          // guaranteed to be instance of MergeJoinWork if index is valid
          BaseWork baseWork = tezWork.getChildren(unionWork).get(index);
          if (baseWork instanceof MergeJoinWork) {
            MergeJoinWork mergeJoinWork = (MergeJoinWork) baseWork;

View Full Code Here

  public void resetSequenceNumber() {
    sequenceNumber = 0;
  }


  public UnionWork createUnionWork(GenTezProcContext context, Operator<?> operator, TezWork tezWork) {
    UnionWork unionWork = new UnionWork("Union "+ (++sequenceNumber));
    context.unionWorkMap.put(operator, unionWork);
    tezWork.add(unionWork);
    return unionWork;
  }

View Full Code Here


    if (!context.currentUnionOperators.isEmpty()) {      
      // if there are union all operators we need to add the work to the set
      // of union operators.


      UnionWork unionWork;
      if (context.unionWorkMap.containsKey(operator)) {
        // we've seen this terminal before and have created a union work object.
        // just need to add this work to it. There will be no children of this one
        // since we've passed this operator before.
        assert operator.getChildOperators().isEmpty();
        unionWork = (UnionWork) context.unionWorkMap.get(operator);


      } else {
        // first time through. we need to create a union work object and add this
        // work to it. Subsequent work should reference the union and not the actual
        // work.
        unionWork = utils.createUnionWork(context, operator, tezWork);
      }


      // finally hook everything up
      LOG.debug("Connecting union work ("+unionWork+") with work ("+work+")");
      TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.CONTAINS);
      tezWork.connect(unionWork, work, edgeProp);
      unionWork.addUnionOperators(context.currentUnionOperators);
      context.currentUnionOperators.clear();
      context.workWithUnionOperators.add(work);
      work = unionWork;
    }

View Full Code Here

TOP

Related Classes of org.apache.hadoop.hive.ql.plan.UnionWork

org.apache.hadoop.hive.ql.parse.GenTezUtils

org.apache.hadoop.hive.ql.parse.GenTezWork

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.