Examples of org.apache.hadoop.hive.ql.plan.TezEdgeProperty

org.apache.hadoop.hive.ql.plan.TezEdgeProperty

        // add all dependencies (i.e.: edges) to the graph
        for (BaseWork v: work.getChildren(w)) {
          assert workToVertex.containsKey(v);
          Edge e = null;


          TezEdgeProperty edgeProp = work.getEdgeProperty(w, v);


          e = utils.createEdge(wxConf, wx, workToConf.get(v), workToVertex.get(v), edgeProp);
          dag.addEdge(e);
        }
      }

View Full Code Here

    reduceWork.setNumReduceTasks(reduceSink.getConf().getNumReducers());


    setupReduceSink(context, reduceWork, reduceSink);


    tezWork.add(reduceWork);
    TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
    tezWork.connect(
        context.preceedingWork,
        reduceWork, edgeProp);
    context.connectedReduceSinks.add(reduceSink);

View Full Code Here

        edgeType = EdgeType.CUSTOM_EDGE;
      } else {
        edgeType = EdgeType.CUSTOM_SIMPLE_EDGE;
      }
    }
    TezEdgeProperty edgeProp = new TezEdgeProperty(null, edgeType, numBuckets);


    if (mapJoinWork != null) {
      for (BaseWork myWork: mapJoinWork) {
        // link the work with the work associated with the reduce sink that triggered this rule
        TezWork tezWork = context.currentTask.getWork();

View Full Code Here

              }
            }
            for (Entry<BaseWork,TezEdgeProperty> parentWorkMap : linkWorkMap.entrySet()) {
              BaseWork parentWork = parentWorkMap.getKey();
              LOG.debug("connecting "+parentWork.getName()+" with "+work.getName());
              TezEdgeProperty edgeProp = parentWorkMap.getValue();
              tezWork.connect(parentWork, work, edgeProp);
              
              // need to set up output name for reduce sink now that we know the name
              // of the downstream work
              for (ReduceSinkOperator r:
                     context.linkWorkWithReduceSinkMap.get(parentWork)) {
                if (r.getConf().getOutputName() != null) {
                  LOG.debug("Cloning reduce sink for multi-child broadcast edge");
                  // we've already set this one up. Need to clone for the next work.
                  r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
                      (ReduceSinkDesc)r.getConf().clone(), r.getParentOperators());
                  context.clonedReduceSinks.add(r);
                }
                r.getConf().setOutputName(work.getName());
                context.connectedReduceSinks.add(r);
              }
            }
          }
        }
      }
      // clear out the set. we don't need it anymore.
      context.currentMapJoinOperators.clear();
    }


    // This is where we cut the tree as described above. We also remember that
    // we might have to connect parent work with this work later.
    for (Operator<?> parent: new ArrayList<Operator<?>>(root.getParentOperators())) {
      context.leafOperatorToFollowingWork.put(parent, work);
      LOG.debug("Removing " + parent + " as parent from " + root);
      root.removeParent(parent);
    }


    if (!context.currentUnionOperators.isEmpty()) {      
      // if there are union all operators we need to add the work to the set
      // of union operators.


      UnionWork unionWork;
      if (context.unionWorkMap.containsKey(operator)) {
        // we've seen this terminal before and have created a union work object.
        // just need to add this work to it. There will be no children of this one
        // since we've passed this operator before.
        assert operator.getChildOperators().isEmpty();
        unionWork = (UnionWork) context.unionWorkMap.get(operator);


      } else {
        // first time through. we need to create a union work object and add this
        // work to it. Subsequent work should reference the union and not the actual
        // work.
        unionWork = utils.createUnionWork(context, operator, tezWork);
      }


      // finally hook everything up
      LOG.debug("Connecting union work ("+unionWork+") with work ("+work+")");
      TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.CONTAINS);
      tezWork.connect(unionWork, work, edgeProp);
      unionWork.addUnionOperators(context.currentUnionOperators);
      context.currentUnionOperators.clear();
      context.workWithUnionOperators.add(work);
      work = unionWork;
    }


    // We're scanning a tree from roots to leaf (this is not technically
    // correct, demux and mux operators might form a diamond shape, but
    // we will only scan one path and ignore the others, because the
    // diamond shape is always contained in a single vertex). The scan
    // is depth first and because we remove parents when we pack a pipeline
    // into a vertex we will never visit any node twice. But because of that
    // we might have a situation where we need to connect 'work' that comes after
    // the 'work' we're currently looking at.
    //
    // Also note: the concept of leaf and root is reversed in hive for historical
    // reasons. Roots are data sources, leaves are data sinks. I know.
    if (context.leafOperatorToFollowingWork.containsKey(operator)) {


      BaseWork followingWork = context.leafOperatorToFollowingWork.get(operator);


      LOG.debug("Second pass. Leaf operator: "+operator
        +" has common downstream work:"+followingWork);


      // need to add this branch to the key + value info
      assert operator instanceof ReduceSinkOperator
        && followingWork instanceof ReduceWork;
      ReduceSinkOperator rs = (ReduceSinkOperator) operator;
      ReduceWork rWork = (ReduceWork) followingWork;
      GenMapRedUtils.setKeyAndValueDesc(rWork, rs);


      // remember which parent belongs to which tag
      rWork.getTagToInput().put(rs.getConf().getTag(), work.getName());


      // remember the output name of the reduce sink
      rs.getConf().setOutputName(rWork.getName());


      if (!context.connectedReduceSinks.contains(rs)) {
        // add dependency between the two work items
        TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
        tezWork.connect(work, rWork, edgeProp);
        context.connectedReduceSinks.add(rs);
      }
    } else {
      LOG.debug("First pass. Leaf operator: "+operator);

View Full Code Here

        // add all dependencies (i.e.: edges) to the graph
        for (BaseWork v: work.getChildren(w)) {
          assert workToVertex.containsKey(v);
          Edge e = null;


          TezEdgeProperty edgeProp = work.getEdgeProperty(w, v);


          e = utils.createEdge(wxConf, wx, workToConf.get(v), workToVertex.get(v), edgeProp);
          dag.addEdge(e);
        }
      }

View Full Code Here

    mws[1].setPathToAliases(pathMap);


    rws[0].setReducer(op);
    rws[1].setReducer(op);


    TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
    work.connect(mws[0], rws[0], edgeProp);
    work.connect(mws[1], rws[0], edgeProp);
    work.connect(rws[0], rws[1], edgeProp);


    task = new TezTask(utils);

View Full Code Here

          getParentFromStack(context.currentMergeJoinOperator, stack);
      int pos = context.currentMergeJoinOperator.getTagForOperator(parentOp);
      work.setTag(pos);
      tezWork.setVertexType(work, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
      for (BaseWork parentWork : tezWork.getParents(work)) {
        TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work);
        tezWork.disconnect(parentWork, work);
        tezWork.connect(parentWork, mergeJoinWork, edgeProp);
      }


      for (BaseWork childWork : tezWork.getChildren(work)) {
        TezEdgeProperty edgeProp = tezWork.getEdgeProperty(work, childWork);
        tezWork.disconnect(work, childWork);
        tezWork.connect(mergeJoinWork, childWork, edgeProp);
      }
      tezWork.remove(work);
      context.rootToWorkMap.put(root, mergeJoinWork);
      context.childToWorkMap.get(operator).remove(work);
      context.childToWorkMap.get(operator).add(mergeJoinWork);
      work = mergeJoinWork;
      context.currentMergeJoinOperator = null;
    }


    // remember which mapjoin operator links with which work
    if (!context.currentMapJoinOperators.isEmpty()) {
      for (MapJoinOperator mj: context.currentMapJoinOperators) {
        LOG.debug("Processing map join: " + mj);
        // remember the mapping in case we scan another branch of the
        // mapjoin later
        if (!context.mapJoinWorkMap.containsKey(mj)) {
          List<BaseWork> workItems = new LinkedList<BaseWork>();
          workItems.add(work);
          context.mapJoinWorkMap.put(mj, workItems);
        } else {
          context.mapJoinWorkMap.get(mj).add(work);
        }


        /*
         * this happens in case of map join operations.
         * The tree looks like this:
         *
         *        RS <--- we are here perhaps
         *        |
         *     MapJoin
         *     /     \
         *   RS       TS
         *  /
         * TS
         *
         * If we are at the RS pointed above, and we may have already visited the
         * RS following the TS, we have already generated work for the TS-RS.
         * We need to hook the current work to this generated work.
         */
        if (context.linkOpWithWorkMap.containsKey(mj)) {
          Map<BaseWork,TezEdgeProperty> linkWorkMap = context.linkOpWithWorkMap.get(mj);
          if (linkWorkMap != null) {
            if (context.linkChildOpWithDummyOp.containsKey(mj)) {
              for (Operator<?> dummy: context.linkChildOpWithDummyOp.get(mj)) {
                work.addDummyOp((HashTableDummyOperator) dummy);
              }
            }
            for (Entry<BaseWork,TezEdgeProperty> parentWorkMap : linkWorkMap.entrySet()) {
              BaseWork parentWork = parentWorkMap.getKey();
              LOG.debug("connecting "+parentWork.getName()+" with "+work.getName());
              TezEdgeProperty edgeProp = parentWorkMap.getValue();
              tezWork.connect(parentWork, work, edgeProp);
              if (edgeProp.getEdgeType() == EdgeType.CUSTOM_EDGE) {
                tezWork.setVertexType(work, VertexType.INITIALIZED_EDGES);
              }


              // need to set up output name for reduce sink now that we know the name
              // of the downstream work
              for (ReduceSinkOperator r:
                     context.linkWorkWithReduceSinkMap.get(parentWork)) {
                if (r.getConf().getOutputName() != null) {
                  LOG.debug("Cloning reduce sink for multi-child broadcast edge");
                  // we've already set this one up. Need to clone for the next work.
                  r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
                      (ReduceSinkDesc)r.getConf().clone(), r.getParentOperators());
                  context.clonedReduceSinks.add(r);
                }
                r.getConf().setOutputName(work.getName());
                context.connectedReduceSinks.add(r);
              }
            }
          }
        }
      }
      // clear out the set. we don't need it anymore.
      context.currentMapJoinOperators.clear();
    }


    if (!context.currentUnionOperators.isEmpty()) {
      // if there are union all operators we need to add the work to the set
      // of union operators.


      UnionWork unionWork;
      if (context.unionWorkMap.containsKey(operator)) {
        // we've seen this terminal before and have created a union work object.
        // just need to add this work to it. There will be no children of this one
        // since we've passed this operator before.
        assert operator.getChildOperators().isEmpty();
        unionWork = (UnionWork) context.unionWorkMap.get(operator);


      } else {
        // first time through. we need to create a union work object and add this
        // work to it. Subsequent work should reference the union and not the actual
        // work.
        unionWork = utils.createUnionWork(context, operator, tezWork);
      }


      // finally hook everything up
      LOG.debug("Connecting union work ("+unionWork+") with work ("+work+")");
      TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.CONTAINS);
      tezWork.connect(unionWork, work, edgeProp);
      unionWork.addUnionOperators(context.currentUnionOperators);
      context.currentUnionOperators.clear();
      context.workWithUnionOperators.add(work);
      work = unionWork;
    }




    // This is where we cut the tree as described above. We also remember that
    // we might have to connect parent work with this work later.
    boolean removeParents = false;
    for (Operator<?> parent: new ArrayList<Operator<?>>(root.getParentOperators())) {
      removeParents = true;
      context.leafOperatorToFollowingWork.put(parent, work);
      LOG.debug("Removing " + parent + " as parent from " + root);
    }
    if (removeParents) {
      for (Operator<?> parent : new ArrayList<Operator<?>>(root.getParentOperators())) {
        root.removeParent(parent);
      }
    }


    // We're scanning a tree from roots to leaf (this is not technically
    // correct, demux and mux operators might form a diamond shape, but
    // we will only scan one path and ignore the others, because the
    // diamond shape is always contained in a single vertex). The scan
    // is depth first and because we remove parents when we pack a pipeline
    // into a vertex we will never visit any node twice. But because of that
    // we might have a situation where we need to connect 'work' that comes after
    // the 'work' we're currently looking at.
    //
    // Also note: the concept of leaf and root is reversed in hive for historical
    // reasons. Roots are data sources, leaves are data sinks. I know.
    if (context.leafOperatorToFollowingWork.containsKey(operator)) {


      BaseWork followingWork = context.leafOperatorToFollowingWork.get(operator);
      long bytesPerReducer = context.conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER);


      LOG.debug("Second pass. Leaf operator: "+operator
        +" has common downstream work:"+followingWork);


      if (operator instanceof DummyStoreOperator) {
        // this is the small table side.
        assert (followingWork instanceof MergeJoinWork);
        MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
        CommonMergeJoinOperator mergeJoinOp = mergeJoinWork.getMergeJoinOperator();
        work.setTag(mergeJoinOp.getTagForOperator(operator));
        mergeJoinWork.addMergedWork(null, work);
        tezWork.setVertexType(mergeJoinWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
        for (BaseWork parentWork : tezWork.getParents(work)) {
          TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, work);
          tezWork.disconnect(parentWork, work);
          tezWork.connect(parentWork, mergeJoinWork, edgeProp);
        }
        work = mergeJoinWork;
      } else {
        // need to add this branch to the key + value info
        assert operator instanceof ReduceSinkOperator
            && ((followingWork instanceof ReduceWork) || (followingWork instanceof MergeJoinWork)
                || followingWork instanceof UnionWork);
        ReduceSinkOperator rs = (ReduceSinkOperator) operator;
        ReduceWork rWork = null;
        if (followingWork instanceof MergeJoinWork) {
          MergeJoinWork mergeJoinWork = (MergeJoinWork) followingWork;
          rWork = (ReduceWork) mergeJoinWork.getMainWork();
        } else if (followingWork instanceof UnionWork) {
          // this can only be possible if there is merge work followed by the union
          UnionWork unionWork = (UnionWork) followingWork;
          int index = getMergeIndex(tezWork, unionWork, rs);
          // guaranteed to be instance of MergeJoinWork if index is valid
          BaseWork baseWork = tezWork.getChildren(unionWork).get(index);
          if (baseWork instanceof MergeJoinWork) {
            MergeJoinWork mergeJoinWork = (MergeJoinWork) baseWork;
            // disconnect the connection to union work and connect to merge work
            followingWork = mergeJoinWork;
            rWork = (ReduceWork) mergeJoinWork.getMainWork();
          } else {
            throw new SemanticException("Unknown work type found: "
                + baseWork.getClass().getCanonicalName());
          }
        } else {
          rWork = (ReduceWork) followingWork;
        }
        GenMapRedUtils.setKeyAndValueDesc(rWork, rs);


        // remember which parent belongs to which tag
        int tag = rs.getConf().getTag();
        rWork.getTagToInput().put(tag == -1 ? 0 : tag, work.getName());


        // remember the output name of the reduce sink
        rs.getConf().setOutputName(rWork.getName());


        if (!context.connectedReduceSinks.contains(rs)) {
          // add dependency between the two work items
          TezEdgeProperty edgeProp;
          if (rWork.isAutoReduceParallelism()) {
            edgeProp =
                new TezEdgeProperty(context.conf, EdgeType.SIMPLE_EDGE, true,
                    rWork.getMinReduceTasks(), rWork.getMaxReduceTasks(), bytesPerReducer);
          } else {
            edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
          }
          tezWork.connect(work, followingWork, edgeProp);
          context.connectedReduceSinks.add(rs);
        }
      }

View Full Code Here


    setupReduceSink(context, reduceWork, reduceSink);


    tezWork.add(reduceWork);


    TezEdgeProperty edgeProp;
    if (reduceWork.isAutoReduceParallelism()) {
      edgeProp =
          new TezEdgeProperty(context.conf, EdgeType.SIMPLE_EDGE, true,
              reduceWork.getMinReduceTasks(), reduceWork.getMaxReduceTasks(), bytesPerReducer);
    } else {
      edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
    }


    tezWork.connect(
        context.preceedingWork,
        reduceWork, edgeProp);

View Full Code Here

        // add all dependencies (i.e.: edges) to the graph
        for (BaseWork v: work.getChildren(w)) {
          assert workToVertex.containsKey(v);
          Edge e = null;


          TezEdgeProperty edgeProp = work.getEdgeProperty(w, v);


          e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, work.getVertexType(v));
          dag.addEdge(e);
        }
      }

View Full Code Here

        edgeType = EdgeType.CUSTOM_EDGE;
      } else {
        edgeType = EdgeType.CUSTOM_SIMPLE_EDGE;
      }
    }
    TezEdgeProperty edgeProp = new TezEdgeProperty(null, edgeType, numBuckets);


    if (mapJoinWork != null) {
      for (BaseWork myWork: mapJoinWork) {
        // link the work with the work associated with the reduce sink that triggered this rule
        TezWork tezWork = context.currentTask.getWork();

View Full Code Here

0 1

TOP

Related Classes of org.apache.hadoop.hive.ql.plan.TezEdgeProperty

org.apache.hadoop.hive.ql.exec.tez.TestTezTask

org.apache.hadoop.hive.ql.exec.tez.TezTask

org.apache.hadoop.hive.ql.optimizer.MergeJoinProc

org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc

org.apache.hadoop.hive.ql.parse.GenTezUtils

org.apache.hadoop.hive.ql.parse.GenTezWork

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.