Package org.apache.pig.backend.hadoop.executionengine.tez.plan

Examples of org.apache.pig.backend.hadoop.executionengine.tez.plan.TezOperator$LoaderInfo


            Vertex[] groupMembers = new Vertex[predecessors.size()];

            for (int i = 0; i < predecessors.size(); i++) {
                // Since this is a dependency order walker, predecessor vertices
                // must have already been created.
                TezOperator pred = predecessors.get(i);
                try {
                    if (pred.isVertexGroup()) {
                        VertexGroup from = pred.getVertexGroupInfo().getVertexGroup();
                        // The plan of vertex group is empty. Since we create the Edge based on
                        // some of the operators in the plan refer to one of the vertex group members.
                        // Both the vertex group and its members reference same EdgeDescriptor object to the
                        // the successor
                        GroupInputEdge edge = newGroupInputEdge(
                                getPlan().getOperator(pred.getVertexGroupMembers().get(0)), tezOp, from, to);
                        dag.addEdge(edge);
                    } else {
                        Vertex from = dag.getVertex(pred.getOperatorKey().toString());
                        if (tezOp.isVertexGroup()) {
                            groupMembers[i] = from;
                        } else {
                            EdgeProperty prop = newEdge(pred, tezOp);
                            Edge edge = Edge.create(from, to, prop);
                            dag.addEdge(edge);
                        }
                    }
                } catch (IOException e) {
                    throw new VisitorException("Cannot create edge from "
                            + pred.name() + " to " + tezOp.name(), e);
                }
            }

            if (tezOp.isVertexGroup()) {
                String groupName = tezOp.getOperatorKey().toString();
View Full Code Here


            setIntermediateOutputKeyValue(pack.getPkgr().getKeyType(), payloadConf, tezOp);
        } else if (roots.size() == 1 && roots.get(0) instanceof POIdentityInOutTez) {
            POIdentityInOutTez identityInOut = (POIdentityInOutTez) roots.get(0);
            // TODO Need to fix multiple input key mapping
            TezOperator identityInOutPred = null;
            for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
                if (!pred.isSampleAggregation()) {
                    identityInOutPred = pred;
                    break;
                }
            }
            identityInOut.setInputKey(identityInOutPred.getOperatorKey().toString());
        } else if (roots.size() == 1 && roots.get(0) instanceof POValueInputTez) {
            POValueInputTez valueInput = (POValueInputTez) roots.get(0);

            LinkedList<String> scalarInputs = new LinkedList<String>();
            for (POUserFunc userFunc : PlanHelper.getPhysicalOperators(tezOp.plan, POUserFunc.class) ) {
View Full Code Here

        if((tezOp.isLimit() || tezOp.isLimitAfterSort()) && tezOp.getRequestedParallelism() == 1) {
            return;
        }

        TezOperator unionOp = tezOp;
        String unionOpKey = unionOp.getOperatorKey().toString();
        String scope = unionOp.getOperatorKey().scope;
        TezOperPlan tezPlan = getPlan();

        //TODO: PIG-3856 Handle replicated join. Replicate join input that was broadcast to union vertex
        // now needs to be broadcast to all the union predecessors. How do we do that??
        // Wait for shared edge and do it or write multiple times??
        // For now don't optimize
        // Create a copy as disconnect while iterating modifies the original list
        List<TezOperator> predecessors = new ArrayList<TezOperator>(tezPlan.getPredecessors(unionOp));
        if (predecessors.size() > unionOp.getVertexGroupMembers().size()) {
            return;
        }

        PhysicalPlan unionOpPlan = unionOp.plan;

        // Union followed by Split followed by Store could have multiple stores
        List<POStoreTez> unionStoreOutputs = PlanHelper.getPhysicalOperators(unionOpPlan, POStoreTez.class);
        TezOperator[] storeVertexGroupOps = new TezOperator[unionStoreOutputs.size()];
        for (int i=0; i < storeVertexGroupOps.length; i++) {
            storeVertexGroupOps[i] = new TezOperator(OperatorKey.genOpKey(scope));
            storeVertexGroupOps[i].setVertexGroupInfo(new VertexGroupInfo(unionStoreOutputs.get(i)));
            storeVertexGroupOps[i].setVertexGroupMembers(unionOp.getVertexGroupMembers());
            tezPlan.add(storeVertexGroupOps[i]);
        }

        // Case of split, orderby, skewed join, rank, etc will have multiple outputs
        List<TezOutput> unionOutputs = PlanHelper.getPhysicalOperators(unionOpPlan, TezOutput.class);
        // One TezOutput can write to multiple LogicalOutputs (POCounterTez, POValueOutputTez, etc)
        List<String> unionOutputKeys = new ArrayList<String>();
        for (TezOutput output : unionOutputs) {
            if (output instanceof POStoreTez) {
                continue;
            }
            for (String key : output.getTezOutputs()) {
                unionOutputKeys.add(key);
            }
        }

        // Create vertex group operator for each output
        TezOperator[] outputVertexGroupOps = new TezOperator[unionOutputKeys.size()];
        String[] newOutputKeys = new String[unionOutputKeys.size()];
        for (int i=0; i < outputVertexGroupOps.length; i++) {
            outputVertexGroupOps[i] = new TezOperator(OperatorKey.genOpKey(scope));
            outputVertexGroupOps[i].setVertexGroupInfo(new VertexGroupInfo());
            outputVertexGroupOps[i].getVertexGroupInfo().setOutput(unionOutputKeys.get(i));
            outputVertexGroupOps[i].setVertexGroupMembers(unionOp.getVertexGroupMembers());
            newOutputKeys[i] = outputVertexGroupOps[i].getOperatorKey().toString();
            tezPlan.add(outputVertexGroupOps[i]);
        }

        try {

             // Clone plan of union and merge it into the predecessor operators
             // Remove POShuffledValueInputTez from union plan root
            unionOpPlan.remove(unionOpPlan.getRoots().get(0));
            for (OperatorKey predKey : unionOp.getVertexGroupMembers()) {
                TezOperator pred = tezPlan.getOperator(predKey);
                PhysicalPlan predPlan = pred.plan;
                PhysicalOperator predLeaf = predPlan.getLeaves().get(0);
                // if predLeaf not POValueOutputTez
                if (predLeaf instanceof POSplit) {
                    // Find the subPlan that connects to the union operator
                    predPlan = getUnionPredPlanFromSplit(predPlan, unionOpKey);
                    predLeaf = predPlan.getLeaves().get(0);
                }

                PhysicalPlan clonePlan = unionOpPlan.clone();
                //Clone changes the operator keys
                List<POStoreTez> clonedUnionStoreOutputs = PlanHelper.getPhysicalOperators(clonePlan, POStoreTez.class);

                // Remove POValueOutputTez from predecessor leaf
                predPlan.remove(predLeaf);
                boolean isEmptyPlan = predPlan.isEmpty();
                if (!isEmptyPlan) {
                    predLeaf = predPlan.getLeaves().get(0);
                }
                predPlan.merge(clonePlan);
                if (!isEmptyPlan) {
                    predPlan.connect(predLeaf, clonePlan.getRoots().get(0));
                }

                // Connect predecessor to the storeVertexGroups
                int i = 0;
                for (TezOperator storeVertexGroup : storeVertexGroupOps) {
                    storeVertexGroup.getVertexGroupInfo().addInput(pred.getOperatorKey());
                    //Set the output key of cloned POStore to that of the initial union POStore.
                    clonedUnionStoreOutputs.get(i).setOutputKey(
                            storeVertexGroup.getVertexGroupInfo().getStore()
                                    .getOperatorKey().toString());
                    pred.addVertexGroupStore(clonedUnionStoreOutputs.get(i++).getOperatorKey(),
                            storeVertexGroup.getOperatorKey());
                    tezPlan.connect(pred, storeVertexGroup);
                }

                for (TezOperator outputVertexGroup : outputVertexGroupOps) {
                    outputVertexGroup.getVertexGroupInfo().addInput(pred.getOperatorKey());
                    tezPlan.connect(pred, outputVertexGroup);
                }

                copyOperatorProperties(pred, unionOp);
                tezPlan.disconnect(pred, unionOp);
            }

            List<TezOperator> successors = tezPlan.getSuccessors(unionOp);
            List<TezOutput> valueOnlyOutputs = new ArrayList<TezOutput>();
            for (TezOutput tezOutput : unionOutputs) {
                if (tezOutput instanceof POValueOutputTez) {
                    valueOnlyOutputs.add(tezOutput);
                }
            }
            // Connect to outputVertexGroupOps
            // Copy output edges of union -> successor to predecessor->successor, vertexgroup -> successor
            // and connect vertexgroup -> successor in the plan.
            for (Entry<OperatorKey, TezEdgeDescriptor> entry : unionOp.outEdges.entrySet()) {
                TezOperator succOp = tezPlan.getOperator(entry.getKey());
                // Case of union followed by union.
                // unionOp.outEdges will not point to vertex group, but to its output.
                // So find the vertex group if there is one.
                TezOperator succOpVertexGroup = null;
                for (TezOperator succ : successors) {
                    if (succ.isVertexGroup()
                            && succ.getVertexGroupInfo().getOutput()
                                    .equals(succOp.getOperatorKey().toString())) {
                        succOpVertexGroup = succ;
                        break;
                    }
                }
                TezEdgeDescriptor edge = entry.getValue();
                // Edge cannot be one to one as it will get input from two or
                // more union predecessors. Change it to SCATTER_GATHER
                if (edge.dataMovementType == DataMovementType.ONE_TO_ONE) {
                    edge.dataMovementType = DataMovementType.SCATTER_GATHER;
                    edge.partitionerClass = RoundRobinPartitioner.class;
                    edge.outputClassName = UnorderedPartitionedKVOutput.class.getName();
                    edge.inputClassName = UnorderedKVInput.class.getName();
                }
                TezOperator vertexGroupOp = outputVertexGroupOps[unionOutputKeys.indexOf(entry.getKey().toString())];
                for (OperatorKey predKey : vertexGroupOp.getVertexGroupMembers()) {
                    TezOperator pred = tezPlan.getOperator(predKey);
                    // Keep the output edge directly to successor
                    // Don't need to keep output edge for vertexgroup
                    pred.outEdges.put(entry.getKey(), edge);
                    succOp.inEdges.put(predKey, edge);
                    if (succOpVertexGroup != null) {
View Full Code Here

            if (splittees.size()==1 && successors.size()==1) {
                // We don't need a POSplit here, we can merge the splittee into spliter
                PhysicalOperator firstNodeLeaf = tezOp.plan.getLeaves().get(0);
                PhysicalOperator firstNodeLeafPred  = tezOp.plan.getPredecessors(firstNodeLeaf).get(0);

                TezOperator singleSplitee = splittees.get(0);
                PhysicalOperator secondNodeRoot =  singleSplitee.plan.getRoots().get(0);
                PhysicalOperator secondNodeSucc = singleSplitee.plan.getSuccessors(secondNodeRoot).get(0);

                tezOp.plan.remove(firstNodeLeaf);
                singleSplitee.plan.remove(secondNodeRoot);
View Full Code Here

        // operators from the reduce side, and the second predecessor
        // cannot see them
        if (predecessors.size()>1) {
            return;
        }
        TezOperator from = predecessors.get(0);

        List<POLocalRearrangeTez> rearranges = PlanHelper.getPhysicalOperators(from.plan, POLocalRearrangeTez.class);
        if (rearranges.isEmpty()) {
            return;
        }

        POLocalRearrangeTez connectingLR = null;
        PhysicalPlan rearrangePlan = from.plan;
        for (POLocalRearrangeTez lr : rearranges) {
            if (lr.getOutputKey().equals(to.getOperatorKey().toString())) {
                connectingLR = lr;
                break;
            }
        }

        if (connectingLR == null) {
            return;
        }

        // Detected the POLocalRearrange -> POPackage pattern
        TezEdgeDescriptor inEdge = to.inEdges.get(from.getOperatorKey());
        // Only optimize for Cogroup case
        if (from.isGlobalSort()) {
            return;
        }

        // If there is a custom partitioner do not do secondary key optimization.
        if (inEdge.partitionerClass != null) {
            return;
        }

        if (from.plan.getOperator(connectingLR.getOperatorKey()) == null) {
            // The POLocalRearrange is sub-plan of a POSplit
            rearrangePlan = PlanHelper.getLocalRearrangePlanFromSplit(from.plan, connectingLR.getOperatorKey());
        }

        SecondaryKeyOptimizerInfo info = SecondaryKeyOptimizerUtil.applySecondaryKeySort(rearrangePlan, to.plan);
        if (info != null) {
            numSortRemoved += info.getNumSortRemoved();
            numDistinctChanged += info.getNumDistinctChanged();
            numUseSecondaryKey += info.getNumUseSecondaryKey();
            if (info.isUseSecondaryKey()) {
                // Set it on the receiving vertex and the connecting edge.
                to.setUseSecondaryKey(true);
                inEdge.setUseSecondaryKey(true);
                inEdge.setSecondarySortOrder(info.getSecondarySortOrder());
                log.info("Using Secondary Key Optimization in the edge between vertex - "
                        + from.getOperatorKey()
                        + " and vertex - "
                        + to.getOperatorKey());
            }
        }
    }
View Full Code Here

        }

        double estimatedParallelism = 0;

        for (Entry<OperatorKey, TezEdgeDescriptor> entry : tezOper.inEdges.entrySet()) {
            TezOperator pred = getPredecessorWithKey(plan, tezOper, entry.getKey().toString());

            // Don't include broadcast edge, broadcast edge is used for
            // replicated join (covered in TezParallelismFactorVisitor.visitFRJoin)
            // and sample/scalar (does not impact parallelism)
            if (entry.getValue().dataMovementType==DataMovementType.SCATTER_GATHER ||
                    entry.getValue().dataMovementType==DataMovementType.ONE_TO_ONE) {
                double predParallelism = pred.getEffectiveParallelism();
                if (predParallelism==-1) {
                    throw new IOException("Cannot estimate parallelism for " + tezOper.getOperatorKey().toString()
                            + ", effective parallelism for predecessor " + tezOper.getOperatorKey().toString()
                            + " is -1");
                }
View Full Code Here

                boolean isOneToOneParallelism = false;
                intermediateReducer = TezCompilerUtil.isIntermediateReducer(tezOp);

                for (Map.Entry<OperatorKey,TezEdgeDescriptor> entry : tezOp.inEdges.entrySet()) {
                    if (entry.getValue().dataMovementType == DataMovementType.ONE_TO_ONE) {
                        TezOperator pred = mPlan.getOperator(entry.getKey());
                        parallelism = pred.getEffectiveParallelism();
                        if (prevParallelism == -1) {
                            prevParallelism = parallelism;
                        } else if (prevParallelism != parallelism) {
                            throw new VisitorException("one to one sources parallelism for vertex "
                                    + tezOp.getOperatorKey().toString() + " are not equal");
                        }
                        tezOp.setRequestedParallelism(pred.getRequestedParallelism());
                        tezOp.setEstimatedParallelism(pred.getEstimatedParallelism());
                        isOneToOneParallelism = true;
                        incrementTotalParallelism(tezOp, parallelism);
                        parallelism = -1;
                    }
                }
                if (!isOneToOneParallelism) {
                    if (tezOp.getRequestedParallelism() != -1) {
                        parallelism = tezOp.getRequestedParallelism();
                    } else if (pc.defaultParallel != -1) {
                        parallelism = pc.defaultParallel;
                    }
                    boolean overrideRequestedParallelism = false;
                    if (parallelism != -1
                            && autoParallelismEnabled
                            && intermediateReducer
                            && !tezOp.isDontEstimateParallelism()
                            && tezOp.isOverrideIntermediateParallelism()) {
                        overrideRequestedParallelism = true;
                    }
                    if (parallelism == -1 || overrideRequestedParallelism) {
                        if (tezOp.getEstimatedParallelism() == -1) {
                            // Override user specified parallelism with the estimated value
                            // if it is intermediate reducer
                            parallelism = estimator.estimateParallelism(mPlan, tezOp, conf);
                            if (overrideRequestedParallelism) {
                                tezOp.setRequestedParallelism(parallelism);
                            } else {
                                tezOp.setEstimatedParallelism(parallelism);
                            }
                        } else {
                            parallelism = tezOp.getEstimatedParallelism();
                        }
                        if (tezOp.isGlobalSort() || tezOp.isSkewedJoin()) {
                            if (!overrideRequestedParallelism) {
                                incrementTotalParallelism(tezOp, parallelism);
                                // PartitionerDefinedVertexManager will determine parallelism.
                                // So call setVertexParallelism with -1
                                // setEstimatedParallelism still needs to have some positive value
                                // so that TezDAGBuilder sets the PartitionerDefinedVertexManager
                                parallelism = -1;
                            } else {
                                // We are overriding the parallelism. We need to update the
                                // Constant value in sampleAggregator to same parallelism
                                // Currently will happen when you have orderby or
                                // skewed join followed by group by with combiner
                                for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
                                    if (pred.isSampleBasedPartitioner()) {
                                        for (TezOperator partitionerPred : mPlan.getPredecessors(pred)) {
                                            if (partitionerPred.isSampleAggregation()) {
                                                LOG.debug("Updating constant value to " + parallelism + " in " + partitionerPred.plan);
                                                LOG.info("Increased requested parallelism of " + partitionerPred.getOperatorKey() + " to " + parallelism);
                                                ParallelConstantVisitor visitor =
View Full Code Here

                + "b = group a by name parallel 3;"
                + "c = foreach b generate group as name, AVG(a.age) as age;"
                + "d = order c by age;"
                + "store d into 'output';";
        Pair<TezOperPlan, DAG> compiledPlan = compile(query);
        TezOperator sortOper = compiledPlan.first.getLeaves().get(0);
        Vertex sortVertex = compiledPlan.second.getVertex(sortOper.getOperatorKey().toString());
        assertEquals(sortVertex.getParallelism(), -1);
    }
View Full Code Here

                + "c = group b by name;"
                + "d = foreach c generate group, flatten(b.gpa);"
                + "e = group d by group;"
                + "store e into 'output';";
        Pair<TezOperPlan, DAG> compiledPlan = compile(query);
        TezOperator leafOper = compiledPlan.first.getLeaves().get(0);
        Vertex leafVertex = compiledPlan.second.getVertex(leafOper.getOperatorKey().toString());
        assertEquals(leafVertex.getParallelism(), 70);
    }
View Full Code Here

                + "b = load '5' using " + ArbitarySplitsLoader.class.getName()
                + "() as (name:chararray, course:chararray);"
                + "c = join a by name, b by name;"
                + "store c into 'output';";
        Pair<TezOperPlan, DAG> compiledPlan = compile(query);
        TezOperator leafOper = compiledPlan.first.getLeaves().get(0);
        Vertex leafVertex = compiledPlan.second.getVertex(leafOper.getOperatorKey().toString());
        assertEquals(leafVertex.getParallelism(), 15);
    }
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.tez.plan.TezOperator$LoaderInfo

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.