Package org.apache.pig.backend.hadoop.executionengine.tez.plan.operator

Examples of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POValueOutputTez$EmptyWritable


                    userFunc.setFuncSpec(newSpec);

                    if (storeSeen.containsKey(store)) {
                        storeSeen.get(store).addOutputKey(tezOp.getOperatorKey().toString());
                    } else {
                        POValueOutputTez output = new POValueOutputTez(OperatorKey.genOpKey(scope));
                        output.addOutputKey(tezOp.getOperatorKey().toString());
                        from.plan.remove(from.plan.getOperator(store.getOperatorKey()));
                        from.plan.addAsLeaf(output);
                        storeSeen.put(store, output);

                        //Remove unused store filename
View Full Code Here


                if (p instanceof POStore) {
                    PhysicalOperator store = oper.plan.getOperator(p.getOperatorKey());
                    // replace POStore to POValueOutputTez, convert the tezOperator to splitter
                    oper.plan.disconnect(oper.plan.getPredecessors(store).get(0), store);
                    oper.plan.remove(store);
                    POValueOutputTez valueOutput = new POValueOutputTez(new OperatorKey(scope,nig.getNextNodeId(scope)));
                    oper.plan.addAsLeaf(valueOutput);
                    oper.setSplitter(true);

                    // Create a splittee of store only
                    TezOperator storeOnlyTezOperator = getTezOp();
View Full Code Here

                    return;
                }
            }

            // Need to add POValueOutputTez to the end of the last tezOp
            POValueOutputTez output = new POValueOutputTez(OperatorKey.genOpKey(scope));
            output.copyAliasFrom(op);
            curTezOp.plan.addAsLeaf(output);
            TezOperator prevOp = curTezOp;

            // Mark the start of a new TezOperator which will do the actual limiting with 1 task.
            blocking();

            // Explicitly set the parallelism for the new vertex to 1.
            curTezOp.setRequestedParallelism(1);
            curTezOp.setDontEstimateParallelism(true);

            output.addOutputKey(curTezOp.getOperatorKey().toString());
            // LIMIT does not make any ordering guarantees and this is unsorted shuffle.
            TezEdgeDescriptor edge = curTezOp.inEdges.get(prevOp.getOperatorKey());
            TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.SCATTER_GATHER);

            // Limit after order by with scalar expression
            if (limitAfterSort) {
                curTezOp.markLimitAfterSort();
                output.setTaskIndexWithRecordIndexAsKey(true);
                // POValueOutputTez will write key (task index, record index) in
                // sorted order. So using UnorderedKVOutput instead of OrderedPartitionedKVOutput.
                // But input needs to be merged in sorter order and requires OrderedGroupedKVInput
                edge.outputClassName = UnorderedKVOutput.class.getName();
                edge.inputClassName = OrderedGroupedKVInput.class.getName();
View Full Code Here

            // it will reflect for joinJobs[0] so that 1-1 edge will work.
            joinJobs[0].setRequestedParallelismByReference(prevOp);

            TezCompilerUtil.connect(tezPlan, prevOp, sampleJobPair.first);

            POValueOutputTez sampleOut = (POValueOutputTez) sampleJobPair.first.plan.getLeaves().get(0);
            for (int i = 0; i < 2; i++) {
                joinJobs[i].setSampleOperator(sampleJobPair.first);

                // Configure broadcast edges for distribution map
                edge = TezCompilerUtil.connect(tezPlan, sampleJobPair.first, joinJobs[i]);
                TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.BROADCAST);
                sampleOut.addOutputKey(joinJobs[i].getOperatorKey().toString());

                // Configure skewed partitioner for join
                edge = joinJobs[2].inEdges.get(joinJobs[i].getOperatorKey());
                edge.partitionerClass = SkewedPartitionerTez.class;
            }
View Full Code Here

        POForEach nfe3 = new POForEach(new OperatorKey(scope,nig.getNextNodeId(scope)), -1, ep4s, flattened3);

        oper.plan.add(nfe3);
        oper.plan.connect(nfe2, nfe3);

        POValueOutputTez sampleOut = new POValueOutputTez(OperatorKey.genOpKey(scope));
        oper.plan.add(sampleOut);
        oper.plan.connect(nfe3, sampleOut);
        oper.setClosed(true);

        oper.setRequestedParallelism(1);
View Full Code Here

            lr.setOutputKey(sortOpers[0].getOperatorKey().toString());
            lrSample.setOutputKey(quantJobParallelismPair.first.getOperatorKey().toString());

            edge = TezCompilerUtil.connect(tezPlan, quantJobParallelismPair.first, sortOpers[0]);
            TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.BROADCAST);
            POValueOutputTez sampleOut = (POValueOutputTez)quantJobParallelismPair.first.plan.getLeaves().get(0);
            sampleOut.addOutputKey(sortOpers[0].getOperatorKey().toString());
            sortOpers[0].setSampleOperator(quantJobParallelismPair.first);

            edge = TezCompilerUtil.connect(tezPlan, sortOpers[0], sortOpers[1]);
            edge.partitionerClass = WeightedRangePartitionerTez.class;

            curTezOp = sortOpers[1];

            // TODO: Review sort udf
//            if(op.isUDFComparatorUsed){
//                curTezOp.UDFs.add(op.getMSortFunc().getFuncSpec().toString());
//                curTezOp.isUDFComparatorUsed = true;
//            }
            quantJobParallelismPair.first.setSortOperator(sortOpers[1]);

            // If Order by followed by Limit and parallelism of order by is not 1
            // add a new vertex for Limit with parallelism 1.
            // Equivalent of LimitAdjuster.java in MR
            if (op.isLimited() && rp != 1) {
                POValueOutputTez output = new POValueOutputTez(OperatorKey.genOpKey(scope));
                output.copyAliasFrom(op);
                sortOpers[1].plan.addAsLeaf(output);

                TezOperator limitOper = getTezOp();
                tezPlan.add(limitOper);
                curTezOp = limitOper;

                // Explicitly set the parallelism for the new vertex to 1.
                limitOper.setRequestedParallelism(1);
                limitOper.setDontEstimateParallelism(true);
                limitOper.markLimitAfterSort();

                edge = TezCompilerUtil.connect(tezPlan, sortOpers[1], limitOper);
                // LIMIT in this case should be ordered. So we output unordered with key as task index
                // and on the input we use OrderedGroupedKVInput to do ordered merge to retain sorted order.
                output.addOutputKey(limitOper.getOperatorKey().toString());
                output.setTaskIndexWithRecordIndexAsKey(true);
                edge = curTezOp.inEdges.get(sortOpers[1].getOperatorKey());
                TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.SCATTER_GATHER);
                // POValueOutputTez will write key (task index, record index) in
                // sorted order. So using UnorderedKVOutput instead of OrderedPartitionedKVOutput.
                // But input needs to be merged in sorter order and requires OrderedGroupedKVInput
View Full Code Here

    @Override
    public void visitSplit(POSplit op) throws VisitorException {
        try {
            TezOperator splitOp = curTezOp;
            POValueOutputTez output = null;
            if (splitsSeen.containsKey(op.getOperatorKey())) {
                splitOp = splitsSeen.get(op.getOperatorKey());
                output = (POValueOutputTez)splitOp.plan.getLeaves().get(0);
            } else {
                splitsSeen.put(op.getOperatorKey(), splitOp);
                splitOp.setSplitter(true);
                phyToTezOpMap.put(op, splitOp);
                output = new POValueOutputTez(OperatorKey.genOpKey(scope));
                output.copyAliasFrom(op);
                splitOp.plan.addAsLeaf(output);
            }
            curTezOp = getTezOp();
            curTezOp.setSplitParent(splitOp.getOperatorKey());
            tezPlan.add(curTezOp);
            output.addOutputKey(curTezOp.getOperatorKey().toString());
            TezEdgeDescriptor edge = TezCompilerUtil.connect(tezPlan, splitOp, curTezOp);
            //TODO shared edge once support is available in Tez
            TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.ONE_TO_ONE);
            curTezOp.setRequestedParallelismByReference(splitOp);
            POValueInputTez input = new POValueInputTez(OperatorKey.genOpKey(scope));
View Full Code Here

                // Some predecessors of union need not be part of the union (For eg: replicated join).
                // So mark predecessors that are input to the union operation.
                unionTezOp.addUnionPredecessor(prevTezOp.getOperatorKey());
                TezEdgeDescriptor edge = TezCompilerUtil.connect(tezPlan, prevTezOp, unionTezOp);
                TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.SCATTER_GATHER);
                outputs[i] = new POValueOutputTez(OperatorKey.genOpKey(scope));
                outputs[i].addOutputKey(unionTezOp.getOperatorKey().toString());
                unionInput.addInputKey(prevTezOp.getOperatorKey().toString());
                prevTezOp.plan.addAsLeaf(outputs[i]);
                prevTezOp.setClosed(true);
                if (prevTezOp.isUseMRMapSettings()) {
View Full Code Here

    public static PhysicalPlan getUnionPredPlanFromSplit(PhysicalPlan plan, String unionOpKey) throws VisitorException {
        List<POSplit> splits = PlanHelper.getPhysicalOperators(plan, POSplit.class);
        for (POSplit split : splits) {
            for (PhysicalPlan subPlan : split.getPlans()) {
                if (subPlan.getLeaves().get(0) instanceof POValueOutputTez) {
                    POValueOutputTez out = (POValueOutputTez) subPlan.getLeaves().get(0);
                    if (out.containsOutputKey(unionOpKey)) {
                        return subPlan;
                    }
                }
            }
        }
View Full Code Here

                addSubPlanPropertiesToParent(tezOp, singleSplitee);

                removeSplittee(getPlan(), tezOp, singleSplitee);
            } else {
                POValueOutputTez valueOutput = (POValueOutputTez)tezOp.plan.getLeaves().get(0);
                POSplit split = new POSplit(OperatorKey.genOpKey(valueOutput.getOperatorKey().getScope()));
                split.copyAliasFrom(valueOutput);
                for (TezOperator splitee : splittees) {
                    PhysicalOperator spliteeRoot =  splitee.plan.getRoots().get(0);
                    splitee.plan.remove(spliteeRoot);
                    split.addPlan(splitee.plan);

                    addSubPlanPropertiesToParent(tezOp, splitee);

                    removeSplittee(getPlan(), tezOp, splitee);
                    valueOutput.removeOutputKey(splitee.getOperatorKey().toString());
                }
                if (valueOutput.getTezOutputs().length > 0) {
                    // We still need valueOutput
                    PhysicalPlan phyPlan = new PhysicalPlan();
                    phyPlan.addAsLeaf(valueOutput);
                    split.addPlan(phyPlan);
                }
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POValueOutputTez$EmptyWritable

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.