Package org.apache.pig.backend.hadoop.executionengine.tez.plan

Examples of org.apache.pig.backend.hadoop.executionengine.tez.plan.TezOperPlan


                        InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER));
    }

    @Override
    public void visitTezOp(TezOperator tezOp) throws VisitorException {
        TezOperPlan tezPlan = getPlan();
        List<TezOperator> predecessors = tezPlan.getPredecessors(tezOp);

        // Construct vertex for the current Tez operator
        Vertex to = null;
        try {
            if (!tezOp.isVertexGroup()) {
View Full Code Here


        tezStats.initialize(tezPlanContainer);
        tezScriptState.emitInitialPlanNotification(tezPlanContainer);
        tezScriptState.emitLaunchStartedNotification(tezPlanContainer.size()); //number of DAGs to Launch

        TezPlanContainerNode tezPlanContainerNode;
        TezOperPlan tezPlan;
        int processedDAGs = 0;
        while ((tezPlanContainerNode = tezPlanContainer.getNextPlan(processedPlans)) != null) {
            tezPlan = tezPlanContainerNode.getTezOperPlan();
            processLoadAndParallelism(tezPlan, pc);
            processedPlans.add(tezPlan);
            ProgressReporter reporter = new ProgressReporter(tezPlanContainer.size(), processedDAGs);
            if (tezPlan.size()==1 && tezPlan.getRoots().get(0) instanceof NativeTezOper) {
                // Native Tez Plan
                NativeTezOper nativeOper = (NativeTezOper)tezPlan.getRoots().get(0);
                tezScriptState.emitJobsSubmittedNotification(1);
                nativeOper.runJob(tezPlanContainerNode.getOperatorKey().toString());
            } else {
                TezPOPackageAnnotator pkgAnnotator = new TezPOPackageAnnotator(tezPlan);
                pkgAnnotator.visit();

                runningJob = jc.compile(tezPlanContainerNode, tezPlanContainer);
                //TODO: Exclude vertex groups from numVerticesToLaunch ??
                tezScriptState.dagLaunchNotification(runningJob.getName(), tezPlan, tezPlan.size());
                runningJob.setPigStats(tezStats);

                // Set the thread UDFContext so registered classes are available.
                final UDFContext udfContext = UDFContext.getUDFContext();
                Thread task = new Thread(runningJob) {
View Full Code Here

        TezCompiler comp = new TezCompiler(php, pc);
        comp.compile();
        TezPlanContainer planContainer = comp.getPlanContainer();
        for (Map.Entry<OperatorKey, TezPlanContainerNode> entry : planContainer
                .getKeys().entrySet()) {
            TezOperPlan tezPlan = entry.getValue().getTezOperPlan();
            optimize(tezPlan, pc);
        }
        return planContainer;
    }
View Full Code Here

    /**
     * Creates the {@link TezJob} for a single plan node of the container.
     *
     * <p>Collects the local resources of the plan container and the extra
     * resources of the node's {@link TezOperPlan} into one map, registers the
     * streaming ship/cache files listed in the Pig properties with
     * {@link TezResourceManager}, logs every collected resource key, builds the
     * DAG and wraps it in a new {@code TezJob}.
     *
     * @param tezPlanNode    container node whose Tez operator plan is turned into a DAG
     * @param planContainer  container that supplies the shared local resources
     * @return a job built from the Tez configuration, the DAG, the collected
     *         local resources and the plan's estimated total parallelism
     * @throws JobCreationException wrapping any exception raised while the job
     *         is assembled (error code 2017, marked as {@code PigException.BUG})
     */
    private TezJob getJob(TezPlanContainerNode tezPlanNode, TezPlanContainer planContainer)
            throws JobCreationException {
        try {
            // Start from the container-wide resources, then layer the plan-specific ones on top;
            // putAll means a plan entry replaces a container entry with the same key.
            Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
            localResources.putAll(planContainer.getLocalResources());
            TezOperPlan tezPlan = tezPlanNode.getTezOperPlan();
            localResources.putAll(tezPlan.getExtraResources());
            // Comma-separated list of files to ship; each entry is treated as a local file path.
            // NOTE(review): entries are not trimmed here, unlike the cache files below —
            // confirm whether whitespace around the commas can occur.
            // NOTE(review): these files are registered with TezResourceManager only; this
            // block does not add them to localResources — confirm that is intended.
            String shipFiles = pigContext.getProperties().getProperty("pig.streaming.ship.files");
            if (shipFiles != null) {
                for (String file : shipFiles.split(",")) {
                    TezResourceManager.getInstance().addTezResource(new File(file).toURI());
                }
            }
            // Comma-separated list of cache files; each entry is trimmed and parsed as a URI.
            String cacheFiles = pigContext.getProperties().getProperty("pig.streaming.cache.files");
            if (cacheFiles != null) {
                for (String file : cacheFiles.split(",")) {
                    // Do new URI() before passing to Path constructor else it encodes # when there is symlink
                    TezResourceManager.getInstance().addTezResource(new Path(new URI(file.trim())).toUri());
                }
            }
            // Log the key of every resource that will be handed to the DAG and the job.
            for (Map.Entry<String, LocalResource> entry : localResources.entrySet()) {
                log.info("Local resource: " + entry.getKey());
            }
            DAG tezDag = buildDAG(tezPlanNode, localResources);
            return new TezJob(tezConf, tezDag, localResources, tezPlan.getEstimatedTotalParallelism());
        } catch (Exception e) {
            // Every failure above is reported as one internal job-creation error; the
            // original exception is kept as the cause.
            int errCode = 2017;
            String msg = "Internal error creating job configuration.";
            throw new JobCreationException(msg, errCode, PigException.BUG, e);
        }
View Full Code Here

        }

        TezOperator unionOp = tezOp;
        String unionOpKey = unionOp.getOperatorKey().toString();
        String scope = unionOp.getOperatorKey().scope;
        TezOperPlan tezPlan = getPlan();

        //TODO: PIG-3856 Handle replicated join. Replicate join input that was broadcast to union vertex
        // now needs to be broadcast to all the union predecessors. How do we do that??
        // Wait for shared edge and do it or write multiple times??
        // For now don't optimize
        // Create a copy as disconnect while iterating modifies the original list
        List<TezOperator> predecessors = new ArrayList<TezOperator>(tezPlan.getPredecessors(unionOp));
        if (predecessors.size() > unionOp.getVertexGroupMembers().size()) {
            return;
        }

        PhysicalPlan unionOpPlan = unionOp.plan;

        // Union followed by Split followed by Store could have multiple stores
        List<POStoreTez> unionStoreOutputs = PlanHelper.getPhysicalOperators(unionOpPlan, POStoreTez.class);
        TezOperator[] storeVertexGroupOps = new TezOperator[unionStoreOutputs.size()];
        for (int i=0; i < storeVertexGroupOps.length; i++) {
            storeVertexGroupOps[i] = new TezOperator(OperatorKey.genOpKey(scope));
            storeVertexGroupOps[i].setVertexGroupInfo(new VertexGroupInfo(unionStoreOutputs.get(i)));
            storeVertexGroupOps[i].setVertexGroupMembers(unionOp.getVertexGroupMembers());
            tezPlan.add(storeVertexGroupOps[i]);
        }

        // Case of split, orderby, skewed join, rank, etc will have multiple outputs
        List<TezOutput> unionOutputs = PlanHelper.getPhysicalOperators(unionOpPlan, TezOutput.class);
        // One TezOutput can write to multiple LogicalOutputs (POCounterTez, POValueOutputTez, etc)
        List<String> unionOutputKeys = new ArrayList<String>();
        for (TezOutput output : unionOutputs) {
            if (output instanceof POStoreTez) {
                continue;
            }
            for (String key : output.getTezOutputs()) {
                unionOutputKeys.add(key);
            }
        }

        // Create vertex group operator for each output
        TezOperator[] outputVertexGroupOps = new TezOperator[unionOutputKeys.size()];
        String[] newOutputKeys = new String[unionOutputKeys.size()];
        for (int i=0; i < outputVertexGroupOps.length; i++) {
            outputVertexGroupOps[i] = new TezOperator(OperatorKey.genOpKey(scope));
            outputVertexGroupOps[i].setVertexGroupInfo(new VertexGroupInfo());
            outputVertexGroupOps[i].getVertexGroupInfo().setOutput(unionOutputKeys.get(i));
            outputVertexGroupOps[i].setVertexGroupMembers(unionOp.getVertexGroupMembers());
            newOutputKeys[i] = outputVertexGroupOps[i].getOperatorKey().toString();
            tezPlan.add(outputVertexGroupOps[i]);
        }

        try {

             // Clone plan of union and merge it into the predecessor operators
             // Remove POShuffledValueInputTez from union plan root
            unionOpPlan.remove(unionOpPlan.getRoots().get(0));
            for (OperatorKey predKey : unionOp.getVertexGroupMembers()) {
                TezOperator pred = tezPlan.getOperator(predKey);
                PhysicalPlan predPlan = pred.plan;
                PhysicalOperator predLeaf = predPlan.getLeaves().get(0);
                // if predLeaf not POValueOutputTez
                if (predLeaf instanceof POSplit) {
                    // Find the subPlan that connects to the union operator
                    predPlan = getUnionPredPlanFromSplit(predPlan, unionOpKey);
                    predLeaf = predPlan.getLeaves().get(0);
                }

                PhysicalPlan clonePlan = unionOpPlan.clone();
                //Clone changes the operator keys
                List<POStoreTez> clonedUnionStoreOutputs = PlanHelper.getPhysicalOperators(clonePlan, POStoreTez.class);

                // Remove POValueOutputTez from predecessor leaf
                predPlan.remove(predLeaf);
                boolean isEmptyPlan = predPlan.isEmpty();
                if (!isEmptyPlan) {
                    predLeaf = predPlan.getLeaves().get(0);
                }
                predPlan.merge(clonePlan);
                if (!isEmptyPlan) {
                    predPlan.connect(predLeaf, clonePlan.getRoots().get(0));
                }

                // Connect predecessor to the storeVertexGroups
                int i = 0;
                for (TezOperator storeVertexGroup : storeVertexGroupOps) {
                    storeVertexGroup.getVertexGroupInfo().addInput(pred.getOperatorKey());
                    //Set the output key of cloned POStore to that of the initial union POStore.
                    clonedUnionStoreOutputs.get(i).setOutputKey(
                            storeVertexGroup.getVertexGroupInfo().getStore()
                                    .getOperatorKey().toString());
                    pred.addVertexGroupStore(clonedUnionStoreOutputs.get(i++).getOperatorKey(),
                            storeVertexGroup.getOperatorKey());
                    tezPlan.connect(pred, storeVertexGroup);
                }

                for (TezOperator outputVertexGroup : outputVertexGroupOps) {
                    outputVertexGroup.getVertexGroupInfo().addInput(pred.getOperatorKey());
                    tezPlan.connect(pred, outputVertexGroup);
                }

                copyOperatorProperties(pred, unionOp);
                tezPlan.disconnect(pred, unionOp);
            }

            List<TezOperator> successors = tezPlan.getSuccessors(unionOp);
            List<TezOutput> valueOnlyOutputs = new ArrayList<TezOutput>();
            for (TezOutput tezOutput : unionOutputs) {
                if (tezOutput instanceof POValueOutputTez) {
                    valueOnlyOutputs.add(tezOutput);
                }
            }
            // Connect to outputVertexGroupOps
            // Copy output edges of union -> successor to predecessor->successor, vertexgroup -> successor
            // and connect vertexgroup -> successor in the plan.
            for (Entry<OperatorKey, TezEdgeDescriptor> entry : unionOp.outEdges.entrySet()) {
                TezOperator succOp = tezPlan.getOperator(entry.getKey());
                // Case of union followed by union.
                // unionOp.outEdges will not point to vertex group, but to its output.
                // So find the vertex group if there is one.
                TezOperator succOpVertexGroup = null;
                for (TezOperator succ : successors) {
                    if (succ.isVertexGroup()
                            && succ.getVertexGroupInfo().getOutput()
                                    .equals(succOp.getOperatorKey().toString())) {
                        succOpVertexGroup = succ;
                        break;
                    }
                }
                TezEdgeDescriptor edge = entry.getValue();
                // Edge cannot be one to one as it will get input from two or
                // more union predecessors. Change it to SCATTER_GATHER
                if (edge.dataMovementType == DataMovementType.ONE_TO_ONE) {
                    edge.dataMovementType = DataMovementType.SCATTER_GATHER;
                    edge.partitionerClass = RoundRobinPartitioner.class;
                    edge.outputClassName = UnorderedPartitionedKVOutput.class.getName();
                    edge.inputClassName = UnorderedKVInput.class.getName();
                }
                TezOperator vertexGroupOp = outputVertexGroupOps[unionOutputKeys.indexOf(entry.getKey().toString())];
                for (OperatorKey predKey : vertexGroupOp.getVertexGroupMembers()) {
                    TezOperator pred = tezPlan.getOperator(predKey);
                    // Keep the output edge directly to successor
                    // Don't need to keep output edge for vertexgroup
                    pred.outEdges.put(entry.getKey(), edge);
                    succOp.inEdges.put(predKey, edge);
                    if (succOpVertexGroup != null) {
                        succOpVertexGroup.getVertexGroupMembers().add(predKey);
                        succOpVertexGroup.getVertexGroupInfo().addInput(predKey);
                        // Connect directly to the successor vertex group
                        tezPlan.disconnect(pred, vertexGroupOp);
                        tezPlan.connect(pred, succOpVertexGroup);
                    }
                }
                if (succOpVertexGroup != null) {
                    succOpVertexGroup.getVertexGroupMembers().remove(unionOp.getOperatorKey());
                    succOpVertexGroup.getVertexGroupInfo().removeInput(unionOp.getOperatorKey());
                    //Discard the new vertex group created
                    tezPlan.remove(vertexGroupOp);
                } else {
                    tezPlan.connect(vertexGroupOp, succOp);
                }
            }
        } catch (Exception e) {
            throw new VisitorException(e);
        }

        List<TezOperator> succs = tezPlan.getSuccessors(unionOp);
        // Create a copy as disconnect while iterating modifies the original list
        List<TezOperator> successors = succs == null ? null : new ArrayList<TezOperator>(succs);
        if (successors != null) {
            // Successor inputs should now point to the vertex groups.
            for (TezOperator succ : successors) {
                LinkedList<TezInput> inputs = PlanHelper.getPhysicalOperators(succ.plan, TezInput.class);
                for (TezInput input : inputs) {
                    for (String key : input.getTezInputs()) {
                        if (key.equals(unionOpKey)) {
                            input.replaceInput(key,
                                    newOutputKeys[unionOutputKeys.indexOf(succ.getOperatorKey().toString())]);
                        }
                    }
                }
                tezPlan.disconnect(unionOp, succ);
            }
        }

        //Remove union operator from the plan
        tezPlan.remove(unionOp);

    }
View Full Code Here

        TestJobSubmission.oneTimeSetUp();
    }

    @Override
    public void checkJobControlCompilerErrResult(PhysicalPlan pp, PigContext pc) throws Exception {
        TezOperPlan tezPlan = buildTezPlan(pp, pc);

        LoaderProcessor loaderStorer = new LoaderProcessor(tezPlan, pc);
        loaderStorer.visit();

        ParallelismSetter parallelismSetter = new ParallelismSetter(tezPlan, pc);
View Full Code Here

        }
    }

    @Override
    public void checkDefaultParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
        TezOperPlan tezPlan = buildTezPlan(pp, pc);

        LoaderProcessor loaderStorer = new LoaderProcessor(tezPlan, pc);
        loaderStorer.visit();

        ParallelismSetter parallelismSetter = new ParallelismSetter(tezPlan, pc);
View Full Code Here

        assertEquals(ts.getParallelism(), 1);
    }

    @Override
    public void checkGroupConstWithParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
        TezOperPlan tezPlan = buildTezPlan(pp, pc);

        LoaderProcessor loaderStorer = new LoaderProcessor(tezPlan, pc);
        loaderStorer.visit();

        ParallelismSetter parallelismSetter = new ParallelismSetter(tezPlan, pc);
View Full Code Here

        }
    }

    @Override
    public void checkGroupNonConstWithParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
        TezOperPlan tezPlan = buildTezPlan(pp, pc);

        LoaderProcessor loaderStorer = new LoaderProcessor(tezPlan, pc);
        loaderStorer.visit();

        ParallelismSetter parallelismSetter = new ParallelismSetter(tezPlan, pc);
View Full Code Here

    @Override
    public SecondaryKeyOptimizer visitSecondaryKeyOptimizer(String query)
            throws Exception, VisitorException {
        PhysicalPlan pp = Util.buildPp(pigServer, query);
        TezCompiler comp = new TezCompiler(pp, pc);
        TezOperPlan tezPlan = comp.compile();
        boolean nocombiner = Boolean.parseBoolean(pc.getProperties().getProperty(
                PigConfiguration.PROP_NO_COMBINER, "false"));

        // Run CombinerOptimizer on Tez plan
        if (!nocombiner) {
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.tez.plan.TezOperPlan

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.