Package org.apache.pig.backend.hadoop.executionengine.tez.plan.operator

Examples of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POIdentityInOutTez


                }
            }

            setIntermediateOutputKeyValue(pack.getPkgr().getKeyType(), payloadConf, tezOp);
        } else if (roots.size() == 1 && roots.get(0) instanceof POIdentityInOutTez) {
            POIdentityInOutTez identityInOut = (POIdentityInOutTez) roots.get(0);
            // TODO Need to fix multiple input key mapping
            TezOperator identityInOutPred = null;
            for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
                if (!pred.isSampleAggregation()) {
                    identityInOutPred = pred;
                    break;
                }
            }
            identityInOut.setInputKey(identityInOutPred.getOperatorKey().toString());
        } else if (roots.size() == 1 && roots.get(0) instanceof POValueInputTez) {
            POValueInputTez valueInput = (POValueInputTez) roots.get(0);

            LinkedList<String> scalarInputs = new LinkedList<String>();
            for (POUserFunc userFunc : PlanHelper.getPhysicalOperators(tezOp.plan, POUserFunc.class) ) {
View Full Code Here


            lrTez.setKeyType(type);
            lrTez.setPlans(groups);
            lrTez.setSkewedJoin(true);
            lrTez.setResultType(DataType.TUPLE);

            POIdentityInOutTez identityInOutTez = new POIdentityInOutTez(
                    OperatorKey.genOpKey(scope), lrTez);
            identityInOutTez.setInputKey(prevOp.getOperatorKey().toString());
            joinJobs[0].plan.addAsLeaf(identityInOutTez);
            joinJobs[0].setClosed(true);
            joinJobs[0].markSampleBasedPartitioner();
            rearrangeOutputs[0] = joinJobs[0];

            compiledInputs = new TezOperator[] {joinInputs[1]};

            // Run POPartitionRearrange for second join table. Note we set the
            // parallelism of POPartitionRearrange to -1, so its parallelism
            // will be determined by the size of streaming table.
            POPartitionRearrangeTez pr =
                    new POPartitionRearrangeTez(OperatorKey.genOpKey(scope));
            try {
                pr.setIndex(1);
            } catch (ExecException e) {
                int errCode = 2058;
                String msg = "Unable to set index on newly created POPartitionRearrange.";
                throw new PlanException(msg, errCode, PigException.BUG, e);
            }

            groups = joinPlans.get(l.get(1));
            pr.setPlans(groups);
            pr.setKeyType(type);
            pr.setSkewedJoin(true);
            pr.setResultType(DataType.TUPLE);
            joinJobs[1].plan.addAsLeaf(pr);
            joinJobs[1].setClosed(true);
            rearrangeOutputs[1] = joinJobs[1];

            compiledInputs = rearrangeOutputs;

            // Create POGlobalRearrange
            POGlobalRearrange gr =
                    new POGlobalRearrange(OperatorKey.genOpKey(scope), rp);
            // Skewed join has its own special partitioner
            gr.setResultType(DataType.TUPLE);
            gr.visit(this);
            joinJobs[2] = curTezOp;
            joinJobs[2].setRequestedParallelism(rp);

            compiledInputs = new TezOperator[] {joinJobs[2]};

            // Create POPakcage
            POPackage pkg = getPackage(2, type);
            pkg.setResultType(DataType.TUPLE);
            boolean [] inner = op.getInnerFlags();
            pkg.getPkgr().setInner(inner);
            pkg.visit(this);

            compiledInputs = new TezOperator[] {curTezOp};

            // Create POForEach
            List<PhysicalPlan> eps = new ArrayList<PhysicalPlan>();
            List<Boolean> flat = new ArrayList<Boolean>();

            // Add corresponding POProjects
            for (int i=0; i < 2; i++) {
                ep = new PhysicalPlan();
                POProject prj = new POProject(OperatorKey.genOpKey(scope));
                prj.setColumn(i+1);
                prj.setOverloaded(false);
                prj.setResultType(DataType.BAG);
                ep.add(prj);
                eps.add(ep);
                if (!inner[i]) {
                    // Add an empty bag for outer join
                    CompilerUtils.addEmptyBagOuterJoin(ep, op.getSchema(i));
                }
                flat.add(true);
            }

            POForEach fe =
                    new POForEach(OperatorKey.genOpKey(scope), -1, eps, flat);
            fe.setResultType(DataType.TUPLE);
            fe.visit(this);

            // Connect vertices
            lrTez.setOutputKey(joinJobs[0].getOperatorKey().toString());
            lrTezSample.setOutputKey(sampleJobPair.first.getOperatorKey().toString());
            identityInOutTez.setOutputKey(joinJobs[2].getOperatorKey().toString());
            pr.setOutputKey(joinJobs[2].getOperatorKey().toString());

            TezEdgeDescriptor edge = joinJobs[0].inEdges.get(prevOp.getOperatorKey());
            joinJobs[0].setUseMRMapSettings(prevOp.isUseMRMapSettings());
            // TODO: Convert to unsorted shuffle after TEZ-661
View Full Code Here

        TezOperator[] opers = new TezOperator[2];
        TezOperator oper1 = getTezOp();
        tezPlan.add(oper1);
        opers[0] = oper1;

        POIdentityInOutTez identityInOutTez = new POIdentityInOutTez(
                OperatorKey.genOpKey(scope),
                inputOperRearrange);
        identityInOutTez.setInputKey(inputOper.getOperatorKey().toString());
        oper1.plan.addAsLeaf(identityInOutTez);
        oper1.setClosed(true);
        oper1.markSampleBasedPartitioner();

        TezOperator oper2 = getTezOp();
        oper2.markGlobalSort();
        opers[1] = oper2;
        tezPlan.add(oper2);

        long limit = sort.getLimit();

        boolean[] sortOrder;

        List<Boolean> sortOrderList = sort.getMAscCols();
        if(sortOrderList != null) {
            sortOrder = new boolean[sortOrderList.size()];
            for(int i = 0; i < sortOrderList.size(); ++i) {
                sortOrder[i] = sortOrderList.get(i);
            }
            oper2.setSortOrder(sortOrder);
        }

        identityInOutTez.setOutputKey(oper2.getOperatorKey().toString());

        if (limit!=-1) {
            POPackage pkg_c = new POPackage(OperatorKey.genOpKey(scope));
            pkg_c.setPkgr(new LitePackager());
            pkg_c.getPkgr().setKeyType((fields == null || fields.length > 1) ? DataType.TUPLE : keyType);
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POIdentityInOutTez

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.