Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach


            flat1.add(true);
          }
        }
       
        // This foreach will pick the sort key columns from the RandomSampleLoader output
        POForEach nfe1 = new POForEach(new OperatorKey(scope,nig.getNextNodeId(scope)),-1,eps1,flat1);
        mro.mapPlan.addAsLeaf(nfe1);
       
        // Now set up a POLocalRearrange which has "all" as the key and the output of the
        // foreach will be the "value" out of POLocalRearrange
        PhysicalPlan ep1 = new PhysicalPlan();
        ConstantExpression ce = new ConstantExpression(new OperatorKey(scope,nig.getNextNodeId(scope)));
        ce.setValue("all");
        ce.setResultType(DataType.CHARARRAY);
        ep1.add(ce);
       
        List<PhysicalPlan> eps = new ArrayList<PhysicalPlan>();
        eps.add(ep1);
       
        POLocalRearrange lr = new POLocalRearrange(new OperatorKey(scope,nig.getNextNodeId(scope)));
        try {
            lr.setIndex(0);
        } catch (ExecException e) {
          int errCode = 2058;
          String msg = "Unable to set index on newly created POLocalRearrange.";
            throw new PlanException(msg, errCode, PigException.BUG, e);
        }
        lr.setKeyType(DataType.CHARARRAY);
        lr.setPlans(eps);
        lr.setResultType(DataType.TUPLE);
        mro.mapPlan.add(lr);
        mro.mapPlan.connect(nfe1, lr);
       
        mro.setMapDone(true);
       
        POPackage pkg = new POPackage(new OperatorKey(scope,nig.getNextNodeId(scope)));
        pkg.setKeyType(DataType.CHARARRAY);
        pkg.setNumInps(1);
        boolean[] inner = {false};
        pkg.setInner(inner);
        mro.reducePlan.add(pkg);
       
        // Lets start building the plan which will have the sort
        // for the foreach
        PhysicalPlan fe2Plan = new PhysicalPlan();
        // Top level project which just projects the tuple which is coming
        // from the foreach after the package
        POProject topPrj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
        topPrj.setColumn(1);
        topPrj.setResultType(DataType.TUPLE);
        topPrj.setOverloaded(true);
        fe2Plan.add(topPrj);
       
        // the projections which will form sort plans
        List<PhysicalPlan> nesSortPlanLst = new ArrayList<PhysicalPlan>();            
        if (sortKeyPlans != null) {
          for(int i=0; i<sortKeyPlans.size(); i++) {         
            nesSortPlanLst.add(sortKeyPlans.get(i));         
          }
        }else{  
          Pair<Integer,Byte>[] fields = null;
            try{
              fields = getSortCols(sort.getSortPlans());
            }catch(Exception e) {
              throw new RuntimeException(e);
            }
            // Set up the projections of the key columns
            if (fields == null) {
                PhysicalPlan ep = new PhysicalPlan();
                POProject prj = new POProject(new OperatorKey(scope,
                    nig.getNextNodeId(scope)));
                prj.setStar(true);
                prj.setOverloaded(false);
                prj.setResultType(DataType.TUPLE);
                ep.add(prj);
                nesSortPlanLst.add(ep);
            } else {
                for (int i=0; i<fields.length; i++) {
                    PhysicalPlan ep = new PhysicalPlan();
                    POProject prj = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
                    prj.setColumn(i);
                    prj.setOverloaded(false);
                    prj.setResultType(fields[i].second);
                    ep.add(prj);
                    nesSortPlanLst.add(ep);
                }
            }                      
        }
       
        sort.setSortPlans(nesSortPlanLst);
        sort.setResultType(DataType.BAG);
        fe2Plan.add(sort);
        fe2Plan.connect(topPrj, sort);
       
        // The plan which will have a constant representing the
        // degree of parallelism for the final order by map-reduce job
        // this will either come from a "order by parallel x" in the script
        // or will be the default number of reducers for the cluster if
        // "parallel x" is not used in the script
        PhysicalPlan rpep = new PhysicalPlan();
        ConstantExpression rpce = new ConstantExpression(new OperatorKey(scope,nig.getNextNodeId(scope)));
        rpce.setRequestedParallelism(rp);
        int val = rp;
        if(val<=0){
            ExecutionEngine eng = pigContext.getExecutionEngine();
            if(pigContext.getExecType() != ExecType.LOCAL){
                try {
                    if(val<=0)
                        val = pigContext.defaultParallel;
                    if (val<=0)
                        val = ((HExecutionEngine)eng).getJobConf().getNumReduceTasks();
                    if (val<=0)
                        val = 1;
                } catch (Exception e) {
                    int errCode = 6015;
                    String msg = "Problem getting the default number of reduces from the Job Client.";
                    throw new MRCompilerException(msg, errCode, PigException.REMOTE_ENVIRONMENT, e);
                }
            } else {
              val = 1; // local mode, set it to 1
            }
        }
        int parallelismForSort = (rp <= 0 ? val : rp);
        rpce.setValue(parallelismForSort);
       
        rpce.setResultType(DataType.INTEGER);
        rpep.add(rpce);
       
        List<PhysicalPlan> genEps = new ArrayList<PhysicalPlan>();
        genEps.add(rpep);
        genEps.add(fe2Plan);
       
        List<Boolean> flattened2 = new ArrayList<Boolean>();
        flattened2.add(false);
        flattened2.add(false);
       
        POForEach nfe2 = new POForEach(new OperatorKey(scope,nig.getNextNodeId(scope)),-1, genEps, flattened2);
        mro.reducePlan.add(nfe2);
        mro.reducePlan.connect(pkg, nfe2);
       
        // Let's connect the output from the foreach containing
        // number of quantiles and the sorted bag of samples to
        // another foreach with the FindQuantiles udf. The input
        // to the FindQuantiles udf is a project(*) which takes the
        // foreach input and gives it to the udf
        PhysicalPlan ep4 = new PhysicalPlan();
        POProject prjStar4 = new POProject(new OperatorKey(scope,nig.getNextNodeId(scope)));
        prjStar4.setResultType(DataType.TUPLE);
        prjStar4.setStar(true);
        ep4.add(prjStar4);
       
        List<PhysicalOperator> ufInps = new ArrayList<PhysicalOperator>();
        ufInps.add(prjStar4);
     
        POUserFunc uf = new POUserFunc(new OperatorKey(scope,nig.getNextNodeId(scope)), -1, ufInps,
            new FuncSpec(udfClassName, udfArgs));
        ep4.add(uf);
        ep4.connect(prjStar4, uf);
       
        List<PhysicalPlan> ep4s = new ArrayList<PhysicalPlan>();
        ep4s.add(ep4);
        List<Boolean> flattened3 = new ArrayList<Boolean>();
        flattened3.add(false);
        POForEach nfe3 = new POForEach(new OperatorKey(scope,nig.getNextNodeId(scope)), -1, ep4s, flattened3);
       
        mro.reducePlan.add(nfe3);
        mro.reducePlan.connect(nfe2, nfe3);
       
        POStore str = getStore();
View Full Code Here


        boolean[] flatten = gen.getFlattenFlags();
        List<Boolean> flattenList = new ArrayList<Boolean>();
        for(boolean fl: flatten) {
            flattenList.add(fl);
        }
        POForEach poFE = new POForEach(new OperatorKey(scope, nodeGen
                .getNextNodeId(scope)), foreach.getRequestedParallelisam(), innerPlans, flattenList);
        poFE.setAlias(foreach.getAlias());
        poFE.setResultType(DataType.BAG);
        logToPhyMap.put(foreach, poFE);
        currentPlan.add(poFE);

        // generate cannot have multiple inputs
        List<Operator> op = foreach.getPlan().getPredecessors(foreach);
View Full Code Here

                        updateWithEmptyBagCheck(fep1, joinInput);
                    }
                    flattenLst.add(true);
                }
               
                POForEach fe = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)),
                        loj.getRequestedParallelisam(), fePlans, flattenLst );
                fe.setAlias(loj.getAlias());
                currentPlan.add(fe);
                currentPlan.connect(poPackage, fe);
                logToPhyMap.put(loj, fe);
            }catch (PlanException e1) {
                int errCode = 2015;
View Full Code Here

            op = sucs.get(0);
            boolean lastInputFlattened = true;
            boolean allSimple = true;
            if (op instanceof POForEach)
            {
                POForEach forEach = (POForEach)op;
                List<PhysicalPlan> planList = forEach.getInputPlans();
                List<Boolean> flatten = forEach.getToBeFlattened();
                POProject projOfLastInput = null;
                int i = 0;
                // check all nested foreach plans
                // 1. If it is simple projection
                // 2. If last input is all flattened
View Full Code Here

                feproj.setStar(true);
                feproj.setOverloaded(false);
                fep2.add(feproj);
                List<PhysicalPlan> fePlans = Arrays.asList(fep1, fep2);
               
                POForEach fe = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cross.getRequestedParallelisam(), fePlans, flattenLst );
                fe.setAlias(cross.getAlias());
                currentPlan.add(fe);
                currentPlan.connect(logToPhyMap.get(op), fe);
               
                POLocalRearrange physOp = new POLocalRearrange(new OperatorKey(
                        scope, nodeGen.getNextNodeId(scope)), cross
                        .getRequestedParallelisam());
                physOp.setAlias(cross.getAlias());
                List<PhysicalPlan> lrPlans = new ArrayList<PhysicalPlan>();
                for(int i=0;i<inputs.size();i++){
                    PhysicalPlan lrp1 = new PhysicalPlan();
                    POProject lrproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cross.getRequestedParallelisam(), i);
                    lrproj1.setAlias(cross.getAlias());
                    lrproj1.setOverloaded(false);
                    lrproj1.setResultType(DataType.INTEGER);
                    lrp1.add(lrproj1);
                    lrPlans.add(lrp1);
                }
               
                physOp.setCross(true);
                physOp.setIndex(count++);
                physOp.setKeyType(DataType.TUPLE);
                physOp.setPlans(lrPlans);
                physOp.setResultType(DataType.TUPLE);
               
                currentPlan.add(physOp);
                currentPlan.connect(fe, physOp);
                currentPlan.connect(physOp, poGlobal);
            }
        } catch (PlanException e1) {
            int errCode = 2015;
            String msg = "Invalid physical operators in the physical plan" ;
            throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e1);
        } catch (ExecException e) {
            int errCode = 2058;
            String msg = "Unable to set index on newly create POLocalRearrange.";
            throw new VisitorException(msg, errCode, PigException.BUG, e);
        }
       
        poPackage.setKeyType(DataType.TUPLE);
        poPackage.setResultType(DataType.TUPLE);
        poPackage.setNumInps(count);
        boolean inner[] = new boolean[count];
        for (int i=0;i<count;i++) {
            inner[i] = true;
        }
        poPackage.setInner(inner);
       
        List<PhysicalPlan> fePlans = new ArrayList<PhysicalPlan>();
        List<Boolean> flattenLst = new ArrayList<Boolean>();
        for(int i=1;i<=count;i++){
            PhysicalPlan fep1 = new PhysicalPlan();
            POProject feproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cross.getRequestedParallelisam(), i);
            feproj1.setAlias(cross.getAlias());
            feproj1.setResultType(DataType.BAG);
            feproj1.setOverloaded(false);
            fep1.add(feproj1);
            fePlans.add(fep1);
            flattenLst.add(true);
        }
       
        POForEach fe = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cross.getRequestedParallelisam(), fePlans, flattenLst );
        fe.setAlias(cross.getAlias());
        currentPlan.add(fe);
        try{
            currentPlan.connect(poPackage, fe);
        }catch (PlanException e1) {
            int errCode = 2015;
View Full Code Here

        boolean[] flatten = gen.getFlattenFlags();
        List<Boolean> flattenList = new ArrayList<Boolean>();
        for(boolean fl: flatten) {
            flattenList.add(fl);
        }
        POForEach poFE = new POForEach(new OperatorKey(scope, nodeGen
                .getNextNodeId(scope)), foreach.getRequestedParallelisam(), innerPlans, flattenList);
        poFE.setAlias(foreach.getAlias());
        poFE.setResultType(DataType.BAG);
        logToPhyMap.put(foreach, poFE);
        currentPlan.add(poFE);

        // generate cannot have multiple inputs
        List<Operator> op = foreach.getPlan().getPredecessors(foreach);
View Full Code Here

            }

            if(!usePOMergeJoin){
                // Now create and configure foreach which will flatten the output
                // of cogroup.
                POForEach fe = compileFE4Flattening(innerFlags,  scope, parallel, alias, inputs);
                currentPlan.add(fe);
                try {
                    currentPlan.connect(smj, fe);
                } catch (PlanException e) {
                    throw new LogicalToPhysicalTranslatorException(e.getMessage(),e.getErrorCode(),e.getErrorSource(),e);
                }
                logToPhyMap.put(loj, fe);               
            }
           
            return;
        }
        else if (loj.getJoinType() == LOJoin.JOINTYPE.HASH){
            POPackage poPackage = compileToLR_GR_PackTrio(loj, loj.getCustomPartitioner(), innerFlags, loj.getExpressionPlans());
            POForEach fe = compileFE4Flattening(innerFlags,  scope, parallel, alias, inputs);
            currentPlan.add(fe);
            try {
                currentPlan.connect(poPackage, fe);
            } catch (PlanException e) {
                throw new LogicalToPhysicalTranslatorException(e.getDetailedMessage(),
View Full Code Here

            int parallel, String alias, List<Operator> inputs)
                throws FrontendException {
       
        List<PhysicalPlan> fePlans = new ArrayList<PhysicalPlan>();
        List<Boolean> flattenLst = new ArrayList<Boolean>();
        POForEach fe;
        try{
            for(int i=0;i< inputs.size();i++){
                PhysicalPlan fep1 = new PhysicalPlan();
                POProject feproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)),
                        parallel, i+1); //i+1 since the first column is the "group" field
                feproj1.setAlias(alias);
                feproj1.setResultType(DataType.BAG);
                feproj1.setOverloaded(false);
                fep1.add(feproj1);
                fePlans.add(fep1);
                // the parser would have marked the side
                // where we need to keep empty bags on
                // non matched as outer (innerFlags[i] would be
                // false)
                if(!(innerFlags[i])) {
                    Operator joinInput = inputs.get(i);
                    // for outer join add a bincond
                    // which will project nulls when bag is
                    // empty
                    updateWithEmptyBagCheck(fep1, joinInput);
                }
                flattenLst.add(true);
            }
           
            fe = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)),
                    parallel, fePlans, flattenLst );
            fe.setAlias(alias);

        }catch (PlanException e1) {
            int errCode = 2015;
            String msg = "Invalid physical operators in the physical plan" ;
            throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e1);
View Full Code Here

        prj1.setColumn(1);
        prj1.setOverloaded(true);
        ep1.add(prj1);
        eps1.add(ep1);
        flat1.add(true);
        POForEach fe = new POForEach(new OperatorKey(scope, nig
                .getNextNodeId(scope)), -1, eps1, flat1);
        fe.setResultType(DataType.BAG);
        return fe;
    }
View Full Code Here

            prj1.setColumn(0);
            prj1.setOverloaded(false);
            ep1.add(prj1);
            eps1.add(ep1);
            flat1.add(true);
            POForEach nfe1 = new POForEach(new OperatorKey(scope, nig
                    .getNextNodeId(scope)), op.getRequestedParallelism(), eps1,
                    flat1);
            nfe1.setResultType(DataType.BAG);
            curMROp.reducePlan.addAsLeaf(nfe1);
            curMROp.setNeedsDistinctCombiner(true);
            phyToMROpMap.put(op, curMROp);
        }catch(Exception e){
            int errCode = 2034;
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.