Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators

Examples of org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POProject


                new OperatorKey("", r.nextLong()), -1, null, new FuncSpec(funcName));
        POSort sort = new POSort(new OperatorKey("", r.nextLong()), -1, ldFil1.getLeaves(),
                null, new ArrayList<Boolean>(), comparator);
        sort.setRequestedParallelism(20);
        PhysicalPlan nesSortPlan = new PhysicalPlan();
        POProject topPrj = new POProject(new OperatorKey("", r.nextLong()));
        topPrj.setColumn(1);
        topPrj.setOverloaded(true);
        topPrj.setResultType(DataType.TUPLE);
        nesSortPlan.add(topPrj);
       
        POProject prjStar2 = new POProject(new OperatorKey("", r.nextLong()));
        prjStar2.setResultType(DataType.TUPLE);
        prjStar2.setStar(true);
        nesSortPlan.add(prjStar2);
       
        nesSortPlan.connect(topPrj, prjStar2);
        List<PhysicalPlan> nesSortPlanLst = new ArrayList<PhysicalPlan>();
        nesSortPlanLst.add(nesSortPlan);
View Full Code Here


       
        // we did not see a Non algebraic or a distinct so far
        // proceed to check leaves
        PhysicalOperator leaf = leaves.get(0);
        if (leaf instanceof POProject) {
            POProject proj = (POProject)leaf;
            // Check that it's a simple project.  We can't currently handle
            // things like group.$0, because that requires resetting types on
            // the reduce side.
            if (pp.getPredecessors(proj) != null) return ExprType.NOT_ALGEBRAIC;

            // Check if it's a projection of bag. Currently we can't use combiner
            // for statement like c = foreach b generate group, SUM(a), a;
            // where a is a bag.
            if (proj.getResultType() == DataType.BAG) return ExprType.NOT_ALGEBRAIC;
           
            // Check to see if this is a projection of the grouping column.
            // If so, it will be a projection of col 0 and will have no
            // predecessors (to avoid things like group.$0, which isn't what we
            // want).
            List<Integer> cols = proj.getColumns();
            if (cols != null && cols.size() == 1 && cols.get(0) == 0 &&
                    pp.getPredecessors(proj) == null) {
                mKeyField = field;
                keyFieldPositions[field] = true;
                mKeyType = proj.getResultType();
            } else {
                // It can't be a flatten except on the grouping column
                if (toBeFlattened) return ExprType.NOT_ALGEBRAIC;
            }
            return ExprType.SIMPLE_PROJECT;
View Full Code Here

     * @param plan
     * @throws PlanException
     */
    private void setProjectInput(PhysicalOperator op, PhysicalPlan plan, int index) throws PlanException {
        String scope = op.getOperatorKey().scope;
        POProject proj = new POProject(new OperatorKey(scope,
            NodeIdGenerator.getGenerator().getNextNodeId(scope)),
            op.getRequestedParallelism(), index);
        proj.setResultType(DataType.BAG);
        // Remove old connections and elements from the plan
        plan.trimAbove(op);
        plan.add(proj);
        plan.connect(proj, op);
        List<PhysicalOperator> inputs =
View Full Code Here

     * @param fe
     */
    private void addKeyProject(POForEach fe) {
        PhysicalPlan newForEachInnerPlan = new PhysicalPlan();
        String scope = fe.getOperatorKey().scope;
        POProject proj = new POProject(new OperatorKey(scope,
            NodeIdGenerator.getGenerator().getNextNodeId(scope)), -1, 0);
        proj.setResultType(mKeyType);
        newForEachInnerPlan.add(proj);
        fe.addInputPlan(newForEachInnerPlan, false);
    }
View Full Code Here

                // Then we can use this information to strip off these columns
                // from the "Value" and in POPackage stitch the right "Value"
                // tuple back by getting these columns from the "key". The goal
                // is to reduce the amount of data sent to Hadoop in the map.
                if(leaf instanceof POProject) {
                    POProject project = (POProject) leaf;
                    if(project.isStar()) {
                        if(plans.size() == 1) {
                            // note that we have a project *
                            mProjectStar  = true;
                            // key will be a tuple in this case
                            isKeyTuple = true;
                        } else {
                            // TODO: currently "group by (*, somethingelse)" is NOT
                            // allowed. So we should never get here. But once it is
                            // allowed, we will need to handle it. For now just log
                            log.debug("Project * in group by not being optimized in key-value transfer");
                        }
                    } else {
                        try {
                            List<PhysicalOperator> preds = plan.getPredecessors(leaf);
                            if (preds==null || !(preds.get(0) instanceof POProject))
                                mProjectedColsMap.put(project.getColumn(), keyIndex);
                        } catch (ExecException e) {
                            int errCode = 2070;
                            String msg = "Problem in accessing column from project operator.";
                            throw new PlanException(msg, errCode, PigException.BUG);
                        }
                    }
                    if(project.getResultType() == DataType.TUPLE)
                        isKeyTuple = true;
                }
                keyIndex++;
            }
        }
View Full Code Here

                // Then we can use this information to strip off these columns
                // from the "Value" and in POPackage stitch the right "Value"
                // tuple back by getting these columns from the "key". The goal
                // is to reduce the amount of data sent to Hadoop in the map.
                if(leaf instanceof POProject) {
                    POProject project = (POProject) leaf;
                    if(project.isStar()) {
                        if(secondaryPlans.size() == 1) {
                            // note that we have a project *
                            mSecondaryProjectStar  = true;
                            // key will be a tuple in this case
                            isSecondaryKeyTuple = true;
                        } else {
                            // TODO: currently "group by (*, somethingelse)" is NOT
                            // allowed. So we should never get here. But once it is
                            // allowed, we will need to handle it. For now just log
                            log.debug("Project * in group by not being optimized in key-value transfer");
                        }
                    } else {
                        try {
                            List<PhysicalOperator> preds = plan.getPredecessors(leaf);
                            if (preds==null || !(preds.get(0) instanceof POProject))
                                mSecondaryProjectedColsMap.put(project.getColumn(), keyIndex);
                        } catch (ExecException e) {
                            int errCode = 2070;
                            String msg = "Problem in accessing column from project operator.";
                            throw new PlanException(msg, errCode, PigException.BUG);
                        }
                    }
                    if(project.getResultType() == DataType.TUPLE)
                        isSecondaryKeyTuple = true;
                }
                keyIndex++;
            }
        }
View Full Code Here

                // Then we can use this information to strip off these columns
                // from the "Value" and in POPostCombinerPackage stitch the right "Value"
                // tuple back by getting these columns from the "key". The goal
                // is to reduce the amount of data sent to Hadoop in the map.
                if(leaf instanceof POProject) {
                    POProject project = (POProject) leaf;
                    if(project.isStar()) {
                        int errCode = 2021;
                        String msg = "Internal error. Unexpected operator project(*) in local rearrange inner plan.";
                        throw new PlanException(msg, errCode, PigException.BUG);
                    } else {
                        try {
                            mProjectedColsMap.put(project.getColumn(), keyIndex);
                        } catch (ExecException e) {
                            int errCode = 2070;
                            String msg = "Problem in accessing column from project operator.";
                            throw new PlanException(msg, errCode, PigException.BUG);
                        }
                    }
                    if(project.getResultType() == DataType.TUPLE)
                        isKeyTuple = true;
                }
                keyIndex++;
            }
        }
View Full Code Here

                /*fep1.add(gfc);
                fep1.connect(ce1, gfc);
                fep1.connect(ce2, gfc);*/
               
                PhysicalPlan fep2 = new PhysicalPlan();
                POProject feproj = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cross.getRequestedParallelisam());
                feproj.setAlias(cross.getAlias());
                feproj.setResultType(DataType.TUPLE);
                feproj.setStar(true);
                feproj.setOverloaded(false);
                fep2.add(feproj);
                List<PhysicalPlan> fePlans = Arrays.asList(fep1, fep2);
               
                POForEach fe = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cross.getRequestedParallelisam(), fePlans, flattenLst );
                fe.setAlias(cross.getAlias());
                currentPlan.add(fe);
                currentPlan.connect(logToPhyMap.get(op), fe);
               
                POLocalRearrange physOp = new POLocalRearrange(new OperatorKey(
                        scope, nodeGen.getNextNodeId(scope)), cross
                        .getRequestedParallelisam());
                physOp.setAlias(cross.getAlias());
                List<PhysicalPlan> lrPlans = new ArrayList<PhysicalPlan>();
                for(int i=0;i<inputs.size();i++){
                    PhysicalPlan lrp1 = new PhysicalPlan();
                    POProject lrproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cross.getRequestedParallelisam(), i);
                    lrproj1.setAlias(cross.getAlias());
                    lrproj1.setOverloaded(false);
                    lrproj1.setResultType(DataType.INTEGER);
                    lrp1.add(lrproj1);
                    lrPlans.add(lrp1);
                }
               
                physOp.setCross(true);
                physOp.setIndex(count++);
                physOp.setKeyType(DataType.TUPLE);
                physOp.setPlans(lrPlans);
                physOp.setResultType(DataType.TUPLE);
               
                currentPlan.add(physOp);
                currentPlan.connect(fe, physOp);
                currentPlan.connect(physOp, poGlobal);
            }
        } catch (PlanException e1) {
            int errCode = 2015;
            String msg = "Invalid physical operators in the physical plan" ;
            throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e1);
        } catch (ExecException e) {
            int errCode = 2058;
            String msg = "Unable to set index on newly create POLocalRearrange.";
            throw new VisitorException(msg, errCode, PigException.BUG, e);
        }
       
        poPackage.setKeyType(DataType.TUPLE);
        poPackage.setResultType(DataType.TUPLE);
        poPackage.setNumInps(count);
        boolean inner[] = new boolean[count];
        for (int i=0;i<count;i++) {
            inner[i] = true;
        }
        poPackage.setInner(inner);
       
        List<PhysicalPlan> fePlans = new ArrayList<PhysicalPlan>();
        List<Boolean> flattenLst = new ArrayList<Boolean>();
        for(int i=1;i<=count;i++){
            PhysicalPlan fep1 = new PhysicalPlan();
            POProject feproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cross.getRequestedParallelisam(), i);
            feproj1.setAlias(cross.getAlias());
            feproj1.setResultType(DataType.BAG);
            feproj1.setOverloaded(false);
            fep1.add(feproj1);
            fePlans.add(fep1);
            flattenLst.add(true);
        }
       
View Full Code Here

    @Override
    public void visit(LOInnerLoad load) throws FrontendException {
        String scope = DEFAULT_SCOPE;
       
        POProject exprOp = new POProject(new OperatorKey(scope, nodeGen
              .getNextNodeId(scope)));
       
        LogicalSchema s = load.getSchema();

        if (load.sourceIsBag()) {
            Operator succ = load.getPlan().getSuccessors(load).get(0);
            if (succ instanceof LOGenerate) {
                exprOp.setResultType(DataType.BAG);
            }
            else {
                exprOp.setResultType(DataType.TUPLE);
            }
            exprOp.setOverloaded(true);
        }
        else {
            if (s!=null)
                exprOp.setResultType(s.getField(0).type);
            else
                exprOp.setResultType(DataType.BYTEARRAY);
        }

        exprOp.setColumn(load.getColNum());
        exprOp.setStar(load.getProjection().isProjectStar());       
       
        // set input to POProject to the predecessor of foreach
       
        logToPhyMap.put(load, exprOp);
        currentPlan.add(exprOp);
View Full Code Here

        List<Boolean> flattenLst = new ArrayList<Boolean>();
        POForEach fe;
        try{
            for(int i=0;i< inputs.size();i++){
                PhysicalPlan fep1 = new PhysicalPlan();
                POProject feproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)),
                        parallel, i+1); //i+1 since the first column is the "group" field
                feproj1.setAlias(alias);
                feproj1.setResultType(DataType.BAG);
                feproj1.setOverloaded(false);
                fep1.add(feproj1);
                fePlans.add(fep1);
                // the parser would have marked the side
                // where we need to keep empty bags on
                // non matched as outer (innerFlags[i] would be
View Full Code Here

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POProject

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.