Package org.apache.pig.newplan.logical.relational

Examples of org.apache.pig.newplan.logical.relational.LOGenerate


            // flatten(distinct({(1), (1)})) is (1), (1)
           
            // in both cases correctness is not affected
           
            LOForEach foreach = (LOForEach)matched.getSources().get(0);
            LOGenerate gen = OptimizerUtils.findGenerate( foreach );
           
            if( !OptimizerUtils.hasFlatten( gen ) )
                return false;
           
            // If a foreach contains a nondeterministic udf, we shouldn't push it down.
            for (LogicalExpressionPlan p : gen.getOutputPlans()) {
                if (OptimizerUtils.planHasNonDeterministicUdf(p))
                    return false;
            }
           
            List<Operator> succs = currentPlan.getSuccessors( foreach );
            if( succs == null || succs.size() != 1 )
                return false;
           
            List<Long> uids = getNonFlattenFieldUids( gen );

            Operator succ = succs.get( );
            if( !( succ instanceof LOSort || succ instanceof LOJoin || succ instanceof LOCross ) )
                return false;
           
            if( succ instanceof LOSort ) {
                // Check if the expressions for the foreach generate are purely projection including flatten fields.
                List<LogicalExpressionPlan> exprs = gen.getOutputPlans();
                for( LogicalExpressionPlan expr : exprs ) {
                    if( !isPureProjection( expr ) )
                        return false;
                }
View Full Code Here


                Map<Integer, LogicalSchema> cachedUserDefinedSchema = new HashMap<Integer, LogicalSchema>();
                boolean[] flags = null;
                int fieldCount = 0;
                for( Operator op : preds ) {
                    if( op == foreach ) {
                        LOGenerate gen = OptimizerUtils.findGenerate( foreach );
                        flags = gen.getFlattenFlags();
                        for( int i = 0; i < flags.length; i++ ) {
                            if( flags[i] ) {
                                fieldsToBeFlattaned.add(fieldCount);
                                if (gen.getUserDefinedSchema()!=null && gen.getUserDefinedSchema().get(i)!=null) {
                                    cachedUserDefinedSchema.put(fieldCount, gen.getUserDefinedSchema().get(i));
                                    gen.getUserDefinedSchema().set(i, null);
                                }
                                fieldCount++;
                            } else {
                                fieldCount++;
                            }
                        }
                    } else {
                        fieldCount += ( (LogicalRelationalOperator)op ).getSchema().size();
                    }
                }
               
               
                boolean[] flattenFlags = new boolean[fieldCount];
                List<LogicalSchema> mUserDefinedSchema = null;
                if (cachedUserDefinedSchema!=null) {
                    mUserDefinedSchema = new ArrayList<LogicalSchema>();
                    for (int i=0;i<fieldCount;i++)
                        mUserDefinedSchema.add(null);
                }
                for( Integer i : fieldsToBeFlattaned ) {
                    flattenFlags[i] = true;
                    if (cachedUserDefinedSchema.containsKey(i)) {
                        mUserDefinedSchema.set(i, cachedUserDefinedSchema.get(i));
                    }
                }
               
                // Now create a new foreach after cross/join and insert it into the plan.
                LOForEach newForeach = new LOForEach( currentPlan );
                LogicalPlan innerPlan = new LogicalPlan();
                List<LogicalExpressionPlan> exprs = new ArrayList<LogicalExpressionPlan>( fieldCount );
                LOGenerate gen = new LOGenerate( innerPlan, exprs, flattenFlags );
                if (mUserDefinedSchema!=null)
                    gen.setUserDefinedSchema(mUserDefinedSchema);
                innerPlan.add( gen );
                newForeach.setInnerPlan( innerPlan );
                for( int i = 0; i < fieldCount; i++ ) {
                    LogicalExpressionPlan expr = new LogicalExpressionPlan();
                    expr.add( new ProjectExpression( expr, i, -1, gen ) );
View Full Code Here

        throws FrontendException {

        LOForEach foreach = new LOForEach(plan);
        LOCogroup groupby = new LOCogroup(plan);
        LogicalPlan innerPlan = new LogicalPlan();
        LogicalRelationalOperator gen = new LOGenerate(innerPlan);

        injectForeachOperator(loc, op, foreach);

        // Get all column attributes from the input relation.
        // Create ProjectExpression for all columns. Based on the
View Full Code Here

        }
        return alias;
    }

    LOGenerate createGenerateOp(LogicalPlan plan) {
        return new LOGenerate( plan );
    }
View Full Code Here

        LogicalPlan lp = new LogicalPlan(); // f's inner plan
        LOForEach f = new LOForEach( innerPlan );
        f.setInnerPlan( lp );
        f.setLocation( loc );
        LOGenerate gen = new LOGenerate( lp );
        boolean[] flatten = new boolean[exprPlans.size()];

        List<Operator> innerLoads = new ArrayList<Operator>( exprPlans.size() );
        for( LogicalExpressionPlan plan : exprPlans ) {
            ProjectExpression pe = (ProjectExpression)plan.getSinks().get( 0 );
            String al = pe.getColAlias();
            LOInnerLoad iload = ( al == null ) ?
                    new LOInnerLoad( lp, f, pe.getColNum() ) : createInnerLoad(loc, lp, f, al );
            iload.setLocation( pe.getLocation() );
            pe.setColNum( -1 );
            pe.setInputNum( innerLoads.size() );
            pe.setAttachedRelationalOp( gen );
            innerLoads.add( iload );
        }

        gen.setOutputPlans( exprPlans );
        gen.setFlattenFlags( flatten );
        lp.add( gen );

        for( Operator il : innerLoads ) {
            lp.add( il );
            lp.connect( il, gen );
View Full Code Here

        currentWalker.walk(this);
        popWalker();
       
        //get the LOGenerate
        List<Operator> feOutputs = innerPlan.getSinks();
        LOGenerate gen = null;
        for( Operator op  : feOutputs){
            if(op instanceof LOGenerate){
                if(gen != null){
                    String msg = "Expected single LOGenerate output in innerplan of foreach";
                    throw new VisitorException(foreach,
                            msg,
                            2266,
                            PigException.BUG
                    );
                }
                gen = (LOGenerate) op;
            }
        }
       
        //work on the generate plan, flatten and user schema
        List<LogicalExpressionPlan> expPlans = gen.getOutputPlans();
        List<LogicalExpressionPlan> newExpPlans = new ArrayList<LogicalExpressionPlan>();
       
        List<Operator> loGenPreds = innerPlan.getPredecessors(gen);
       
        if(loGenPreds == null){
            // there are no LOInnerLoads , must be working on just constants
            // no project-star expansion to be done
            return;
        }
       
        List<LogicalSchema> userSchema = gen.getUserDefinedSchema();
        List<LogicalSchema> newUserSchema = null;
        if(userSchema != null){
            newUserSchema = new ArrayList<LogicalSchema>();
        }
       
        boolean[] flattens = gen.getFlattenFlags();
        List<Boolean> newFlattens = new ArrayList<Boolean>(flattens.length);

        //get mapping of LOGenerate predecessor current position to object
        Map<Integer, LogicalRelationalOperator> oldPos2Rel =
            new HashMap<Integer, LogicalRelationalOperator>();
       
        for(int i=0; i<loGenPreds.size(); i++){
            oldPos2Rel.put(i, (LogicalRelationalOperator) loGenPreds.get(i));
        }
       
        //get schema of predecessor, project-star expansion needs a schema
        LogicalRelationalOperator pred =
            (LogicalRelationalOperator) foreach.getPlan().getPredecessors(foreach).get(0);
        LogicalSchema inpSch = pred.getSchema();
        //store mapping between the projection in inner plans
        // of LOGenerate to the input relation object
        Map<ProjectExpression, LogicalRelationalOperator> proj2InpRel =
            new HashMap<ProjectExpression, LogicalRelationalOperator>();
       
       
        for(int i=0; i<expPlans.size(); i++){
            LogicalExpressionPlan expPlan = expPlans.get(i);
            ProjectExpression projStar = getProjectLonelyStar(expPlan, oldPos2Rel);

            boolean foundExpandableProject = false;
            if(projStar != null){             
                //there is a project-star to be expanded

                LogicalSchema userStarSch = null;
                if(userSchema != null && userSchema.get(i) != null){
                    userStarSch = userSchema.get(i);
                }


                //the range values are set in the project in LOInnerLoad
                ProjectExpression loInnerProj = ((LOInnerLoad)oldPos2Rel.get(projStar.getInputNum())).getProjection();

                int firstProjCol = 0;
                int lastProjCol = 0;
               
                if(loInnerProj.isRangeProject()){
                    loInnerProj.setColumnNumberFromAlias();
                    firstProjCol = loInnerProj.getStartCol();
                    lastProjCol = loInnerProj.getEndCol();
                }

               
                boolean isProjectToEnd = loInnerProj.isProjectStar() ||
                    (loInnerProj.isRangeProject() && lastProjCol == -1);
               
                //can't expand if there is no input schema, and this is
                // a project star or project-range-to-end
                if( !(inpSch == null && isProjectToEnd) ){
                   
                    foundExpandableProject = true;

                    if(isProjectToEnd)
                        lastProjCol = inpSch.size() - 1;

                    //replacing the existing project star with new ones
                    expPlan.remove(projStar);

                    //remove the LOInnerLoad with star
                    LOInnerLoad oldLOInnerLoad = (LOInnerLoad)oldPos2Rel.get(projStar.getInputNum());
                    innerPlan.disconnect(oldLOInnerLoad, gen);
                    innerPlan.remove(oldLOInnerLoad);


                    //generate new exp plan, inner load for each field in schema
                    for(int j = firstProjCol; j <= lastProjCol; j++){

                        //add new LOInnerLoad
                        LOInnerLoad newInLoad = new LOInnerLoad(innerPlan, foreach, j);
                        innerPlan.add(newInLoad);
                        innerPlan.connect(newInLoad, gen);


                        // new expression plan and proj
                        LogicalExpressionPlan newExpPlan = new LogicalExpressionPlan();
                        newExpPlans.add(newExpPlan);

                        ProjectExpression newProj =
                            new ProjectExpression(newExpPlan, -2, -1, gen);

                        proj2InpRel.put(newProj, newInLoad);

                        newFlattens.add(flattens[i]);
                        if(newUserSchema != null ){
                            //index into user specified schema
                            int schIdx = j - firstProjCol;
                            if(userStarSch != null
                                    && userStarSch.getFields().size() > schIdx
                                    && userStarSch.getField(schIdx) != null){

                                //if the project-star field has user specified schema, use the
                                // j'th field for this column
                                LogicalSchema sch = new LogicalSchema();
                                sch.addField(new LogicalFieldSchema(userStarSch.getField(schIdx)));
                                newUserSchema.add(sch);
                            }
                            else{
                                newUserSchema.add(null);
                            }
                        }
                    }
                }
            }

            if(!foundExpandableProject){ //no project-star that could be expanded

                //get all projects in here
                FindProjects findProjs = new FindProjects(expPlan);
                findProjs.visit();
                List<ProjectExpression> projs = findProjs.getProjs();

                //create a mapping of project expression to their inputs
                for(ProjectExpression proj : projs){
                    proj2InpRel.put(proj, oldPos2Rel.get(proj.getInputNum()));
                }

                newExpPlans.add(expPlan);

                newFlattens.add(flattens[i]);
                if(newUserSchema != null)
                    newUserSchema.add(userSchema.get(i));

            }
        }

        //get mapping of LoGenerate input relation to current position
        Map<LogicalRelationalOperator, Integer> rel2pos = new HashMap<LogicalRelationalOperator, Integer>();
        List<Operator> newGenPreds = innerPlan.getPredecessors(gen);
        int numNewGenPreds = 0;
        if(newGenPreds != null)
            numNewGenPreds = newGenPreds.size();
           
        for(int i=0; i<numNewGenPreds; i++){
            rel2pos.put((LogicalRelationalOperator) newGenPreds.get(i),i);
        }
       
        //correct the input num for projects
        for(Entry<ProjectExpression, LogicalRelationalOperator> projAndInp : proj2InpRel.entrySet()){
           ProjectExpression proj = projAndInp.getKey();
           LogicalRelationalOperator rel = projAndInp.getValue();
           proj.setInputNum(rel2pos.get(rel));
        }
       
        // set the new lists
        gen.setOutputPlans(newExpPlans);
        gen.setFlattenFlags(Booleans.toArray(newFlattens));
        gen.setUserDefinedSchema(newUserSchema);
       
        gen.resetSchema();
        foreach.resetSchema();
       
    }
View Full Code Here

                LogicalPlan innerPlan = new LogicalPlan();
                foreach.setInnerPlan(innerPlan);

                // build foreach inner plan
                List<LogicalExpressionPlan> exps = new ArrayList<LogicalExpressionPlan>();
                LOGenerate gen = new LOGenerate(innerPlan, exps, new boolean[requiredFields.getFields().size()]);
                innerPlan.add(gen);

                for (int i=0; i<requiredFields.getFields().size(); i++) {
                    LoadPushDown.RequiredField rf = requiredFields.getFields().get(i);
                    LOInnerLoad innerLoad = new LOInnerLoad(innerPlan, foreach, rf.getIndex());
View Full Code Here

            }
            branchHeadToRemove.add((LogicalRelationalOperator)op);
        }

        // Find the expression plan to remove
        LOGenerate gen = (LOGenerate)innerPlan.getSinks().get(0);
        List<LogicalExpressionPlan> genPlansToRemove = new ArrayList<LogicalExpressionPlan>();

        List<LogicalExpressionPlan> genPlans = gen.getOutputPlans();
        for (int i=0;i<genPlans.size();i++) {
            LogicalExpressionPlan expPlan = genPlans.get(i);
            List<Operator> expSources = expPlan.getSinks();

            for (Operator expSrc : expSources) {
                if (expSrc instanceof ProjectExpression) {
                    LogicalRelationalOperator reference = ((ProjectExpression)expSrc).findReferent();
                    if (branchHeadToRemove.contains(reference)) {
                        genPlansToRemove.add(expPlan);
                    }
                }
            }
        }

        // Build the temporary structure based on genPlansToRemove, which include:
        // * flattenList
        // * outputPlanSchemas
        // * uidOnlySchemas
        // * inputsRemoved
        //     We first construct inputsNeeded, and inputsRemoved = (all inputs) - inputsNeeded.
        //     We cannot figure out inputsRemoved directly since the inputs may be used by other output plan.
        //     We can only get inputsRemoved after visiting all output plans.
        List<Boolean> flattenList = new ArrayList<Boolean>();
        Set<Integer> inputsNeeded = new HashSet<Integer>();
        Set<Integer> inputsRemoved = new HashSet<Integer>();
        List<LogicalSchema> outputPlanSchemas = new ArrayList<LogicalSchema>();
        List<LogicalSchema> uidOnlySchemas = new ArrayList<LogicalSchema>();
        List<LogicalSchema> userDefinedSchemas = null;

        if (gen.getUserDefinedSchema()!=null)
            userDefinedSchemas = new ArrayList<LogicalSchema>();

        for (int i=0;i<genPlans.size();i++) {
            LogicalExpressionPlan genPlan = genPlans.get(i);
            if (!genPlansToRemove.contains(genPlan)) {
                flattenList.add(gen.getFlattenFlags()[i]);
                outputPlanSchemas.add(gen.getOutputPlanSchemas().get(i));
                uidOnlySchemas.add(gen.getUidOnlySchemas().get(i));
                if (gen.getUserDefinedSchema()!=null) {
                    userDefinedSchemas.add(gen.getUserDefinedSchema().get(i));
                }
                List<Operator> sinks = genPlan.getSinks();
                for(Operator s: sinks) {
                    if (s instanceof ProjectExpression) {
                        inputsNeeded.add(((ProjectExpression)s).getInputNum());
                    }
                }
            }
        }

        List<Operator> preds = innerPlan.getPredecessors(gen);

        if (preds!=null) {  // otherwise, all gen plan are based on constant, no need to adjust
            for (int i=0;i<preds.size();i++) {
                if (!inputsNeeded.contains(i))
                    inputsRemoved.add(i);
            }
        }


        // Change LOGenerate: remove unneeded output expression plan
        // change flatten flag, outputPlanSchema, uidOnlySchemas
        boolean[] flatten = new boolean[flattenList.size()];
        for (int i=0;i<flattenList.size();i++)
            flatten[i] = flattenList.get(i);

        gen.setFlattenFlags(flatten);
        gen.setOutputPlanSchemas(outputPlanSchemas);
        gen.setUidOnlySchemas(uidOnlySchemas);
        gen.setUserDefinedSchema(userDefinedSchemas);

        for (LogicalExpressionPlan genPlanToRemove : genPlansToRemove) {
            genPlans.remove(genPlanToRemove);
        }

View Full Code Here

            // flatten(distinct({(1), (1)})) is (1), (1)
           
            // in both cases correctness is not affected
           
            LOForEach foreach = (LOForEach)matched.getSources().get(0);
            LOGenerate gen = OptimizerUtils.findGenerate( foreach );
           
            if( !OptimizerUtils.hasFlatten( gen ) )
                return false;
           
            // If a foreach contains a nondeterministic udf, we shouldn't push it down.
            for (LogicalExpressionPlan p : gen.getOutputPlans()) {
                if (OptimizerUtils.planHasNonDeterministicUdf(p))
                    return false;
            }
           
            List<Operator> succs = currentPlan.getSuccessors( foreach );
            if( succs == null || succs.size() != 1 )
                return false;
           
            List<Long> uids = getNonFlattenFieldUids( gen );

            Operator succ = succs.get( );
            if( !( succ instanceof LOSort || succ instanceof LOJoin || succ instanceof LOCross ) )
                return false;
           
            if( succ instanceof LOSort ) {
                // Check if the expressions for the foreach generate are purely projection including flatten fields.
                List<LogicalExpressionPlan> exprs = gen.getOutputPlans();
                for( LogicalExpressionPlan expr : exprs ) {
                    if( !isPureProjection( expr ) )
                        return false;
                }
View Full Code Here

                Map<Integer, LogicalSchema> cachedUserDefinedSchema = new HashMap<Integer, LogicalSchema>();
                boolean[] flags = null;
                int fieldCount = 0;
                for( Operator op : preds ) {
                    if( op == foreach ) {
                        LOGenerate gen = OptimizerUtils.findGenerate( foreach );
                        flags = gen.getFlattenFlags();
                        for( int i = 0; i < flags.length; i++ ) {
                            if( flags[i] ) {
                                fieldsToBeFlattaned.add(fieldCount);
                                if (gen.getUserDefinedSchema()!=null && gen.getUserDefinedSchema().get(i)!=null) {
                                    cachedUserDefinedSchema.put(fieldCount, gen.getUserDefinedSchema().get(i));
                                    cachedUserDefinedSchema.get(fieldCount).mergeUid(gen.getOutputPlanSchemas().get(i));
                                    gen.getUserDefinedSchema().set(i, null);
                                }
                                fieldCount++;
                            } else {
                                fieldCount++;
                            }
                        }
                    } else {
                        fieldCount += ( (LogicalRelationalOperator)op ).getSchema().size();
                    }
                }
               
               
                boolean[] flattenFlags = new boolean[fieldCount];
                List<LogicalSchema> mUserDefinedSchema = null;
                if (cachedUserDefinedSchema!=null) {
                    mUserDefinedSchema = new ArrayList<LogicalSchema>();
                    for (int i=0;i<fieldCount;i++)
                        mUserDefinedSchema.add(null);
                }
                for( Integer i : fieldsToBeFlattaned ) {
                    flattenFlags[i] = true;
                    if (cachedUserDefinedSchema.containsKey(i)) {
                        mUserDefinedSchema.set(i, cachedUserDefinedSchema.get(i));
                    }
                }
               
                // Now create a new foreach after cross/join and insert it into the plan.
                LOForEach newForeach = new LOForEach( currentPlan );
                LogicalPlan innerPlan = new LogicalPlan();
                List<LogicalExpressionPlan> exprs = new ArrayList<LogicalExpressionPlan>( fieldCount );
                LOGenerate gen = new LOGenerate( innerPlan, exprs, flattenFlags );
                if (mUserDefinedSchema!=null)
                    gen.setUserDefinedSchema(mUserDefinedSchema);
                innerPlan.add( gen );
                newForeach.setInnerPlan( innerPlan );
                for( int i = 0; i < fieldCount; i++ ) {
                    LogicalExpressionPlan expr = new LogicalExpressionPlan();
                    expr.add( new ProjectExpression( expr, i, -1, gen ) );
View Full Code Here

TOP

Related Classes of org.apache.pig.newplan.logical.relational.LOGenerate

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.