Examples of org.apache.pig.impl.logicalLayer.RelationalOperator

Package org.apache.pig.impl.logicalLayer

Examples of org.apache.pig.impl.logicalLayer.RelationalOperator

org.apache.pig.impl.logicalLayer.RelationalOperator

            if (lo.getSchema()==null)
            {
                safeToPrune = false;
                return;
            }
            RelationalOperator rlo = (RelationalOperator)lo;
            List<LogicalOperator> predecessors = (mPlan.getPredecessors(rlo) == null ? null
                    : new ArrayList<LogicalOperator>(mPlan.getPredecessors(rlo)));
            
            // Now we have collected the required output fields of LOLoad (including required map keys).
            // We need to push these into the loader
            if (rlo instanceof LOLoad)
            {
                // LOLoad has only one output
                RequiredFields loaderRequiredFields = requiredOutputInfo.requiredFieldsList.get(0);
                prunedLoaderColumnsMap.put((LOLoad)rlo, loaderRequiredFields);
                return;
            }
            
            // If the predecessor is one of LOStore/LOStream/LODistinct, we stop tracing up.
            // We require all input fields. We stop processing here. The optimizer will
            // pick the next ForEach and start processing from there
            if (rlo instanceof LOStore || rlo instanceof LOStream || rlo instanceof LODistinct) {
                return;
            }
            
            // merge requiredOutputFields and process the predecessor
            if (rlo instanceof LOSplit)
            {
                List<RequiredFields> requiredInputFieldsList = new ArrayList<RequiredFields>();
                RequiredFields requiredFields = new RequiredFields(false);
                for (int i=0;i<mPlan.getSuccessors(rlo).size();i++)
                {
                    RequiredFields rf = null;
                    try {
                        rf = requiredOutputInfo.requiredFieldsList.get(i);
                    } catch (Exception e) {
                    }
                    if (rf!=null)
                    {
                        rf.reIndex(0);
                        requiredFields.merge(rf);
                    } else {
                        // need all fields
                        List<Pair<Integer, Integer>> l = new ArrayList<Pair<Integer, Integer>>();
                        for (int j=0;j<rlo.getSchema().size();j++)
                            l.add(new Pair<Integer, Integer>(0, j));
                        rf = new RequiredFields(l);
                        requiredFields.merge(rf);
                        break;
                    }
                }
                requiredInputFieldsList.add(requiredFields);
                if (predecessors.get(0) instanceof LOForEach || predecessors.get(0) instanceof LOSplit)
                    cachedRequiredInfo.put((RelationalOperator)predecessors.get(0), new RequiredInfo(requiredInputFieldsList));
                else
                    processNode(predecessors.get(0), new RequiredInfo(requiredInputFieldsList));
                return;
            }
            
            // Initialize requiredInputFieldsList
            List<RequiredFields> requiredInputFieldsList = new ArrayList<RequiredFields>(); 
            for (int i=0;i<predecessors.size();i++)
                requiredInputFieldsList.add(null);
            
            // Map required output columns to required input columns.
            // We also collect required output map keys into input map keys.
            // Since we have already processed Split, every remaining operator
            // has only one element in requiredOutputFieldList, so we get the first
            // element and process it
            RequiredFields requiredOutputFields = requiredOutputInfo.requiredFieldsList.get(0);
            
            // needAllFields means we require every individual output column and all map keys of that output.
            // We convert needAllFields to individual fields here to facilitate further processing
            if (requiredOutputFields.needAllFields())
            {
                List<Pair<Integer, Integer>> outputList = new ArrayList<Pair<Integer, Integer>>(); 
                for (int j=0;j<rlo.getSchema().size();j++)
                    outputList.add(new Pair<Integer, Integer>(0, j));
                requiredOutputFields = new RequiredFields(outputList);
                for (int i=0;i<requiredOutputFields.size();i++)
                    requiredOutputFields.setMapKeysInfo(i, new MapKeysInfo(true));
            }
            
            if (requiredOutputFields.getFields()==null)
            {
                int errCode = 2184;
                String msg = "Fields list inside RequiredFields is null.";
                throw new OptimizerException(msg, errCode, PigException.BUG);
            }
            
            for (int i=0;i<requiredOutputFields.size();i++)
            {
                Pair<Integer, Integer> requiredOutputField = requiredOutputFields.getField(i);
                MapKeysInfo outputMapKeysInfo = requiredOutputFields.getMapKeysInfo(i);


                List<RequiredFields> relevantFieldsList = rlo.getRelevantInputs(requiredOutputField.first, requiredOutputField.second);
                
                // We do not have any relevant input fields for this output, continue to next output 
                if (relevantFieldsList==null)
                    continue;
                
                for (int j=0;j<relevantFieldsList.size();j++)
                {
                    RequiredFields relevantFields = relevantFieldsList.get(j);
                    
                    if (relevantFields!=null && relevantFields.needAllFields())
                    {
                        requiredInputFieldsList.set(j, new RequiredFields(true));
                        continue;
                    }
                    
                    // Mapping output map keys to input map keys
                    if (rlo instanceof LOCogroup)
                    {
                        if (j!=0 && relevantFields!=null && !relevantFields.needAllFields())
                        {
                            for (Pair<Integer, Integer> pair : relevantFields.getFields())
                                relevantFields.setMapKeysInfo(pair.first, pair.second, 
                                        new MapKeysInfo(true));
                        }
                    }
                    else if (rlo instanceof LOForEach)
                    {
                        // Relay map keys from output to input
                        LogicalPlan forEachPlan = ((LOForEach)rlo).getRelevantPlan(requiredOutputField.second);
                        if (relevantFields.getFields()!=null && relevantFields.getFields().size()!=0)
                        {
                            int index = ((LOForEach)rlo).getForEachPlans().indexOf(forEachPlan);
                            // We check if the field get flattened, if it does, then we do not relay output map keys to input map keys.
                            // There are two situations:
                            // 1. input column is tuple, bag, or other simple type, there is no concept of map key, so we do not relay
                            // 2. input column is map, flatten does not do anything, we can still relay
                            boolean nonflatten = false;
                            if (!((LOForEach)rlo).getFlatten().get(index))
                            {
                                nonflatten = true;
                            }
                            else
                            {
                                // Foreach plan is flattened, check if there is only one input for this foreach plan 
                                // and input schema for that input is not map, if so, it is a dummy flatten 
                                if (forEachPlan.getRoots().size()==1 && forEachPlan.getRoots().get(0) instanceof LOProject)
                                {
                                    LOProject loProj = (LOProject)forEachPlan.getRoots().get(0);
                                    if (loProj.getExpression().getSchema()!=null &&
                                            loProj.getExpression().getSchema().getField(loProj.getCol()).type!=DataType.BAG)
                                        nonflatten = true;
                                }
                            }
                            if (nonflatten && outputMapKeysInfo!=null && isSimpleProjectCast(forEachPlan))
                            {
                                Pair<Integer, Integer> inputColumn = relevantFields.getFields().get(0);
                                relevantFields.setMapKeysInfo(inputColumn.first, inputColumn.second, outputMapKeysInfo);
                            }
                        }
                        
                        // Collect required map keys in foreach plan here.
                        // This is the only logical operator that we collect map keys 
                        // which are introduced by the operator here.
                        // For all other logical operators, it is attached to required fields
                        // of that logical operator, will process in required fields processing
                        // section
                        for (Pair<Integer, Integer> relevantField : relevantFields.getFields())
                        {
                            MapKeysInfo mapKeysInfo = getMapKeysInPlan(forEachPlan, relevantField.second);
                            relevantFields.mergeMapKeysInfo(0, relevantField.second, mapKeysInfo);
                        }
                    }
                    else
                    {
                        // For all other logical operators, we have one output column mapping to one or more input column.
                        // We copy the output map keys from the output column to the according input column
                        if (relevantFields!=null && relevantFields.getFields()!=null && outputMapKeysInfo!=null)
                        {
                            for (Pair<Integer, Integer> pair : relevantFields.getFields())
                                relevantFields.setMapKeysInfo(pair.first, pair.second, 
                                        outputMapKeysInfo);
                        }
                    }
                    
                    // Now we aggregate the input columns of this output column to the required input columns
                    if (requiredInputFieldsList.get(j)==null)
                        requiredInputFieldsList.set(j, relevantFields);
                    else
                    {
                        requiredInputFieldsList.get(j).merge(relevantFields);
                    }
                }
            }
            
            // Merge with required input fields of this logical operator.
            // RequiredInputFields come from two sources, one is mapping from required output to input, 
            // the other is from the operator itself. Here we use getRequiredFields to get the second part,
            // and merge with the first part
            List<RequiredFields> requiredFieldsListOfLOOp;
            
            // For LOForEach, all flattened fields are required fields. Even if a flattened field is not otherwise needed,
            // flattening it may expand the number of rows in the result. So flattened fields shall not be pruned.
            // LOForEach.getRequiredFields does not give the required fields. RequiredFields means that field
            // is required by all the outputs. The pipeline does not work correctly without that field. 
            // LOForEach.getRequiredFields give all the input fields referred in the LOForEach statement, but those
            // fields can still be pruned (which means, not required)
            // Eg:
            // B = foreach A generate a0, a1, a2+a3;
            // LOForEach.getRequiredFields gives (a0, a1, a2, a3);
            // However, a2,a3 can be pruned if we do not need the a2+a3 for LOForEach.
            // So here, we do not use LOForEach.getRequiredFields, instead, any flattened fields are required fields
            if (rlo instanceof LOForEach) {
                List<Pair<Integer, Integer>> flattenedInputs = new ArrayList<Pair<Integer, Integer>>();
                for (int i=0;i<rlo.getSchema().size();i++) {
                    if (((LOForEach)rlo).isInputFlattened(i)) {
                        flattenedInputs.add(new Pair<Integer, Integer>(0, i));
                    }
                }
                if (!flattenedInputs.isEmpty()) {
                    requiredFieldsListOfLOOp = new ArrayList<RequiredFields>();
                    requiredFieldsListOfLOOp.add(new RequiredFields(flattenedInputs));
                }
                else
                    requiredFieldsListOfLOOp = null;
            }
            // For LOCross/LOUnion, actually we do not require any field here
            else if (rlo instanceof LOCross || rlo instanceof LOUnion)
                requiredFieldsListOfLOOp = null;
            else
                requiredFieldsListOfLOOp = rlo.getRequiredFields();
            
            if (requiredFieldsListOfLOOp!=null)
            {
                for (int i=0;i<requiredFieldsListOfLOOp.size();i++)
                {
                    RequiredFields requiredFieldsOfLOOp = requiredFieldsListOfLOOp.get(i);
                    if (requiredInputFieldsList.get(i)==null)
                        requiredInputFieldsList.set(i, requiredFieldsOfLOOp);
                    else
                    {
                        requiredInputFieldsList.get(i).merge(requiredFieldsOfLOOp);
                    }
                }
                
                // Collect required map keys of this operator
                // Cases are:
                // 1. Single predecessor: LOFilter, LOSplitOutput, LOSort
                // 2. Multiple predecessors: LOJoin
                // 3. LOForEach does not have operator-wise required fields; we
                //    have already processed it
                // 4. LOCogroup requires all map keys (even if we cogroup by a0#'k1', a0 itself will be in bag a
                //    and we have no way to figure out which keys are referenced for a0. So we do not process it and
                //    simply require all map keys)
                // 5. Other operators do not have required fields, no need to process
                if (rlo instanceof LOFilter || rlo instanceof LOSplitOutput || rlo instanceof LOSort)
                {
                    List<LogicalPlan> innerPlans = new ArrayList<LogicalPlan>();
                    if (rlo instanceof LOFilter)
                    {
                        innerPlans.add(((LOFilter)rlo).getComparisonPlan());
                    }
                    else if (rlo instanceof LOSplitOutput)
                    {
                        innerPlans.add(((LOSplitOutput)rlo).getConditionPlan());
                    }
                    else if (rlo instanceof LOSort)
                    {
                        innerPlans.addAll(((LOSort)rlo).getSortColPlans());
                    }
                    for (LogicalPlan p : innerPlans)
                    {
                        for (RequiredFields rf : requiredFieldsListOfLOOp)
                        {
                            if (rf.getFields()==null)
                                continue;
                            for (Pair<Integer, Integer> pair : rf.getFields())
                            {
                                MapKeysInfo mapKeysInfo = getMapKeysInPlan(p, pair.second);
                                if (mapKeysInfo!=null && !mapKeysInfo.needAllKeys() && mapKeysInfo.getKeys()!=null)
                                    requiredInputFieldsList.get(0).mergeMapKeysInfo(0, pair.second, 
                                            mapKeysInfo);
                            }
                        }
                    }
                }
                else if (rlo instanceof LOJoin)
                {
                    for (int i=0;i<predecessors.size();i++)
                    {
                        Collection<LogicalPlan> joinPlans = ((LOJoin)rlo).getJoinPlans().get(predecessors.get(i));
                        if (joinPlans==null)
                            continue;
                        for (LogicalPlan p : joinPlans)
                        {
                            RequiredFields rf = requiredFieldsListOfLOOp.get(i);
                            if (rf.getFields()==null)
                                continue;
                            for (Pair<Integer, Integer> pair : rf.getFields())
                            {
                                MapKeysInfo mapKeysInfo = getMapKeysInPlan(p, pair.second);
                                if (mapKeysInfo!=null && !mapKeysInfo.needAllKeys() && mapKeysInfo.getKeys()!=null)
                                    requiredInputFieldsList.get(i).mergeMapKeysInfo(i, pair.second, 
                                            mapKeysInfo);
                            }
                        }
                    }
                }
            }
            
            // Now we finish the current logical operator, we need to process next logical operator. There are two cases:
            // 1. If the predecessor is LOForEach or LOSplit, we put requiredOutputFieldsList into cache and exit, the optimizer
            //    will invoke transform() on LOForEach or LOSplit and continue to process
            // 2. If the predecessor is otherwise, we then recursively collect required fields for the predecessor
            for (int i=0;i<predecessors.size();i++)
            {
              RelationalOperator predecessor = (RelationalOperator)predecessors.get(i);
                
                List<RequiredFields> newRequiredOutputFieldsList = new ArrayList<RequiredFields>();
              
                // In this optimization, we only prune columns and do not change structure of logical plan
                // So if we do not require anything from the input, we change it to require the first field

View Full Code Here

        }
        List<Pair<Integer, Integer>> columnsPruned = new ArrayList<Pair<Integer, Integer>>();
        List<Pair<Integer, Integer>> columnsToPrune = new ArrayList<Pair<Integer, Integer>>();
        for (int i=0;i<predecessors.size();i++)
        {
            RelationalOperator predecessor = (RelationalOperator)predecessors.get(i);
            if (prunedColumnsMap.containsKey(predecessor))
            {
                List<Pair<Integer, Integer>> predColumnsToPrune = prunedColumnsMap.get(predecessor);
                if (predColumnsToPrune!=null)
                {

View Full Code Here

        }
        List<Pair<Integer, Integer>> columnsPruned = new ArrayList<Pair<Integer, Integer>>();
        List<Pair<Integer, Integer>> columnsToPrune = new ArrayList<Pair<Integer, Integer>>();
        for (int i=0;i<predecessors.size();i++)
        {
            RelationalOperator predecessor = (RelationalOperator)predecessors.get(i);
            if (prunedColumnsMap.containsKey(predecessor))
            {
                List<Pair<Integer, Integer>> predColumnsToPrune = prunedColumnsMap.get(predecessor);
                if (predColumnsToPrune!=null)
                {

View Full Code Here

            }
            if (lo.getSchema()==null)
            {
                return;
            }
            RelationalOperator rlo = (RelationalOperator)lo;
            List<LogicalOperator> predecessors = (mPlan.getPredecessors(rlo) == null ? null
                    : new ArrayList<LogicalOperator>(mPlan.getPredecessors(rlo)));
            
            // Now we have collected the required output fields of LOLoad (including required map keys).
            // We need to push these into the loader
            if (rlo instanceof LOLoad)
            {
                // LOLoad has only one output
                RequiredFields loaderRequiredFields = requiredOutputInfo.requiredFieldsList.get(0);
                pruneLoader((LOLoad)rlo, loaderRequiredFields);
                return;
            }
            
            // If the predecessor is one of LOStore/LOStream/LODistinct, we stop tracing up.
            // We require all input fields. We stop processing here. The optimizer will
            // pick the next ForEach and start processing from there
            if (rlo instanceof LOStore || rlo instanceof LOStream || rlo instanceof LODistinct) {
                return;
            }
            
            // merge requiredOutputFields and process the predecessor
            if (rlo instanceof LOSplit)
            {
                List<RequiredFields> requiredInputFieldsList = new ArrayList<RequiredFields>();
                RequiredFields requiredFields = new RequiredFields(false);
                for (RequiredFields rf : requiredOutputInfo.requiredFieldsList)
                {
                    if (rf!=null)
                    {
                        rf.reIndex(0);
                        requiredFields.merge(rf);
                    }
                }
                requiredInputFieldsList.add(requiredFields);
                if (predecessors.get(0) instanceof LOForEach || predecessors.get(0) instanceof LOSplit)
                    cachedRequiredInfo.put((RelationalOperator)predecessors.get(0), new RequiredInfo(requiredInputFieldsList));
                else
                    processNode(predecessors.get(0), new RequiredInfo(requiredInputFieldsList));
                return;
            }
            
            // Initialize requiredInputFieldsList
            List<RequiredFields> requiredInputFieldsList = new ArrayList<RequiredFields>(); 
            for (int i=0;i<predecessors.size();i++)
                requiredInputFieldsList.add(null);
            
            // Map required output columns to required input columns.
            // We also collect required output map keys into input map keys.
            // Since we have already processed Split, every remaining operator
            // has only one element in requiredOutputFieldList, so we get the first
            // element and process it
            RequiredFields requiredOutputFields = requiredOutputInfo.requiredFieldsList.get(0);
            
            // needAllFields means we require every individual output column and all map keys of that output.
            // We convert needAllFields to individual fields here to facilitate further processing
            if (requiredOutputFields.needAllFields())
            {
                List<Pair<Integer, Integer>> outputList = new ArrayList<Pair<Integer, Integer>>(); 
                for (int j=0;j<rlo.getSchema().size();j++)
                    outputList.add(new Pair<Integer, Integer>(0, j));
                requiredOutputFields = new RequiredFields(outputList);
                for (int i=0;i<requiredOutputFields.size();i++)
                    requiredOutputFields.setMapKeysInfo(i, new MapKeysInfo(true));
            }
            
            if (requiredOutputFields.getFields()==null)
            {
                int errCode = 2184;
                String msg = "Fields list inside RequiredFields is null.";
                throw new OptimizerException(msg, errCode, PigException.BUG);
            }
            
            for (int i=0;i<requiredOutputFields.size();i++)
            {
                Pair<Integer, Integer> requiredOutputField = requiredOutputFields.getField(i);
                MapKeysInfo outputMapKeysInfo = requiredOutputFields.getMapKeysInfo(i);


                List<RequiredFields> relevantFieldsList = rlo.getRelevantInputs(requiredOutputField.first, requiredOutputField.second);
                
                // We do not have any relevant input fields for this output, continue to next output 
                if (relevantFieldsList==null)
                    continue;
                
                for (int j=0;j<relevantFieldsList.size();j++)
                {
                    RequiredFields relevantFields = relevantFieldsList.get(j);
                    
                    if (relevantFields!=null && relevantFields.needAllFields())
                    {
                        requiredInputFieldsList.set(j, new RequiredFields(true));
                        continue;
                    }
                    
                    // Mapping output map keys to input map keys
                    if (rlo instanceof LOCogroup)
                    {
                        if (j!=0 && relevantFields!=null && !relevantFields.needAllFields())
                        {
                            for (Pair<Integer, Integer> pair : relevantFields.getFields())
                                relevantFields.setMapKeysInfo(pair.first, pair.second, 
                                        new MapKeysInfo(true));
                        }
                    }
                    else if (rlo instanceof LOForEach)
                    {
                        // Relay map keys from output to input
                        LogicalPlan forEachPlan = ((LOForEach)rlo).getRelevantPlan(requiredOutputField.second);
                        if (relevantFields.getFields()!=null && relevantFields.getFields().size()!=0)
                        {
                            int index = ((LOForEach)rlo).getForEachPlans().indexOf(forEachPlan);
                            // We check if the field get flattened, if it does, then we do not relay output map keys to input map keys.
                            // There are two situations:
                            // 1. input column is tuple, bag, or other simple type, there is no concept of map key, so we do not relay
                            // 2. input column is map, flatten does not do anything, we can still relay
                            boolean nonflatten = false;
                            if (!((LOForEach)rlo).getFlatten().get(index))
                            {
                                nonflatten = true;
                            }
                            else
                            {
                                // Foreach plan is flattened, check if there is only one input for this foreach plan 
                                // and input schema for that input is not map, if so, it is a dummy flatten 
                                if (forEachPlan.getRoots().size()==1 && forEachPlan.getRoots().get(0) instanceof LOProject)
                                {
                                    LOProject loProj = (LOProject)forEachPlan.getRoots().get(0);
                                    if (loProj.getExpression().getSchema()!=null &&
                                            loProj.getExpression().getSchema().getField(loProj.getCol()).type!=DataType.BAG)
                                        nonflatten = true;
                                }
                            }
                            if (nonflatten && outputMapKeysInfo!=null && isSimpleProjectCast(forEachPlan))
                            {
                                Pair<Integer, Integer> inputColumn = relevantFields.getFields().get(0);
                                relevantFields.setMapKeysInfo(inputColumn.first, inputColumn.second, outputMapKeysInfo);
                            }
                        }
                        
                        // Collect required map keys in foreach plan here.
                        // This is the only logical operator that we collect map keys 
                        // which are introduced by the operator here.
                        // For all other logical operators, it is attached to required fields
                        // of that logical operator, will process in required fields processing
                        // section
                        for (Pair<Integer, Integer> relevantField : relevantFields.getFields())
                        {
                            MapKeysInfo mapKeysInfo = getMapKeysInPlan(forEachPlan, relevantField.second);
                            relevantFields.mergeMapKeysInfo(0, relevantField.second, mapKeysInfo);
                        }
                    }
                    else
                    {
                        // For all other logical operators, we have one output column mapping to one or more input column.
                        // We copy the output map keys from the output column to the according input column
                        if (relevantFields!=null && relevantFields.getFields()!=null && outputMapKeysInfo!=null)
                        {
                            for (Pair<Integer, Integer> pair : relevantFields.getFields())
                                relevantFields.setMapKeysInfo(pair.first, pair.second, 
                                        outputMapKeysInfo);
                        }
                    }
                    
                    // Now we aggregate the input columns of this output column to the required input columns
                    if (requiredInputFieldsList.get(j)==null)
                        requiredInputFieldsList.set(j, relevantFields);
                    else
                    {
                        requiredInputFieldsList.get(j).merge(relevantFields);
                    }
                }
            }


                
            // Merge with required input fields of this logical operator.
            // RequiredInputFields come from two sources, one is mapping from required output to input, 
            // the other is from the operator itself. Here we use getRequiredFields to get the second part,
            // and merge with the first part
            List<RequiredFields> requiredFieldsListOfLOOp;
            
            // For LOForEach, requiredFields is not really required fields. Here required fields means the input
            // fields required by the entire output columns, such as filter condition in LOFilter, group columns in LOCoGroup.
            // For LOForEach, output columns are generated by the foreach plan it belongs to, there is nothing globally required.
            // So we need to fix the semantic gap here. If the operator is LOForEach, requiredFields is null.
            // For LOCross/LOUnion, actually we do not require any field here
            if (rlo instanceof LOForEach || rlo instanceof LOCross || rlo instanceof LOUnion)
                requiredFieldsListOfLOOp = null;
            else
                requiredFieldsListOfLOOp = rlo.getRequiredFields();
            
            if (requiredFieldsListOfLOOp!=null)
            {
                for (int i=0;i<requiredFieldsListOfLOOp.size();i++)
                {
                    RequiredFields requiredFieldsOfLOOp = requiredFieldsListOfLOOp.get(i);
                    if (requiredInputFieldsList.get(i)==null)
                        requiredInputFieldsList.set(i, requiredFieldsOfLOOp);
                    else
                    {
                        requiredInputFieldsList.get(i).merge(requiredFieldsOfLOOp);
                    }
                }
                
                // Collect required map keys of this operator
                // Cases are:
                // 1. Single predecessor: LOFilter, LOSplitOutput, LOSort
                // 2. Multiple predecessors: LOJoin
                // 3. LOForEach does not have operator-wise required fields; we
                //    have already processed it
                // 4. LOCogroup requires all map keys (even if we cogroup by a0#'k1', a0 itself will be in bag a
                //    and we have no way to figure out which keys are referenced for a0. So we do not process it and
                //    simply require all map keys)
                // 5. Other operators do not have required fields, no need to process
                if (rlo instanceof LOFilter || rlo instanceof LOSplitOutput || rlo instanceof LOSort)
                {
                    List<LogicalPlan> innerPlans = new ArrayList<LogicalPlan>();
                    if (rlo instanceof LOFilter)
                    {
                        innerPlans.add(((LOFilter)rlo).getComparisonPlan());
                    }
                    else if (rlo instanceof LOSplitOutput)
                    {
                        innerPlans.add(((LOSplitOutput)rlo).getConditionPlan());
                    }
                    else if (rlo instanceof LOSort)
                    {
                        innerPlans.addAll(((LOSort)rlo).getSortColPlans());
                    }
                    for (LogicalPlan p : innerPlans)
                    {
                        for (RequiredFields rf : requiredFieldsListOfLOOp)
                        {
                            if (rf.getFields()==null)
                                continue;
                            for (Pair<Integer, Integer> pair : rf.getFields())
                            {
                                MapKeysInfo mapKeysInfo = getMapKeysInPlan(p, pair.second);
                                if (mapKeysInfo!=null && !mapKeysInfo.needAllKeys() && mapKeysInfo.getKeys()!=null)
                                    requiredInputFieldsList.get(0).mergeMapKeysInfo(0, pair.second, 
                                            mapKeysInfo);
                            }
                        }
                    }
                }
                else if (rlo instanceof LOJoin)
                {
                    for (int i=0;i<predecessors.size();i++)
                    {
                        Collection<LogicalPlan> joinPlans = ((LOJoin)rlo).getJoinPlans().get(predecessors.get(i));
                        if (joinPlans==null)
                            continue;
                        for (LogicalPlan p : joinPlans)
                        {
                            RequiredFields rf = requiredFieldsListOfLOOp.get(i);
                            if (rf.getFields()==null)
                                continue;
                            for (Pair<Integer, Integer> pair : rf.getFields())
                            {
                                MapKeysInfo mapKeysInfo = getMapKeysInPlan(p, pair.second);
                                if (mapKeysInfo!=null && !mapKeysInfo.needAllKeys() && mapKeysInfo.getKeys()!=null)
                                    requiredInputFieldsList.get(i).mergeMapKeysInfo(i, pair.second, 
                                            mapKeysInfo);
                            }
                        }
                    }
                }
            }
            
            // We have now finished the current logical operator and need to process the next one. There are two cases:
            // 1. If the predecessor is LOForEach or LOSplit, we put requiredOutputFieldsList into the cache and exit; the optimizer
            //    will invoke transform() on the LOForEach or LOSplit and continue processing from there
            // 2. Otherwise, we recursively collect the required fields for the predecessor
            for (int i=0;i<predecessors.size();i++)
            {
              RelationalOperator predecessor = (RelationalOperator)predecessors.get(i);
                
                List<RequiredFields> newRequiredOutputFieldsList = new ArrayList<RequiredFields>();
              
                // In this optimization, we only prune columns and do not change structure of logical plan
                // So if we do not require anything from the input, we change it to require the first field

View Full Code Here

        }
        List<Pair<Integer, Integer>> columnsPruned = new ArrayList<Pair<Integer, Integer>>();
        List<Pair<Integer, Integer>> columnsToPrune = new ArrayList<Pair<Integer, Integer>>();
        for (int i=0;i<predecessors.size();i++)
        {
            RelationalOperator predecessor = (RelationalOperator)predecessors.get(i);
            if (prunedColumnsMap.containsKey(predecessor))
            {
                List<Pair<Integer, Integer>> predColumnsToPrune = prunedColumnsMap.get(predecessor);
                if (predColumnsToPrune!=null)
                {

View Full Code Here

TOP

Related Classes of org.apache.pig.impl.logicalLayer.RelationalOperator

org.apache.pig.impl.logicalLayer.ColumnPruner

org.apache.pig.impl.logicalLayer.optimizer.PruneColumns

org.apache.pig.impl.plan.OperatorKey

org.apache.pig.impl.plan.ProjectionMap.Column

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.