Package org.apache.pig.newplan.logical.relational

Examples of org.apache.pig.newplan.logical.relational.LogicalSchema


        Pair<Map<Integer,Set<String>>,Set<Integer>> required =
            requiredItems.get(load);
       
        RequiredFieldList requiredFields = new RequiredFieldList();

        LogicalSchema s = load.getSchema();
        for (int i=0;i<s.size();i++) {
            RequiredField requiredField = null;
            // As we have done processing ahead, we assume that
            // a column is not present in both ColumnPruner and
            // MapPruner
            if( required.first != null && required.first.containsKey(i) ) {
                requiredField = new RequiredField();
                requiredField.setIndex(i);
                requiredField.setType(s.getField(i).type);
                List<RequiredField> subFields = new ArrayList<RequiredField>();
                for( String key : required.first.get(i) ) {
                    RequiredField subField = new RequiredField(key,-1,null,DataType.BYTEARRAY);
                    subFields.add(subField);
                }
                requiredField.setSubFields(subFields);
                requiredFields.add(requiredField);
            }
            if( required.second != null && required.second.contains(i) ) {
                requiredField = new RequiredField();
                requiredField.setIndex(i);
                requiredField.setType(s.getField(i).type);     
                requiredFields.add(requiredField);
            }
        }
       
        boolean[] columnRequired = new boolean[s.size()];
        for (RequiredField rf : requiredFields.getFields())
            columnRequired[rf.getIndex()] = true;
       
        List<Pair<Integer, Integer>> pruneList = new ArrayList<Pair<Integer, Integer>>();
        for (int i=0;i<columnRequired.length;i++)
        {
            if (!columnRequired[i])
                pruneList.add(new Pair<Integer, Integer>(0, i));
        }
        StringBuffer message = new StringBuffer();
        if (pruneList.size()!=0)
        {
            message.append("Columns pruned for " + load.getAlias() + ": ");
            for (int i=0;i<pruneList.size();i++)
            {
                message.append("$"+pruneList.get(i).second);
                if (i!=pruneList.size()-1)
                    message.append(", ");
            }
            log.info(message);
        }
       
        message = new StringBuffer();
        for(RequiredField rf: requiredFields.getFields()) {
            List<RequiredField> sub = rf.getSubFields();
            if (sub != null) {
                message.append("Map key required for " + load.getAlias() + ": $" + rf.getIndex() + "->" + sub + "\n");
            }
        }
        if (message.length()!=0)
            log.info(message);
       
        LoadPushDown.RequiredFieldResponse response = null;
        try {
            LoadFunc loadFunc = load.getLoadFunc();
            if (loadFunc instanceof LoadPushDown) {
                response = ((LoadPushDown)loadFunc).pushProjection(requiredFields);
            }
                               
        } catch (FrontendException e) {
            log.warn("pushProjection on "+load+" throw an exception, skip it");
        }                     
       
        // Loader does not support column pruning, insert foreach     
        if (columnPrune) {
            if (response==null || !response.getRequiredFieldResponse()) {
                LogicalPlan p = (LogicalPlan)load.getPlan();                       
                Operator next = p.getSuccessors(load).get(0);
                // if there is already a LOForEach after load, we don't need to
                // add another LOForEach
                if (next instanceof LOForEach) {
                    return;
                }
               
                LOForEach foreach = new LOForEach(load.getPlan());
               
                // add foreach to the base plan                      
                p.add(foreach);
                              
                p.insertBetween(load, foreach, next);
               
                LogicalPlan innerPlan = new LogicalPlan();
                foreach.setInnerPlan(innerPlan);
               
                // build foreach inner plan
                List<LogicalExpressionPlan> exps = new ArrayList<LogicalExpressionPlan>();             
                LOGenerate gen = new LOGenerate(innerPlan, exps, new boolean[requiredFields.getFields().size()]);
                innerPlan.add(gen);
               
                for (int i=0; i<requiredFields.getFields().size(); i++) {
                    LoadPushDown.RequiredField rf = requiredFields.getFields().get(i);
                    LOInnerLoad innerLoad = new LOInnerLoad(innerPlan, foreach, rf.getIndex());                   
                    innerPlan.add(innerLoad);         
                    innerPlan.connect(innerLoad, gen);
                   
                    LogicalExpressionPlan exp = new LogicalExpressionPlan();
                    ProjectExpression prj = new ProjectExpression(exp, i, -1, gen);
                    exp.add(prj);
                    exps.add(exp);
                }               
              
            } else {
                // columns are pruned, reset schema for LOLoader
                List<Integer> requiredIndexes = new ArrayList<Integer>();
                List<LoadPushDown.RequiredField> fieldList = requiredFields.getFields();
                for (int i=0; i<fieldList.size(); i++) {                   
                    requiredIndexes.add(fieldList.get(i).getIndex());
                }

                load.setRequiredFields(requiredIndexes);
               
                LogicalSchema newSchema = new LogicalSchema();
                for (int i=0; i<fieldList.size(); i++) {                   
                    newSchema.addField(s.getField(fieldList.get(i).getIndex()));
                }
               
                load.setSchema(newSchema);
            }
        }
View Full Code Here


    // Add ForEach after op to prune unnecessary columns
    @SuppressWarnings("unchecked")
    private void addForEachIfNecessary(LogicalRelationalOperator op) throws FrontendException {
        Set<Long> outputUids = (Set<Long>)op.getAnnotation(ColumnPruneHelper.OUTPUTUIDS);
        if (outputUids!=null) {
            LogicalSchema schema = op.getSchema();
            Set<Integer> columnsToDrop = new HashSet<Integer>();
           
            for (int i=0;i<schema.size();i++) {
                if (!outputUids.contains(schema.getField(i).uid))
                    columnsToDrop.add(i);
            }
           
            if (!columnsToDrop.isEmpty()) {
                LOForEach foreach = Util.addForEachAfter((LogicalPlan)op.getPlan(), op, 0, columnsToDrop);
View Full Code Here

            }
            return mappedKeys;
        }

        private void setupColNameMaps() throws FrontendException {
            LogicalSchema loLoadSchema = loLoad.getSchema();
            LogicalSchema loadFuncSchema = loLoad.getDeterminedSchema();
             for(int i = 0; i < loadFuncSchema.size(); i++) {
                colNameMap.put(loadFuncSchema.getField(i).alias,
                        (i < loLoadSchema.size() ? loLoadSchema.getField(i).alias :
                            loadFuncSchema.getField(i).alias));
               
                reverseColNameMap.put((i < loLoadSchema.size() ? loLoadSchema.getField(i).alias :
                            loadFuncSchema.getField(i).alias),
                            loadFuncSchema.getField(i).alias);
            }
        }
View Full Code Here

   
    public class TypeCastInserterTransformer extends Transformer {
        @Override
        public boolean check(OperatorPlan matched) throws FrontendException {
            LogicalRelationalOperator op = (LogicalRelationalOperator)matched.getSources().get(0);
            LogicalSchema s = op.getSchema();
            if (s == null) return false;
   
            if (op instanceof LOLoad) {
                if (((LOLoad)op).isCastInserted()) return false;
            }
            else {
                if (((LOStream)op).isCastInserted()) return false;
            }
           
            boolean sawOne = false;
            List<LogicalSchema.LogicalFieldSchema> fss = s.getFields();
            LogicalSchema determinedSchema = null;
            if(LOLoad.class.getName().equals(getOperatorClassName())) {
                determinedSchema = ((LOLoad)op).getDeterminedSchema();
            }
            for (int i = 0; i < fss.size(); i++) {
                if (fss.get(i).type != DataType.BYTEARRAY) {
                    if(determinedSchema == null ||
                            (!fss.get(i).isEqual(determinedSchema.getField(i)))) {
                            // Either no schema was determined by loader OR the type
                            // from the "determinedSchema" is different
                            // from the type specified - so we need to cast
                            sawOne = true;
                        }
View Full Code Here

        }

        @Override
        public void transform(OperatorPlan matched) throws FrontendException {
            LogicalRelationalOperator op = (LogicalRelationalOperator)matched.getSources().get(0);
            LogicalSchema s = op.getSchema();
            // For every field, build a logical plan.  If the field has a type
            // other than byte array, then the plan will be cast(project).  Else
            // it will just be project.
            LogicalPlan innerPlan = new LogicalPlan();
           
            LOForEach foreach = new LOForEach(currentPlan);
            foreach.setInnerPlan(innerPlan);
            foreach.setAlias(op.getAlias());
           
            // Insert the foreach into the plan and patch up the plan.
            Operator next = currentPlan.getSuccessors(op).get(0);
            currentPlan.insertBetween(op, foreach, next);
           
            List<LogicalExpressionPlan> exps = new ArrayList<LogicalExpressionPlan>();
            LOGenerate gen = new LOGenerate(innerPlan, exps, new boolean[s.size()]);
            innerPlan.add(gen);
           
            // if we are inserting casts in a load and if the loader
            // implements determineSchema(), insert casts only where necessary
            // Note that in this case, the data coming out of the loader is not
            // a BYTEARRAY but is whatever determineSchema() says it is.
            LogicalSchema determinedSchema = null;
            if(LOLoad.class.getName().equals(getOperatorClassName())) {
                determinedSchema = ((LOLoad)op).getDeterminedSchema();
            }
            else {
                determinedSchema = new LogicalSchema();
                for (int i=0;i<s.size();i++) {
                    determinedSchema.addField(new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
                }
            }
            for (int i = 0; i < s.size(); i++) {
                LogicalSchema.LogicalFieldSchema fs = s.getField(i);
               
                LOInnerLoad innerLoad = new LOInnerLoad(innerPlan, foreach, i);
                innerPlan.add(innerLoad);         
                innerPlan.connect(innerLoad, gen);
               
                LogicalExpressionPlan exp = new LogicalExpressionPlan();
               
                ProjectExpression prj = new ProjectExpression(exp, i, 0, gen);
                exp.add(prj);
               
                if (fs.type != DataType.BYTEARRAY && (determinedSchema == null || (!fs.isEqual(determinedSchema.getField(i))))) {
                    // Either no schema was determined by loader OR the type
                    // from the "determinedSchema" is different
                    // from the type specified - so we need to cast
                    CastExpression cast = new CastExpression(exp, prj, new LogicalSchema.LogicalFieldSchema(fs));
                    exp.add(cast);
View Full Code Here

        List<Operator> ll = subPlan.getSources();
        boolean found = false;
        for(Operator op: ll) {
            if (op instanceof LOLoad) {
                Set<Long> uids = (Set<Long>)op.getAnnotation(INPUTUIDS);
                LogicalSchema s = ((LOLoad) op).getSchema();
                Set<Integer> required = getColumns(s, uids);
               
                if (required.size() < s.size()) {
                    op.annotate(REQUIREDCOLS, required);             
                    found = true;
                }
            }
        }
View Full Code Here

        public void visit(LOStore store) throws FrontendException {
            Set<Long> output = setOutputUids(store);           
           
            if (output.isEmpty()) {
                // to deal with load-store-load-store case
                LogicalSchema s = store.getSchema();
                if (s == null) {
                    throw new SchemaNotDefinedException("Schema for " + store.getName() + " is not defined.");
                }
                               
                for(int i=0; i<s.size(); i++) {
                    output.add(s.getField(i).uid);
                }                                               
            }       
           
            // for store, input uids are same as output uids
            store.annotate(INPUTUIDS, output);
View Full Code Here

        @Override
        public void visit(LODistinct distinct) throws FrontendException {
            Set<Long> input = new HashSet<Long>();
           
            // Every field is required
            LogicalSchema s = distinct.getSchema();
            if (s == null) {
                throw new SchemaNotDefinedException("Schema for " + distinct.getName() + " is not defined.");
            }
           
            for(int i=0; i<s.size(); i++) {
                input.add(s.getField(i).uid);
            }                                               
            distinct.annotate(INPUTUIDS, input);
        }
View Full Code Here

            Set<Long> output = setOutputUids(cross);
            // Since we do not change the topology of the plan, we keep
            // at least one input for each predecessor.
            List<Operator> preds = plan.getPredecessors(cross);
            for (Operator pred : preds) {
                LogicalSchema schema = ((LogicalRelationalOperator)pred).getSchema();
                Set<Long> uids = getAllUids(schema);
                boolean allPruned = true;
                for (Long uid : uids) {
                    if (output.contains(uid))
                        allPruned = false;
                }
                if (allPruned)
                    output.add(schema.getField(0).uid);
            }
            cross.annotate(INPUTUIDS, output);
        }
View Full Code Here

             while(iter.hasNext()) {
                 long uid = iter.next();
                 for(int i=0; i<ll.size(); i++) {
                     LogicalExpressionPlan exp = ll.get(i);
                     boolean found = false;
                     LogicalSchema planSchema = gen.getOutputPlanSchemas().get(i);
                     for (LogicalFieldSchema fs : planSchema.getFields()) {
                         if (fs.uid == uid) {
                             found = true;
                             break;
                         }
                     }
View Full Code Here

TOP

Related Classes of org.apache.pig.newplan.logical.relational.LogicalSchema

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.