Package org.apache.pig.newplan.logical.relational

Examples of org.apache.pig.newplan.logical.relational.LogicalSchema


                    if (!((ProjectExpression)op).isProjectStar()) {
                        long uid = ((ProjectExpression)op).getFieldSchema().uid;
                        uids.add(uid);
                    } else {
                        LogicalRelationalOperator ref = ((ProjectExpression)op).findReferent();
                        LogicalSchema s = ref.getSchema();
                        if (s == null) {
                            throw new SchemaNotDefinedException("Schema not defined for " + ref.getAlias());
                        }
                        for(LogicalFieldSchema f: s.getFields()) {
                            uids.add(f.uid);
                        }
                    }
                }
            }
View Full Code Here


        private Set<Long> setOutputUids(LogicalRelationalOperator op) throws FrontendException {
           
            List<Operator> ll = plan.getSuccessors(op);
            Set<Long> uids = new HashSet<Long>();
           
            LogicalSchema s = op.getSchema();
            if (s == null) {
                throw new SchemaNotDefinedException("Schema for " + op.getName() + " is not defined.");
            }
                           
            if (ll != null) {
                // if this is not sink, the output uids are union of input uids of its successors
                for(Operator succ: ll) {
                    Set<Long> inputUids = (Set<Long>)succ.getAnnotation(INPUTUIDS);
                    if (inputUids != null) {
                        Iterator<Long> iter = inputUids.iterator();
                        while(iter.hasNext()) {
                            long uid = iter.next();
                           
                            if (s.findField(uid) != -1) {
                                uids.add(uid);
                            }
                        }
                    }
                }
            } else {
                // if  it's leaf, set to its schema               
                for(int i=0; i<s.size(); i++) {
                    uids.add(s.getField(i).uid);
                }                               
            }
           
            op.annotate(OUTPUTUIDS, uids);
            return uids;
View Full Code Here

                //predecessors haven't been setup, return null
                return null;
            }
        }

        LogicalSchema inputSchema = new LogicalSchema();
        List<Operator> succs = plan.getSuccessors(this);

        if (succs!=null) {
            for(Operator lo : succs){
                if (((LogicalExpression)lo).getFieldSchema()==null) {
                    inputSchema = null;
                    break;
                }
                inputSchema.addField(((LogicalExpression)lo).getFieldSchema());
            }
        }

        // Since ef only set one time, we never change its value, so we can optimize it by instantiate only once.
        // This significantly optimize the performance of frontend (PIG-1738)
View Full Code Here

        // store D into 'whatever';
       
        // A = load
        LogicalPlan lp = new LogicalPlan();
        {
          LogicalSchema aschema = new LogicalSchema();
          aschema.addField(new LogicalSchema.LogicalFieldSchema(
              "x", null, DataType.BYTEARRAY));
          aschema.addField(new LogicalSchema.LogicalFieldSchema(
              "y", null, DataType.BYTEARRAY));
          LOLoad A = new LOLoad(new FileSpec("bla", new FuncSpec("PigStorage", "\t")), aschema, lp, null);
          A.setAlias("A");
          lp.add(A);
         
          // B = load
          LogicalSchema bschema = new LogicalSchema();
          bschema.addField(new LogicalSchema.LogicalFieldSchema(
              "a", null, DataType.BYTEARRAY));
          bschema.addField(new LogicalSchema.LogicalFieldSchema(
              "b", null, DataType.BYTEARRAY));
          LOLoad B = new LOLoad(new FileSpec("morebla", new FuncSpec("PigStorage", "\t")), bschema, lp, null);
          B.setAlias("B");
          lp.add(B);
         
          // C = join
          LogicalSchema cschema = new LogicalSchema();
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "x", null, DataType.BYTEARRAY));
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "y", null, DataType.BYTEARRAY));
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "a", null, DataType.BYTEARRAY));
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "b", null, DataType.BYTEARRAY));
          MultiMap<Integer, LogicalExpressionPlan> mm =
                new MultiMap<Integer, LogicalExpressionPlan>();
          LogicalExpressionPlan aprojplan = new LogicalExpressionPlan();
          LOJoin C = new LOJoin(lp, mm, JOINTYPE.HASH, new boolean[] {true, true});
          new ProjectExpression(aprojplan, 0, 0, C);
          LogicalExpressionPlan bprojplan = new LogicalExpressionPlan();
          new ProjectExpression(bprojplan, 1, 0, C);
          mm.put(0, aprojplan);
          mm.put(1, bprojplan);
         
          C.setAlias("C");
          lp.add(C);
          lp.connect(A, C);
          lp.connect(B, C);
       
          // D = filter
          LogicalExpressionPlan filterPlan = new LogicalExpressionPlan();
          LOFilter D = new LOFilter(lp, filterPlan);
          ProjectExpression fx = new ProjectExpression(filterPlan, 0, 0, D);
          ConstantExpression fc0 = new ConstantExpression(filterPlan, new Integer(0), new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          EqualExpression eq1 = new EqualExpression(filterPlan, fx, fc0);
          ProjectExpression fanotherx = new ProjectExpression(filterPlan, 0, 0, D);
          ProjectExpression fa = new ProjectExpression(filterPlan, 0, 2, D);
          EqualExpression eq2 = new EqualExpression(filterPlan, fanotherx, fa);
          AndExpression and1 = new AndExpression(filterPlan, eq1, eq2);
          ProjectExpression fb = new ProjectExpression(filterPlan, 0, 3, D);
          ConstantExpression fc1 = new ConstantExpression(filterPlan, new Integer(1),new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          EqualExpression eq3 = new EqualExpression(filterPlan, fb, fc1);
          AndExpression and2 = new AndExpression(filterPlan, and1, eq3);
          ProjectExpression fanotherb = new ProjectExpression(filterPlan, 0, 3, D);
          ProjectExpression fy = new ProjectExpression(filterPlan, 0, 1, D);
          EqualExpression eq4 = new EqualExpression(filterPlan, fy, fanotherb);
          new AndExpression(filterPlan, and2, eq4);
         
          D.setAlias("D");
          // Connect D to B, since the transform has happened.
          lp.add(D);
          lp.connect(C, D);
        }
       
        System.out.println(lp);
        LogicalPlanOptimizer optimizer = new LogicalPlanOptimizer(lp, 500, null);
        optimizer.optimize();
       
        LogicalPlan expected = new LogicalPlan();
        {
            // A = load
          LogicalSchema aschema = new LogicalSchema();
          aschema.addField(new LogicalSchema.LogicalFieldSchema(
              "x", null, DataType.BYTEARRAY));
          aschema.addField(new LogicalSchema.LogicalFieldSchema(
              "y", null, DataType.BYTEARRAY));
          LOLoad A = new LOLoad(new FileSpec("bla", new FuncSpec("PigStorage", "\t")), aschema, expected, null);
          expected.add(A);
         
          // DA = filter
          LogicalExpressionPlan DAfilterPlan = new LogicalExpressionPlan();
          LOFilter DA = new LOFilter(expected, DAfilterPlan);
          ProjectExpression fx = new ProjectExpression(DAfilterPlan, 0, 0, DA);
          fx.neverUseForRealSetFieldSchema(new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          ConstantExpression fc0 = new ConstantExpression(DAfilterPlan, new Integer(0), new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          new EqualExpression(DAfilterPlan, fx, fc0);
         
          DA.neverUseForRealSetSchema(aschema);
          expected.add(DA);
          expected.connect(A, DA);
         
          // B = load
          LogicalSchema bschema = new LogicalSchema();
          bschema.addField(new LogicalSchema.LogicalFieldSchema(
              "a", null, DataType.BYTEARRAY));
          bschema.addField(new LogicalSchema.LogicalFieldSchema(
              "b", null, DataType.BYTEARRAY));
          LOLoad B = new LOLoad(new FileSpec("morebla", new FuncSpec("PigStorage", "\t")), bschema, expected, null);
          expected.add(B);
         
          // DB = filter
          LogicalExpressionPlan DBfilterPlan = new LogicalExpressionPlan();
            LOFilter DB = new LOFilter(expected, DBfilterPlan);
          ProjectExpression fb = new ProjectExpression(DBfilterPlan, 0, 1, DB);
          fb.neverUseForRealSetFieldSchema(new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          ConstantExpression fc1 = new ConstantExpression(DBfilterPlan, new Integer(1), new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          new EqualExpression(DBfilterPlan, fb, fc1);

          DB.neverUseForRealSetSchema(bschema);
          expected.add(DB);
          expected.connect(B, DB);
         
          // C = join
          LogicalSchema cschema = new LogicalSchema();
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "D::x", null, DataType.BYTEARRAY));
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "D::y", null, DataType.BYTEARRAY));
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "null::a", null, DataType.BYTEARRAY));
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "null::b", null, DataType.BYTEARRAY));
          cschema.getField(0).uid = 1;
          cschema.getField(1).uid = 2;
          cschema.getField(2).uid = 3;
          cschema.getField(3).uid = 4;
          LogicalExpressionPlan aprojplan = new LogicalExpressionPlan();
            MultiMap<Integer, LogicalExpressionPlan> mm =
                new MultiMap<Integer, LogicalExpressionPlan>();
            LOJoin C = new LOJoin(expected, mm, JOINTYPE.HASH, new boolean[] {true, true});
View Full Code Here

                    op = iter.next();
                    if( op instanceof ProjectExpression ) {
                        ProjectExpression proj = (ProjectExpression)op;
                        if( proj.isProjectStar() ) {
                            LogicalRelationalOperator pred = (LogicalRelationalOperator)filter.getPlan().getPredecessors(filter).get(0);
                            LogicalSchema predSchema = pred.getSchema();
                            if (predSchema!=null) {
                                for (int i=0;i<predSchema.size();i++) {
                                    uids.add(predSchema.getField(i).uid);
                                    types.add(predSchema.getField(i).type);
                                }
                            }
                        } else {
                            uids.add(proj.getFieldSchema().uid);
                            types.add(proj.getFieldSchema().type);
View Full Code Here

         * @param uids Uids to check for
         * @return true if given LogicalRelationalOperator has all the given uids
         */
        private boolean hasAll(LogicalRelationalOperator op, Pair<List<Long>,
                List<Byte>> uidWithTypes) throws FrontendException {
            LogicalSchema schema = op.getSchema();
           
            if (schema==null)
                return false;
           
            List<Long> uids = uidWithTypes.first;
            List<Byte> types = uidWithTypes.second;
           
            for (int i=0;i<uids.size();i++) {
                boolean found = false;
                for (LogicalSchema.LogicalFieldSchema fs : schema.getFields()) {
                    if (fs.uid==uids.get(i) && fs.type==types.get(i))
                        found = true;
                }
                if (!found)
                    return false;
View Full Code Here

            }
        }
       
        // check if a relational operator contains all of the specified uids
        private boolean hasAll(LogicalRelationalOperator op, Set<Long> uids) throws FrontendException {
            LogicalSchema schema = op.getSchema();
            for(long uid: uids) {
                if (schema.findField(uid) == -1) {
                    return false;
                }
            }
           
            return true;
View Full Code Here

                return;
            }
            else {
                // Get the schema for this operator and search it for the matching uid
                int match = -1;
                LogicalSchema schema = pred.getSchema();
                if (schema==null)
                    return;
                List<LogicalSchema.LogicalFieldSchema> fields = schema.getFields();
                for (int i = 0; i < fields.size(); i++) {
                    if (fields.get(i).uid == myUid) {
                        match = i;
                        break;
                    }
View Full Code Here

       
        // First check if we have a load with a map in it or not
        List<Operator> sources = currentPlan.getSources();
       
        for( Operator source : sources ) {
            LogicalSchema schema = ((LogicalRelationalOperator)source).getSchema();
            // If any of the loads has a null schema we dont know the ramifications here
            // so we skip this optimization
            if( schema == null ) {
                return false;
            }
        }
                   
        // Now we check what keys are needed
        MapMarker marker = new MapMarker(currentPlan);
        marker.visit();
       
        // If the uid is the input uid of LOStore, LOCogroup, LOUnion, UserFunc, that means
        // the entire map may be used. For simplicity, we do not prune any map key in this case
        Set<Long> fullMapUids = new HashSet<Long>();
        FullMapCollector collector = new FullMapCollector(currentPlan, fullMapUids);
        collector.visit();
       
        // If we have found specific keys which are needed then we return true;
        // Else if we dont have any specific keys we return false
        boolean hasAnnotation = false;
        for( Operator source : sources ) {
            Map<Integer,Set<String>> annotationValue =
                (Map<Integer, Set<String>>) ((LogicalRelationalOperator)source).getAnnotation(REQUIRED_MAPKEYS);
           
            // Now for all full maps found in sinks we cannot prune them at source
            if( ! fullMapUids.isEmpty() && annotationValue != null &&
                    !annotationValue.isEmpty() ) {
                Integer[] annotationKeyArray = annotationValue.keySet().toArray( new Integer[0] );
                LogicalSchema sourceSchema = ((LogicalRelationalOperator)source).getSchema();
                for( Integer col : annotationKeyArray ) {                 
                    if( fullMapUids.contains(sourceSchema.getField(col).uid)) {
                        annotationValue.remove( col );
                    }
                }
            }
           
View Full Code Here

    public LogicalSchema.LogicalFieldSchema getFieldSchema() throws FrontendException {
        if (fieldSchema!=null)
            return fieldSchema;
        LogicalRelationalOperator referent = findReferent();
       
        LogicalSchema schema = referent.getSchema();
       
        if (attachedRelationalOp instanceof LOGenerate && plan.getSuccessors(this)==null) {
            if (!(findReferent() instanceof LOInnerLoad)||
                    ((LOInnerLoad)findReferent()).sourceIsBag()) {
                String alias = findReferent().getAlias();
                List<LOInnerLoad> innerLoads = LOForEach.findReacheableInnerLoadFromBoundaryProject(this);
               
                // pull tuple information from innerload
                if (innerLoads.get(0).getProjection().getFieldSchema().schema!=null &&
                        innerLoads.get(0).getProjection().getFieldSchema().schema.isTwoLevelAccessRequired()) {
                    LogicalFieldSchema originalTupleFieldSchema = innerLoads.get(0).getProjection().getFieldSchema().schema.getField(0);
                    LogicalFieldSchema newTupleFieldSchema = new LogicalFieldSchema(originalTupleFieldSchema.alias,
                            schema, DataType.TUPLE);
                    newTupleFieldSchema.uid = originalTupleFieldSchema.uid;
                    LogicalSchema newTupleSchema = new LogicalSchema();
                    newTupleSchema.setTwoLevelAccessRequired(true);
                    newTupleSchema.addField(newTupleFieldSchema);
                    fieldSchema = new LogicalSchema.LogicalFieldSchema(alias, newTupleSchema, DataType.BAG);
                }
                else {
                    fieldSchema = new LogicalSchema.LogicalFieldSchema(alias, schema, DataType.BAG);
                }
                fieldSchema.uid = innerLoads.get(0).getProjection().getFieldSchema().uid;
            }
            else {
                if (findReferent().getSchema()!=null)
                    fieldSchema = findReferent().getSchema().getField(0);
            }
            if (fieldSchema!=null)
                uidOnlyFieldSchema = fieldSchema.mergeUid(uidOnlyFieldSchema);
        }
        else {
            if (schema == null) {
                fieldSchema = new LogicalSchema.LogicalFieldSchema(null, null, DataType.BYTEARRAY);
                uidOnlyFieldSchema = fieldSchema.mergeUid(uidOnlyFieldSchema);
            }
            else {
                int index = -1;
                if (!isProjectStar() && uidOnlyFieldSchema!=null) {
                    long uid = uidOnlyFieldSchema.uid;
                    for (int i=0;i<schema.size();i++) {
                        LogicalFieldSchema fs = schema.getField(i);
                        if (fs.uid==uid) {
                            index = i;
                        }
                    }
                }
                if (index==-1)
                    index = col;
               
                if (!isProjectStar()) {
                    if (schema!=null && schema.size()>index)
                        fieldSchema = schema.getField(index);
                    else
                        fieldSchema = new LogicalSchema.LogicalFieldSchema(null, null, DataType.BYTEARRAY);
                    uidOnlyFieldSchema = fieldSchema.cloneUid();
                }
                else {
                    LogicalSchema newTupleSchema = null;
                    if (schema!=null)
                        newTupleSchema = schema.deepCopy();
                    fieldSchema = new LogicalSchema.LogicalFieldSchema(null, newTupleSchema, DataType.TUPLE);
                    uidOnlyFieldSchema = fieldSchema.mergeUid(uidOnlyFieldSchema);
                }
View Full Code Here

TOP

Related Classes of org.apache.pig.newplan.logical.relational.LogicalSchema

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.