Examples of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceOper

Package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer

Examples of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceOper

org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceOper
An operator model for a Map Reduce job. Acts as a host to the plans that will execute in map, reduce and optionally combine phases. These will be embedded in the MROperPlan in order to capture the dependencies amongst jobs.


            LogicalPlan lp = checkLogicalPlan(1, 2, 14);
            PhysicalPlan pp = checkPhysicalPlan(lp, 1, 2, 20);
            MROperPlan mp = checkMRPlan(pp, 1, 2, 3);


            MapReduceOper mo1 = mp.getRoots().get(0);
            MapReduceOper mo2 = mp.getLeaves().get(0);
            MapReduceOper mo3 = mp.getLeaves().get(1);


            checkPhysicalPlan(mo1.mapPlan, 1, 1, 3);
            checkPhysicalPlan(mo1.reducePlan, 1, 1, 2);
            PhysicalOperator leaf = mo1.reducePlan.getLeaves().get(0);

View Full Code Here


            LogicalPlan lp = checkLogicalPlan(1, 1, 7);
            PhysicalPlan pp = checkPhysicalPlan(lp, 1, 1, 11);
            MROperPlan mp = checkMRPlan(pp, 1, 1, 2);


            MapReduceOper mo1 = mp.getRoots().get(0);
            MapReduceOper mo2 = mp.getLeaves().get(0);


            checkPhysicalPlan(mo1.mapPlan, 1, 1, 3);
            checkPhysicalPlan(mo1.reducePlan, 1, 1, 2);
            PhysicalOperator leaf = mo1.reducePlan.getLeaves().get(0);

View Full Code Here


            LogicalPlan lp = checkLogicalPlan(1, 1, 6);
            PhysicalPlan pp = checkPhysicalPlan(lp, 1, 1, 11);
            MROperPlan mp = checkMRPlan(pp, 1, 1, 2);


            MapReduceOper mo1 = mp.getRoots().get(0);
            MapReduceOper mo2 = mp.getLeaves().get(0);


            checkPhysicalPlan(mo1.mapPlan, 1, 1, 3);
            checkPhysicalPlan(mo1.reducePlan, 1, 1, 2);
            PhysicalOperator leaf = mo1.reducePlan.getLeaves().get(0);

View Full Code Here


            LogicalPlan lp = checkLogicalPlan(1, 3, 5);
            PhysicalPlan pp = checkPhysicalPlan(lp, 1, 3, 10);
            MROperPlan mp = checkMRPlan(pp, 1, 1, 1);


            MapReduceOper mo = mp.getRoots().get(0);


            checkPhysicalPlan(mo.mapPlan, 1, 1, 4);
            PhysicalOperator leaf = mo.mapPlan.getLeaves().get(0);
            
            Assert.assertTrue(leaf instanceof POSplit);

View Full Code Here

            //MROperPlan mrp = checkMRPlan(pp, 1, 1, 1);
            //MapReduceOper mrop = mrp.getRoots().get(0);


            //Instead of 1 merged mapreduce job, there will be two.
            MROperPlan mrp = checkMRPlan(pp, 1, 1, 2);
            MapReduceOper mrop = mrp.getLeaves().get(0);
            Assert.assertTrue(mrop.getCustomPartitioner().equals(SimpleCustomPartitioner.class.getName()));


        } catch (Exception e) {
            e.printStackTrace();
            Assert.fail();
        }

View Full Code Here

            MROperPlan mrp = checkMRPlan(pp, 1, 1, 2);


            // since the first mapreduce job of mrp.getRoots().get(0)
            // is the merge of splitter and splittee without custom partitioner (c2 above),
            // second job should contain the custom partitioner
            MapReduceOper mrop;
            mrop = mrp.getRoots().get(0);
            Assert.assertTrue(mrop.getCustomPartitioner() == null );
            mrop = mrp.getLeaves().get(0);
            Assert.assertTrue(mrop.getCustomPartitioner().equals(SimpleCustomPartitioner.class.getName()));


        } catch (Exception e) {
            e.printStackTrace();
            Assert.fail(e.toString());
        }

View Full Code Here


        // Get the sort job
        Iterator<MapReduceOper> iter = mrPlan.getKeys().values().iterator();
        int counter = 0;
        while (iter.hasNext()) {
            MapReduceOper op = iter.next();
            counter++;
            if (op.isGlobalSort()) {
                assertTrue(op.getRequestedParallelism()==100);
            }
        }
        assertEquals(3, counter);


        pc.defaultParallel = -1;

View Full Code Here


        // Get the skew join job
        Iterator<MapReduceOper> iter = mrPlan.getKeys().values().iterator();
        int counter = 0;
        while (iter.hasNext()) {
            MapReduceOper op = iter.next();
            counter++;
            if (op.isSkewedJoin()) {
                assertTrue(op.getRequestedParallelism()==100);
            }
        }
        assertEquals(3, counter);


        pc.defaultParallel = -1;

View Full Code Here


        // first job uses a single reducer for the sampling
        Util.assertParallelValues(-1, 1, -1, 1, jobControl.getWaitingJobs().get(0).getJobConf());


        // Simulate the first job having run so estimation kicks in.
        MapReduceOper sort = mrPlan.getLeaves().get(0);
        jcc.updateMROpPlan(jobControl.getReadyJobs());
        FileLocalizer.create(sort.getQuantFile(), pc);
        jobControl = jcc.compile(mrPlan, query);


        sort = mrPlan.getLeaves().get(0);
        long reducer=Math.min((long)Math.ceil(new File("test/org/apache/pig/test/data/passwd").length()/100.0), 10);
        assertEquals(reducer, sort.getRequestedParallelism());


        // the second job estimates reducers
        Util.assertParallelValues(-1, -1, reducer, reducer, jobControl.getWaitingJobs().get(0).getJobConf());


        // use the PARALLEL keyword; it overrides the estimated number of reducers
        query = "a = load '/passwd';" + "b = order a by $0 PARALLEL 2;" +
                "store b into 'output';";
        pp = Util.buildPp(ps, query);


        mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);


        assertEquals(2, mrPlan.size());


        sort = mrPlan.getLeaves().get(0);
        assertEquals(2, sort.getRequestedParallelism());


        // the estimation won't take effect when it is applied to non-DFS storage or when the files don't exist, such as HBase
        query = "a = load 'hbase://passwd' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');" +
                "b = order a by $0 ;" +
                "store b into 'output';";
        pp = Util.buildPp(ps, query);


        mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
        assertEquals(2, mrPlan.size());


        sort = mrPlan.getLeaves().get(0);


        // the requested parallelism will be -1 if users set neither default_parallel nor PARALLEL
        // and the estimation doesn't take effect. The MR framework will finally set it to 1.
        assertEquals(-1, sort.getRequestedParallelism());


        // test order by with three jobs (after optimization)
        query = "a = load '/passwd';" +
                "b = foreach a generate $0, $1, $2;" +
                "c = order b by $0;" +
                "store c into 'output';";
        pp = Util.buildPp(ps, query);


        mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
        assertEquals(3, mrPlan.size());


        // Simulate the first 2 jobs having run so estimation kicks in.
        sort = mrPlan.getLeaves().get(0);
        FileLocalizer.create(sort.getQuantFile(), pc);


        jobControl = jcc.compile(mrPlan, query);
        Util.copyFromLocalToCluster(cluster, "test/org/apache/pig/test/data/passwd", ((POLoad) sort.mapPlan.getRoots().get(0)).getLFile().getFileName());


        //First job is just foreach with projection, mapper-only job, so estimate gets ignored
        Util.assertParallelValues(-1, -1, reducer, 0, jobControl.getWaitingJobs().get(0).getJobConf());


        jcc.updateMROpPlan(jobControl.getReadyJobs());
        jobControl = jcc.compile(mrPlan, query);
        jcc.updateMROpPlan(jobControl.getReadyJobs());


        //Second job is a sampler, which requests and gets 1 reducer
        Util.assertParallelValues(-1, 1, -1, 1, jobControl.getWaitingJobs().get(0).getJobConf());


        jobControl = jcc.compile(mrPlan, query);
        sort = mrPlan.getLeaves().get(0);
        assertEquals(reducer, sort.getRequestedParallelism());


        //Third job is the order, which uses the estimated number of reducers
        Util.assertParallelValues(-1, -1, reducer, reducer, jobControl.getWaitingJobs().get(0).getJobConf());
    }

View Full Code Here


            LogicalPlan lp = checkLogicalPlan(1, 2, 10);
            PhysicalPlan pp = checkPhysicalPlan(lp, 1, 2, 20);
            MROperPlan mp = checkMRPlan(pp, 1, 2, 3);


            MapReduceOper mo1 = mp.getRoots().get(0);
            MapReduceOper mo2 = mp.getLeaves().get(0);
            MapReduceOper mo3 = mp.getLeaves().get(1);


            checkPhysicalPlan(mo1.mapPlan, 1, 1, 3);
            checkPhysicalPlan(mo1.reducePlan, 1, 1, 2);
            PhysicalOperator leaf = mo1.reducePlan.getLeaves().get(0);

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceOper

com.twitter.ambrose.pig.AmbrosePigProgressNotificationListener

org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.POPackageAnnotator

org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator

org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan

org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCounter

org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POUnion

org.apache.pig.impl.plan.OperatorKey

org.apache.pig.pen.LocalMapReduceSimulator

org.apache.pig.test.TestFRJoin2

org.apache.pig.test.TestJobControlCompiler

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.