Package org.apache.pig.test.utils

Examples of org.apache.pig.test.utils.LogicalPlanTester


    }

    @Test
    public void testReducerNumEstimation() throws Exception{
       // use the estimation
        LogicalPlanTester planTester = new LogicalPlanTester(pc) ;
        Util.copyFromLocalToCluster(cluster, "test/org/apache/pig/test/data/passwd", "/passwd");
        planTester.buildPlan("a = load '/passwd';");
        LogicalPlan lp = planTester.buildPlan("b = group a by $0;");
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        POStore store = GenPhyOp.dummyPigStorageOp();
        pp.addAsLeaf(store);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
              
        pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
        pc.getConf().setProperty("pig.exec.reducers.max", "10");
        HExecutionEngine exe = pc.getExecutionEngine();
        ConfigurationValidator.validatePigProperties(exe.getConfiguration());
        Configuration conf = ConfigurationUtil.toConfiguration(exe.getConfiguration());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
        JobControl jc=jcc.compile(mrPlan, "Test");
        Job job = jc.getWaitingJobs().get(0);
        long reducer=Math.min((long)Math.ceil(new File("test/org/apache/pig/test/data/passwd").length()/100.0), 10);
        assertEquals(job.getJobConf().getLong("mapred.reduce.tasks",10), reducer);
       
        // use the PARALLEL key word, it will override the estimated reducer number
        planTester = new LogicalPlanTester(pc) ;
        planTester.buildPlan("a = load '/passwd';");
        lp = planTester.buildPlan("b = group a by $0 PARALLEL 2;");
        pp = Util.buildPhysicalPlan(lp, pc);
        store = GenPhyOp.dummyPigStorageOp();
        pp.addAsLeaf(store);
        mrPlan = Util.buildMRPlan(pp, pc);
              
        pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
        pc.getConf().setProperty("pig.exec.reducers.max", "10");
        exe = pc.getExecutionEngine();
        ConfigurationValidator.validatePigProperties(exe.getConfiguration());
        conf = ConfigurationUtil.toConfiguration(exe.getConfiguration());
        jcc = new JobControlCompiler(pc, conf);
        jc=jcc.compile(mrPlan, "Test");
        job = jc.getWaitingJobs().get(0);
        assertEquals(job.getJobConf().getLong("mapred.reduce.tasks",10), 2);
       
        // the estimation won't take effect when it apply to non-dfs or the files doesn't exist, such as hbase
        planTester = new LogicalPlanTester(pc) ;
        planTester.buildPlan("a = load 'hbase://passwd' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');");
        lp = planTester.buildPlan("b = group a by $0 ;");
        pp = Util.buildPhysicalPlan(lp, pc);
        store = GenPhyOp.dummyPigStorageOp();
        pp.addAsLeaf(store);
        mrPlan = Util.buildMRPlan(pp, pc);
               
View Full Code Here


    }

    @Test
    public void testDistinctOptimization1() throws Exception {
        // Limit in the foreach plan
        LogicalPlanTester planTester = new LogicalPlanTester();
        planTester.buildPlan("A = LOAD 'input1' AS (a0, a1, a2);");
        planTester.buildPlan("B = LOAD 'input2' AS (b0, b1, b2);");
        planTester.buildPlan("C = cogroup A by a0, B by b0;");
        planTester.buildPlan("D = foreach C { E = limit A 10; F = E.a1; G = DISTINCT F; generate group, COUNT(G);};");

        LogicalPlan lp = planTester.buildPlan("store D into '/tmp';");
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

        SecondaryKeyOptimizer so = new SecondaryKeyOptimizer(mrPlan);
        so.visit();
View Full Code Here

    }

    @Test
    public void testDistinctOptimization2() throws Exception {
        // Distinct on one entire input
        LogicalPlanTester planTester = new LogicalPlanTester();
        planTester.buildPlan("A = LOAD 'input1' AS (a0, a1, a2);");
        planTester.buildPlan("B = group A by $0;");
        planTester.buildPlan("C = foreach B { D = distinct A; generate group, D;};");

        LogicalPlan lp = planTester.buildPlan("store C into '/tmp';");
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

        SecondaryKeyOptimizer so = new SecondaryKeyOptimizer(mrPlan);
        so.visit();
View Full Code Here

    }

    @Test
    public void testDistinctOptimization3() throws Exception {
        // Distinct on the prefix of main sort key
        LogicalPlanTester planTester = new LogicalPlanTester();
        planTester.buildPlan("A = LOAD 'input1' AS (a0, a1, a2);");
        planTester.buildPlan("B = group A by $0;");
        planTester.buildPlan("C = foreach B { D = A.a0; E = distinct D; generate group, E;};");

        LogicalPlan lp = planTester.buildPlan("store C into '/tmp';");
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

        SecondaryKeyOptimizer so = new SecondaryKeyOptimizer(mrPlan);
        so.visit();
View Full Code Here

    }

    @Test
    public void testDistinctOptimization4() throws Exception {
        // Distinct on secondary key again, should remove
        LogicalPlanTester planTester = new LogicalPlanTester();
        planTester.buildPlan("A = LOAD 'input1' AS (a0, a1, a2);");
        planTester.buildPlan("B = group A by $0;");
        planTester.buildPlan("C = foreach B { D = A.a1; E = distinct D; F = distinct E; generate group, F;};");

        LogicalPlan lp = planTester.buildPlan("store C into '/tmp';");
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

        SecondaryKeyOptimizer so = new SecondaryKeyOptimizer(mrPlan);
        so.visit();
View Full Code Here

    }

    @Test
    public void testDistinctOptimization5() throws Exception {
        // Filter in foreach plan
        LogicalPlanTester planTester = new LogicalPlanTester();
        planTester.buildPlan("A = LOAD 'input1' AS (a0, a1, a2);");
        planTester.buildPlan("B = group A by $0;");
        planTester.buildPlan("C = foreach B { D = A.a1; E = distinct D; F = filter E by $0=='1'; generate group, F;};");

        LogicalPlan lp = planTester.buildPlan("store C into '/tmp';");
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

        SecondaryKeyOptimizer so = new SecondaryKeyOptimizer(mrPlan);
        so.visit();
View Full Code Here

    }

    @Test
    public void testDistinctOptimization6() throws Exception {
        // group by * with no schema, and distinct key is not part of main key
        LogicalPlanTester planTester = new LogicalPlanTester();
        planTester.buildPlan("A = LOAD 'input1';");
        planTester.buildPlan("B = group A by *;");
        planTester.buildPlan("C = foreach B { D = limit A 10; E = D.$1; F = DISTINCT E; generate group, COUNT(F);};");

        LogicalPlan lp = planTester.buildPlan("store C into '/tmp';");
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

        SecondaryKeyOptimizer so = new SecondaryKeyOptimizer(mrPlan);
        so.visit();
View Full Code Here

    }

    @Test
    public void testDistinctOptimization7() throws Exception {
        // group by * with no schema, distinct key is more specific than the main key
        LogicalPlanTester planTester = new LogicalPlanTester();
        planTester.buildPlan("A = LOAD 'input1';");
        planTester.buildPlan("B = group A by *;");
        planTester.buildPlan("C = foreach B { D = limit A 10; E = D.$0; F = DISTINCT E; generate group, COUNT(F);};");

        LogicalPlan lp = planTester.buildPlan("store C into '/tmp';");
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

        SecondaryKeyOptimizer so = new SecondaryKeyOptimizer(mrPlan);
        so.visit();
View Full Code Here

    }

    @Test
    public void testDistinctOptimization8() throws Exception {
        // local arrange plan is an expression
        LogicalPlanTester planTester = new LogicalPlanTester();
        planTester.buildPlan("A = LOAD 'input1' AS (a0, a1, a2);");
        planTester.buildPlan("B = group A by $0+$1;");
        planTester.buildPlan("C = foreach B { D = limit A 10; E = D.$0; F = DISTINCT E; generate group, COUNT(F);};");

        LogicalPlan lp = planTester.buildPlan("store C into '/tmp';");
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

        SecondaryKeyOptimizer so = new SecondaryKeyOptimizer(mrPlan);
        so.visit();
View Full Code Here

    }

    @Test
    public void testDistinctOptimization9() throws Exception {
        // local arrange plan is nested project
        LogicalPlanTester planTester = new LogicalPlanTester();
        planTester.buildPlan("A = LOAD 'input1' as (a:tuple(a0:int, a1:chararray));");
        planTester.buildPlan("B = group A by a.a1;");
        planTester.buildPlan("C = foreach B { D = A.a; E = DISTINCT D; generate group, COUNT(E);};");

        LogicalPlan lp = planTester.buildPlan("store C into '/tmp';");
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

        SecondaryKeyOptimizer so = new SecondaryKeyOptimizer(mrPlan);
        so.visit();
View Full Code Here

TOP

Related Classes of org.apache.pig.test.utils.LogicalPlanTester

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.