Package org.apache.pig.experimental.plan.optimizer

Examples of org.apache.pig.experimental.plan.optimizer.PlanOptimizer


        Rule r = new SplitFilter("SplitFilter");
        Set<Rule> s = new HashSet<Rule>();
        s.add(r);
        List<Set<Rule>> ls = new ArrayList<Set<Rule>>();
        ls.add(s);
        PlanOptimizer optimizer = new MyPlanOptimizer(plan, ls, 3);
        optimizer.optimize();
       
        assertEquals(plan.getPredecessors(filter).get(0), join);
        Operator next = plan.getSuccessors(filter).get(0);
        assertEquals(LOFilter.class, next.getClass());       
        next = plan.getSuccessors(next).get(0);
        assertEquals(LOStore.class, next.getClass());
       
        // run push up filter rule
        r = new PushUpFilter("PushUpFilter");
        s = new HashSet<Rule>();
        s.add(r);
        ls = new ArrayList<Set<Rule>>();
        ls.add(s);
        optimizer = new MyPlanOptimizer(plan, ls, 3);
        optimizer.optimize();
       
        // both filters should be moved up to be after each load
        next = plan.getSuccessors(load1).get(0);
        assertEquals(next.getClass(), LOFilter.class);
        assertEquals(plan.getSuccessors(next).get(0), join);
       
        next = plan.getSuccessors(load2).get(0);
        assertEquals(next.getClass(), LOFilter.class);
        assertEquals(plan.getSuccessors(next).get(0), join);
       
        assertEquals(plan.getSuccessors(join).get(0), store);
       
        // run merge filter rule
        r = new MergeFilter("MergeFilter");
        s = new HashSet<Rule>();
        s.add(r);
        ls = new ArrayList<Set<Rule>>();
        ls.add(s);
        optimizer = new MyPlanOptimizer(plan, ls, 3);
        optimizer.optimize();
       
        // the filters should be the same as before; there is nothing to merge
        next = plan.getSuccessors(load1).get(0);
        assertEquals(next.getClass(), LOFilter.class);
        assertEquals(plan.getSuccessors(next).get(0), join);
View Full Code Here


        lpt.buildPlan("a = load 'd.txt' as (id, v1, v2);");
        lpt.buildPlan("b = filter a by v1==NULL;");       
        org.apache.pig.impl.logicalLayer.LogicalPlan plan = lpt.buildPlan("store b into 'empty';")
        LogicalPlan newLogicalPlan = migratePlan(plan);
              
        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v2);");
        lpt.buildPlan("b = filter a by v1==NULL;");       
        plan = lpt.buildPlan("store b into 'empty';")
        LogicalPlan expected = migratePlan(plan);
       
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // no schema
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt';");
        lpt.buildPlan("b = foreach a generate $0, $1;");
        plan = lpt.buildPlan("store b into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt';");
        lpt.buildPlan("b = foreach a generate $0, $1;");
        plan = lpt.buildPlan("store b into 'empty';")
View Full Code Here

        lpt.buildPlan("a = load 'd.txt' as (id, v1, v2);");
        lpt.buildPlan("b = foreach a generate id;");       
        org.apache.pig.impl.logicalLayer.LogicalPlan plan = lpt.buildPlan("store b into 'empty';")
        LogicalPlan newLogicalPlan = migratePlan(plan);
              
        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id);");
        lpt.buildPlan("b = foreach a generate id;");       
        plan = lpt.buildPlan("store b into 'empty';")
        LogicalPlan expected = migratePlan(plan);
       
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // with filter
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v5, v3, v4, v2);");
        lpt.buildPlan("b = filter a by v1 != NULL AND (v2+v3)<100;");
        lpt.buildPlan("c = foreach b generate id;");
        plan = lpt.buildPlan("store c into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v3, v2);");
        lpt.buildPlan("b = filter a by v1 != NULL AND (v2+v3)<100;");
        lpt.buildPlan("c = foreach b generate id;");
        plan = lpt.buildPlan("store c into 'empty';");
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // with 2 foreach
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v5, v3, v4, v2);");
        lpt.buildPlan("b = foreach a generate v2, v5, v4;");
        lpt.buildPlan("c = foreach b generate v5, v4;");
        plan = lpt.buildPlan("store c into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (v5, v4);");
        lpt.buildPlan("b = foreach a generate v5, v4;");
        lpt.buildPlan("c = foreach b generate v5, v4;");
        plan = lpt.buildPlan("store c into 'empty';");
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // with 2 foreach
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v5, v3, v4, v2);");
        lpt.buildPlan("b = foreach a generate id, v1, v5, v3, v4;");
        lpt.buildPlan("c = foreach b generate v5, v4;");
        plan = lpt.buildPlan("store c into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (v5, v4);");
        lpt.buildPlan("b = foreach a generate v5, v4;");
        lpt.buildPlan("c = foreach b generate v5, v4;");
        plan = lpt.buildPlan("store c into 'empty';");
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // with 2 foreach and filter in between
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v5, v3, v4, v2);");
        lpt.buildPlan("b = foreach a generate v2, v5, v4;");
        lpt.buildPlan("c = filter b by v2 != NULL;");
        lpt.buildPlan("d = foreach c generate v5, v4;");
        plan = lpt.buildPlan("store d into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (v5, v4, v2);");
        lpt.buildPlan("b = foreach a generate v2, v5, v4;");
        lpt.buildPlan("c = filter b by v2 != NULL;");
        lpt.buildPlan("d = foreach c generate v5, v4;");
        plan = lpt.buildPlan("store d into 'empty';")
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // with 2 foreach after join
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v2, v3);");
        lpt.buildPlan("b = load 'c.txt' as (id, v4, v5, v6);");
        lpt.buildPlan("c = join a by id, b by id;");      
        lpt.buildPlan("d = foreach c generate a::id, v5, v3, v4;");
        plan = lpt.buildPlan("store d into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id, v3);");
        lpt.buildPlan("b = load 'c.txt' as (id, v4, v5);");
        lpt.buildPlan("c = join a by id, b by id;");      
        lpt.buildPlan("d = foreach c generate a::id, v5, v3, v4;");
        plan = lpt.buildPlan("store d into 'empty';")
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // with BinStorage, insert foreach after load
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);");       
        lpt.buildPlan("c = filter a by v2 != NULL;");
        lpt.buildPlan("d = foreach c generate v5, v4;");
        plan = lpt.buildPlan("store d into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);");
        lpt.buildPlan("b = foreach a generate v5, v4, v2;");
        lpt.buildPlan("c = filter b by v2 != NULL;");
        lpt.buildPlan("d = foreach c generate v5, v4;");
        plan = lpt.buildPlan("store d into 'empty';")
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
       
       // with BinStorage, not to insert foreach after load if there is already one
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);");   
        lpt.buildPlan("b = foreach a generate v5, v4, v2;");
        lpt.buildPlan("c = filter b by v2 != NULL;");
        lpt.buildPlan("d = foreach c generate v5;");
        plan = lpt.buildPlan("store d into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);");
        lpt.buildPlan("b = foreach a generate v5, v2;");
        lpt.buildPlan("c = filter b by v2 != NULL;");
        lpt.buildPlan("d = foreach c generate v5;");
        plan = lpt.buildPlan("store d into 'empty';")
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
       
       // with BinStorage, not to insert foreach after load if there is already one
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);");   
        lpt.buildPlan("b = foreach a generate v5, v4, v2, 10;");
        lpt.buildPlan("c = filter b by v2 != NULL;");
        lpt.buildPlan("d = foreach c generate v5;");
        plan = lpt.buildPlan("store d into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);");
        lpt.buildPlan("b = foreach a generate v5, v2, 10;");
        lpt.buildPlan("c = filter b by v2 != NULL;");
View Full Code Here

        lpt.buildPlan("a = load 'd.txt' as (id, v1, m:map[]);");
        lpt.buildPlan("b = foreach a generate id, m#'path';");       
        org.apache.pig.impl.logicalLayer.LogicalPlan plan = lpt.buildPlan("store b into 'empty';")
        LogicalPlan newLogicalPlan = migratePlan(plan);
              
        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id, m:map[]);");
        lpt.buildPlan("b = foreach a generate id, m#'path';");       
        plan = lpt.buildPlan("store b into 'empty';")
        LogicalPlan expected = migratePlan(plan);
       
        assertTrue(expected.isEqual(newLogicalPlan));
       
        LOLoad op = (LOLoad)newLogicalPlan.getSources().get(0);
        Map<Integer,Set<String>> annotation =
                (Map<Integer, Set<String>>) op.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);
        assertEquals(annotation.size(), 1);
        Set<String> s = new HashSet<String>();
        s.add("path");
        assertEquals(annotation.get(2), s);
       
        // foreach with join
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id, v1, m:map[]);");
        lpt.buildPlan("b = load 'd.txt' as (id, v1, m:map[]);");
        lpt.buildPlan("c = join a by id, b by id;");
        lpt.buildPlan("d = filter c by a::m#'path' != NULL;");
        lpt.buildPlan("e = foreach d generate a::id, b::id, b::m#'path', a::m;");       
        plan = lpt.buildPlan("store e into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id, m:map[]);");
        lpt.buildPlan("b = load 'd.txt' as (id, m:map[]);");
        lpt.buildPlan("c = join a by id, b by id;");
View Full Code Here

        lpt.buildPlan("c = foreach b generate id, FLATTEN(v);");   
        lpt.buildPlan("d = foreach c generate id, v::s2;");   
        org.apache.pig.impl.logicalLayer.LogicalPlan plan = lpt.buildPlan("store d into 'empty';")
        LogicalPlan newLogicalPlan = migratePlan(plan);
              
        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id, v:bag{t:(s1,s2,s3)});");
        lpt.buildPlan("b = filter a by id>10;");
        lpt.buildPlan("c = foreach b generate id, FLATTEN(v);");   
View Full Code Here

        lpt.buildPlan("b = filter a by v1>10;");
        lpt.buildPlan("c = foreach b generate id;");       
        org.apache.pig.impl.logicalLayer.LogicalPlan plan = lpt.buildPlan("store c into 'empty';")
        LogicalPlan newLogicalPlan = migratePlan(plan);
              
        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id, v1);");
        lpt.buildPlan("b = filter a by v1>10;");
        lpt.buildPlan("c = foreach b generate id;");     
        plan = lpt.buildPlan("store c into 'empty';")
        LogicalPlan expected = migratePlan(plan);
       
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // join with foreach
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v2);");
        lpt.buildPlan("b = load 'd.txt' as (id, v1, v2);");
        lpt.buildPlan("c = join a by id, b by id;");
        lpt.buildPlan("d = filter c by a::v1>b::v1;");
        lpt.buildPlan("e = foreach d generate a::id;");       
        plan = lpt.buildPlan("store e into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester();
        lpt.buildPlan("a = load 'd.txt' as (id, v1);");
        lpt.buildPlan("b = load 'd.txt' as (id, v1);");
        lpt.buildPlan("c = join a by id, b by id;");
View Full Code Here

TOP

Related Classes of org.apache.pig.experimental.plan.optimizer.PlanOptimizer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.