assertTrue(expected.isEqual(newLogicalPlan));
}
/**
 * Runs a series of column-pruning scenarios. Each scenario builds a plan
 * from an input script, migrates it, runs {@code MyPlanOptimizer} on it and
 * checks that the result is equal (per {@code isEqual}) to the migrated
 * plan of a hand-written expected script.
 *
 * @throws Exception if building, migrating or optimizing any plan fails
 */
public void testPrune() throws Exception {
    assertPrunedPlanEquals("only foreach",
        new String[] {
            "a = load 'd.txt' as (id, v1, v2);",
            "b = foreach a generate id;",
            "store b into 'empty';"
        },
        new String[] {
            "a = load 'd.txt' as (id);",
            "b = foreach a generate id;",
            "store b into 'empty';"
        });

    assertPrunedPlanEquals("with filter",
        new String[] {
            "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);",
            "b = filter a by v1 != NULL AND (v2+v3)<100;",
            "c = foreach b generate id;",
            "store c into 'empty';"
        },
        new String[] {
            "a = load 'd.txt' as (id, v1, v3, v2);",
            "b = filter a by v1 != NULL AND (v2+v3)<100;",
            "c = foreach b generate id;",
            "store c into 'empty';"
        });

    // First foreach projects v2, v5, v4; the second only keeps v5, v4.
    assertPrunedPlanEquals("with 2 foreach (first generates v2, v5, v4)",
        new String[] {
            "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);",
            "b = foreach a generate v2, v5, v4;",
            "c = foreach b generate v5, v4;",
            "store c into 'empty';"
        },
        new String[] {
            "a = load 'd.txt' as (v5, v4);",
            "b = foreach a generate v5, v4;",
            "c = foreach b generate v5, v4;",
            "store c into 'empty';"
        });

    // First foreach projects id, v1, v5, v3, v4; the second only keeps v5, v4.
    assertPrunedPlanEquals("with 2 foreach (first generates id, v1, v5, v3, v4)",
        new String[] {
            "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);",
            "b = foreach a generate id, v1, v5, v3, v4;",
            "c = foreach b generate v5, v4;",
            "store c into 'empty';"
        },
        new String[] {
            "a = load 'd.txt' as (v5, v4);",
            "b = foreach a generate v5, v4;",
            "c = foreach b generate v5, v4;",
            "store c into 'empty';"
        });

    // v2 is still needed by the filter, so it stays in the load and first foreach.
    assertPrunedPlanEquals("with 2 foreach and filter in between",
        new String[] {
            "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);",
            "b = foreach a generate v2, v5, v4;",
            "c = filter b by v2 != NULL;",
            "d = foreach c generate v5, v4;",
            "store d into 'empty';"
        },
        new String[] {
            "a = load 'd.txt' as (v5, v4, v2);",
            "b = foreach a generate v2, v5, v4;",
            "c = filter b by v2 != NULL;",
            "d = foreach c generate v5, v4;",
            "store d into 'empty';"
        });

    // One foreach on top of a join of two loads.
    assertPrunedPlanEquals("with foreach after join of 2 loads",
        new String[] {
            "a = load 'd.txt' as (id, v1, v2, v3);",
            "b = load 'c.txt' as (id, v4, v5, v6);",
            "c = join a by id, b by id;",
            "d = foreach c generate a::id, v5, v3, v4;",
            "store d into 'empty';"
        },
        new String[] {
            "a = load 'd.txt' as (id, v3);",
            "b = load 'c.txt' as (id, v4, v5);",
            "c = join a by id, b by id;",
            "d = foreach c generate a::id, v5, v3, v4;",
            "store d into 'empty';"
        });

    // The expected script keeps the full load schema and has an extra
    // foreach directly after the load instead.
    assertPrunedPlanEquals("with BinStorage, insert foreach after load",
        new String[] {
            "a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);",
            "c = filter a by v2 != NULL;",
            "d = foreach c generate v5, v4;",
            "store d into 'empty';"
        },
        new String[] {
            "a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);",
            "b = foreach a generate v5, v4, v2;",
            "c = filter b by v2 != NULL;",
            "d = foreach c generate v5, v4;",
            "store d into 'empty';"
        });

    assertPrunedPlanEquals(
        "with BinStorage, not to insert foreach after load if there is already one",
        new String[] {
            "a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);",
            "b = foreach a generate v5, v4, v2;",
            "c = filter b by v2 != NULL;",
            "d = foreach c generate v5;",
            "store d into 'empty';"
        },
        new String[] {
            "a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);",
            "b = foreach a generate v5, v2;",
            "c = filter b by v2 != NULL;",
            "d = foreach c generate v5;",
            "store d into 'empty';"
        });

    // Same as the previous scenario, but the existing foreach also
    // generates the constant 10.
    assertPrunedPlanEquals(
        "with BinStorage, existing foreach that also generates a constant",
        new String[] {
            "a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);",
            "b = foreach a generate v5, v4, v2, 10;",
            "c = filter b by v2 != NULL;",
            "d = foreach c generate v5;",
            "store d into 'empty';"
        },
        new String[] {
            "a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);",
            "b = foreach a generate v5, v2, 10;",
            "c = filter b by v2 != NULL;",
            "d = foreach c generate v5;",
            "store d into 'empty';"
        });
}

/**
 * Optimizes the plan built from {@code inputScript} and asserts that it is
 * equal to the plan built from {@code expectedScript}.
 *
 * @param scenario       label used as the assertion failure message
 * @param inputScript    Pig Latin statements of the plan to optimize, in order
 * @param expectedScript Pig Latin statements of the plan the optimizer should produce
 * @throws Exception if building, migrating or optimizing a plan fails
 */
private void assertPrunedPlanEquals(String scenario, String[] inputScript,
        String[] expectedScript) throws Exception {
    LogicalPlan newLogicalPlan = buildMigratedPlan(inputScript);
    // NOTE(review): the second argument is presumably an iteration limit -- confirm
    // against MyPlanOptimizer.
    PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
    optimizer.optimize();

    // The expected plan is built only after optimization, as a fresh plan
    // from its own LogicalPlanTester.
    LogicalPlan expected = buildMigratedPlan(expectedScript);
    assertTrue(scenario, expected.isEqual(newLogicalPlan));
}

/**
 * Feeds the statements one by one to a fresh {@code LogicalPlanTester} and
 * migrates the plan returned for the last statement.
 *
 * @param statements Pig Latin statements, in order; the last one yields the plan
 * @return the result of {@code migratePlan} on the plan of the last statement
 * @throws Exception if a statement cannot be built or the migration fails
 */
private LogicalPlan buildMigratedPlan(String[] statements) throws Exception {
    LogicalPlanTester lpt = new LogicalPlanTester(pc);
    org.apache.pig.impl.logicalLayer.LogicalPlan plan = null;
    for (String statement : statements) {
        plan = lpt.buildPlan(statement);
    }
    return migratePlan(plan);
}