String query = "a = load 'd.txt' as (id, v1, v2);" +
"b = foreach a generate id;"+
"store b into 'empty';";
LogicalPlan newLogicalPlan = buildPlan(query);
PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
optimizer.optimize();
query = "a = load 'd.txt' as (id);" +
"b = foreach a generate id;"+
"store b into 'empty';";
LogicalPlan expected = buildPlan(query);
assertTrue(expected.isEqual(newLogicalPlan));
// with filter
query = "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);"+
"b = filter a by v1 != NULL AND (v2+v3)<100;"+
"c = foreach b generate id;"+
"store c into 'empty';";
newLogicalPlan = buildPlan(query);
optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
optimizer.optimize();
query = "a = load 'd.txt' as (id, v1, v3, v2);" +
"b = filter a by v1 != NULL AND (v2+v3)<100;" +
"c = foreach b generate id;" +
"store c into 'empty';";
expected = buildPlan(query);
assertTrue(expected.isEqual(newLogicalPlan));
// with 2 foreach
query = "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);" +
"b = foreach a generate v2, v5, v4;" +
"c = foreach b generate v5, v4;" +
"store c into 'empty';";
newLogicalPlan = buildPlan(query);
optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
optimizer.optimize();
query = "a = load 'd.txt' as (v5, v4);" +
"b = foreach a generate v5, v4;" +
"c = foreach b generate v5, v4;" +
"store c into 'empty';";
expected = buildPlan(query);
assertTrue(expected.isEqual(newLogicalPlan));
// with 2 foreach, where the first foreach generates several columns (id, v1, v3) that are unused downstream
query = "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);" +
"b = foreach a generate id, v1, v5, v3, v4;" +
"c = foreach b generate v5, v4;" +
"store c into 'empty';";
newLogicalPlan = buildPlan(query);
optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
optimizer.optimize();
query = "a = load 'd.txt' as (v5, v4);" +
"b = foreach a generate v5, v4;" +
"c = foreach b generate v5, v4;" +
"store c into 'empty';";
expected = buildPlan(query);
assertTrue(expected.isEqual(newLogicalPlan));
// with 2 foreach and filter in between
query = "a =load 'd.txt' as (id, v1, v5, v3, v4, v2);" +
"b = foreach a generate v2, v5, v4;" +
"c = filter b by v2 != NULL;" +
"d = foreach c generate v5, v4;" +
"store d into 'empty';";
newLogicalPlan = buildPlan(query);
optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
optimizer.optimize();
query = "a =load 'd.txt' as (v5, v4, v2);" +
"b = foreach a generate v2, v5, v4;" +
"c = filter b by v2 != NULL;" +
"d = foreach c generate v5, v4;" +
"store d into 'empty';";
expected = buildPlan(query);
assertTrue(expected.isEqual(newLogicalPlan));
// with a foreach after a join of 2 loads
query = "a =load 'd.txt' as (id, v1, v2, v3);" +
"b = load 'c.txt' as (id, v4, v5, v6);" +
"c = join a by id, b by id;" +
"d = foreach c generate a::id, v5, v3, v4;" +
"store d into 'empty';";
newLogicalPlan = buildPlan(query);
optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
optimizer.optimize();
query = "a =load 'd.txt' as (id, v3);" +
"b = load 'c.txt' as (id, v4, v5);" +
"c = join a by id, b by id;" +
"d = foreach c generate a::id, v5, v3, v4;" +
"store d into 'empty';";
expected = buildPlan(query);
assertTrue(expected.isEqual(newLogicalPlan));
// with BinStorage, insert foreach after load
query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
"c = filter a by v2 != NULL;" +
"d = foreach c generate v5, v4;" +
"store d into 'empty';";
newLogicalPlan = buildPlan(query);
optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
optimizer.optimize();
query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
"b = foreach a generate v5, v4, v2;" +
"c = filter b by v2 != NULL;" +
"d = foreach c generate v5, v4;" +
"store d into 'empty';";
expected = buildPlan(query);
assertTrue(expected.isEqual(newLogicalPlan));
// with BinStorage, do not insert a foreach after load if there is already one
query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
"b = foreach a generate v5, v4, v2;" +
"c = filter b by v2 != NULL;" +
"d = foreach c generate v5;" +
"store d into 'empty';";
newLogicalPlan = buildPlan(query);
optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
optimizer.optimize();
query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
"b = foreach a generate v5, v2;" +
"c = filter b by v2 != NULL;" +
"d = foreach c generate v5;" +
"store d into 'empty';";
expected = buildPlan(query);
assertTrue(expected.isEqual(newLogicalPlan));
// with BinStorage, do not insert a foreach after load if there is already one (existing foreach also generates a constant)
query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
"b = foreach a generate v5, v4, v2, 10;" +
"c = filter b by v2 != NULL;" +
"d = foreach c generate v5;" +
"store d into 'empty';";
newLogicalPlan = buildPlan(query);
optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
optimizer.optimize();
query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
"b = foreach a generate v5, v2, 10;" +
"c = filter b by v2 != NULL;" +
"d = foreach c generate v5;" +