/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.pact.compiler;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.junit.Assert;
import org.junit.Test;
import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.api.java.IterativeDataSet;
import eu.stratosphere.api.java.functions.JoinFunction;
import eu.stratosphere.api.java.record.operators.BulkIteration;
import eu.stratosphere.api.java.record.operators.DeltaIteration;
import eu.stratosphere.api.java.record.operators.FileDataSink;
import eu.stratosphere.api.java.record.operators.FileDataSource;
import eu.stratosphere.api.java.record.operators.CoGroupOperator;
import eu.stratosphere.api.java.record.operators.CrossOperator;
import eu.stratosphere.api.java.record.operators.JoinOperator;
import eu.stratosphere.api.java.record.operators.MapOperator;
import eu.stratosphere.api.java.record.operators.ReduceOperator;
import eu.stratosphere.compiler.PactCompiler;
import eu.stratosphere.compiler.plan.OptimizedPlan;
import eu.stratosphere.compiler.plan.SinkPlanNode;
import eu.stratosphere.compiler.plantranslate.NepheleJobGraphGenerator;
import eu.stratosphere.pact.compiler.testfunctions.IdentityGroupReducer;
import eu.stratosphere.pact.compiler.testfunctions.IdentityKeyExtractor;
import eu.stratosphere.pact.compiler.testfunctions.IdentityMapper;
import eu.stratosphere.pact.compiler.testfunctions.Top1GroupReducer;
import eu.stratosphere.pact.compiler.util.DummyCoGroupStub;
import eu.stratosphere.pact.compiler.util.DummyCrossStub;
import eu.stratosphere.pact.compiler.util.DummyInputFormat;
import eu.stratosphere.pact.compiler.util.DummyMatchStub;
import eu.stratosphere.pact.compiler.util.DummyNonPreservingMatchStub;
import eu.stratosphere.pact.compiler.util.DummyOutputFormat;
import eu.stratosphere.pact.compiler.util.IdentityMap;
import eu.stratosphere.pact.compiler.util.IdentityReduce;
import eu.stratosphere.types.IntValue;
import eu.stratosphere.types.LongValue;
@SuppressWarnings("serial")
public class BranchingPlansCompilerTest extends CompilerTestBase {
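/**
* Tests that cost computation succeeds when the same intermediate results (Map A
* and Map C) each feed five data sinks.
*/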
@Test
public void testCostComputationWithMultipleDataSinks() {
final int SINKS = 5;
try {
List<FileDataSink> sinks = new ArrayList<FileDataSink>();
// construct the plan
final String out1Path = "file:///test/1";
final String out2Path = "file:///test/2";
FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE);
MapOperator mapA = MapOperator.builder(IdentityMap.class).input(sourceA).name("Map A").build();
MapOperator mapC = MapOperator.builder(IdentityMap.class).input(mapA).name("Map C").build();
FileDataSink[] sinkA = new FileDataSink[SINKS];
FileDataSink[] sinkB = new FileDataSink[SINKS];
for (int sink = 0; sink < SINKS; sink++) {
sinkA[sink] = new FileDataSink(DummyOutputFormat.class, out1Path, mapA, "Sink A:" + sink);
sinks.add(sinkA[sink]);
sinkB[sink] = new FileDataSink(DummyOutputFormat.class, out2Path, mapC, "Sink B:" + sink);
sinks.add(sinkB[sink]);
}
// return the PACT plan
Plan plan = new Plan(sinks, "Plans With Multiple Data Sinks");
OptimizedPlan oPlan = compileNoStats(plan);
// ---------- compile plan to nephele job graph to verify that no error is thrown ----------
NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
jobGen.compileJobGraph(oPlan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
/**
* Tests a plan in which one mapper's result branches into two further mappers that feed three data sinks:
* <pre>
* (SRC A)
* |
* (MAP A)
* / \
* (MAP B) (MAP C)
* / / \
* (SINK A) (SINK B) (SINK C)
* </pre>
*/
@Test
public void testBranchingWithMultipleDataSinks2() {
try {
// construct the plan
final String out1Path = "file:///test/1";
final String out2Path = "file:///test/2";
final String out3Path = "file:///test/3";
FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE);
MapOperator mapA = MapOperator.builder(IdentityMap.class).input(sourceA).name("Map A").build();
MapOperator mapB = MapOperator.builder(IdentityMap.class).input(mapA).name("Map B").build();
MapOperator mapC = MapOperator.builder(IdentityMap.class).input(mapA).name("Map C").build();
FileDataSink sinkA = new FileDataSink(DummyOutputFormat.class, out1Path, mapB, "Sink A");
FileDataSink sinkB = new FileDataSink(DummyOutputFormat.class, out2Path, mapC, "Sink B");
FileDataSink sinkC = new FileDataSink(DummyOutputFormat.class, out3Path, mapC, "Sink C");
List<FileDataSink> sinks = new ArrayList<FileDataSink>();
sinks.add(sinkA);
sinks.add(sinkB);
sinks.add(sinkC);
// return the PACT plan
Plan plan = new Plan(sinks, "Plans With Multiple Data Sinks");
OptimizedPlan oPlan = compileNoStats(plan);
// ---------- check the optimizer plan ----------
// number of sinks
Assert.assertEquals("Wrong number of data sinks.", 3, oPlan.getDataSinks().size());
// sinks contain all sink paths
Set<String> allSinks = new HashSet<String>();
allSinks.add(out1Path);
allSinks.add(out2Path);
allSinks.add(out3Path);
for (SinkPlanNode n : oPlan.getDataSinks()) {
String path = ((FileDataSink) n.getSinkNode().getPactContract()).getFilePath();
Assert.assertTrue("Invalid data sink.", allSinks.remove(path));
}
// ---------- compile plan to nephele job graph to verify that no error is thrown ----------
NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
jobGen.compileJobGraph(oPlan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
/**
* Tests a plan that consumes a single source many times, both directly and through a map:
* <pre>
* SINK
* |
* COGROUP
* +---/ \----+
* / \
* / MATCH10
* / | \
* / | MATCH9
* MATCH5 | | \
* | \ | | MATCH8
* | MATCH4 | | | \
* | | \ | | | MATCH7
* | | MATCH3 | | | | \
* | | | \ | | | | MATCH6
* | | | MATCH2 | | | | | |
* | | | | \ +--+--+--+--+--+
* | | | | MATCH1 MAP
* \ | | | | | /-----------/
* (DATA SOURCE ONE)
* </pre>
*/
@Test
public void testBranchingSourceMultipleTimes() {
try {
// construct the plan
FileDataSource sourceA = new FileDataSource(new DummyInputFormat(), IN_FILE);
JoinOperator mat1 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(sourceA)
.input2(sourceA)
.build();
JoinOperator mat2 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(sourceA)
.input2(mat1)
.build();
JoinOperator mat3 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(sourceA)
.input2(mat2)
.build();
JoinOperator mat4 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(sourceA)
.input2(mat3)
.build();
JoinOperator mat5 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(sourceA)
.input2(mat4)
.build();
MapOperator ma = MapOperator.builder(new IdentityMap()).input(sourceA).build();
JoinOperator mat6 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(ma)
.input2(ma)
.build();
JoinOperator mat7 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(ma)
.input2(mat6)
.build();
JoinOperator mat8 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(ma)
.input2(mat7)
.build();
JoinOperator mat9 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(ma)
.input2(mat8)
.build();
JoinOperator mat10 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(ma)
.input2(mat9)
.build();
CoGroupOperator co = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0, 0)
.input1(mat5)
.input2(mat10)
.build();
FileDataSink sink = new FileDataSink(new DummyOutputFormat(), OUT_FILE, co);
// return the PACT plan
Plan plan = new Plan(sink, "Branching Source Multiple Times");
OptimizedPlan oPlan = compileNoStats(plan);
NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
//Compile plan to verify that no error is thrown
jobGen.compileJobGraph(oPlan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
/**
* Tests a branching plan with three sources in which the MAP and MATCH2 results are each consumed by multiple successors, feeding three data sinks:
* <pre>
* (SINK A)
* | (SINK B) (SINK C)
* CROSS / /
* / \ | +------+
* / \ | /
* REDUCE MATCH2
* | +---/ \
* \ / |
* MAP |
* | |
* COGROUP MATCH1
* / \ / \
* (SRC A) (SRC B) (SRC C)
* </pre>
*/
@Test
public void testBranchingWithMultipleDataSinks() {
try {
// construct the plan
final String out1Path = "file:///test/1";
final String out2Path = "file:///test/2";
final String out3Path = "file:///test/3";
FileDataSource sourceA = new FileDataSource(new DummyInputFormat(), IN_FILE);
FileDataSource sourceB = new FileDataSource(new DummyInputFormat(), IN_FILE);
FileDataSource sourceC = new FileDataSource(new DummyInputFormat(), IN_FILE);
CoGroupOperator co = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
.input1(sourceA)
.input2(sourceB)
.build();
MapOperator ma = MapOperator.builder(new IdentityMap()).input(co).build();
JoinOperator mat1 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(sourceB)
.input2(sourceC)
.build();
JoinOperator mat2 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(ma)
.input2(mat1)
.build();
ReduceOperator r = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0)
.input(ma)
.build();
CrossOperator c = CrossOperator.builder(new DummyCrossStub())
.input1(r)
.input2(mat2)
.build();
FileDataSink sinkA = new FileDataSink(new DummyOutputFormat(), out1Path, c);
FileDataSink sinkB = new FileDataSink(new DummyOutputFormat(), out2Path, mat2);
FileDataSink sinkC = new FileDataSink(new DummyOutputFormat(), out3Path, mat2);
List<FileDataSink> sinks = new ArrayList<FileDataSink>();
sinks.add(sinkA);
sinks.add(sinkB);
sinks.add(sinkC);
// return the PACT plan
Plan plan = new Plan(sinks, "Branching Plans With Multiple Data Sinks");
OptimizedPlan oPlan = compileNoStats(plan);
// ---------- check the optimizer plan ----------
// number of sinks
Assert.assertEquals("Wrong number of data sinks.", 3, oPlan.getDataSinks().size());
// sinks contain all sink paths
Set<String> allSinks = new HashSet<String>();
allSinks.add(out1Path);
allSinks.add(out2Path);
allSinks.add(out3Path);
for (SinkPlanNode n : oPlan.getDataSinks()) {
String path = ((FileDataSink) n.getSinkNode().getPactContract()).getFilePath();
Assert.assertTrue("Invalid data sink.", allSinks.remove(path));
}
// ---------- compile plan to nephele job graph to verify that no error is thrown ----------
NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
jobGen.compileJobGraph(oPlan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
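/**
* Tests a plan in which every operator type (map, reduce, join, cross, and cogroup)
* sits on a branching path and must be compiled with multiple successors.
*/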
@Test
public void testBranchEachContractType() {
try {
// construct the plan
FileDataSource sourceA = new FileDataSource(new DummyInputFormat(), "file:///test/file1", "Source A");
FileDataSource sourceB = new FileDataSource(new DummyInputFormat(), "file:///test/file2", "Source B");
FileDataSource sourceC = new FileDataSource(new DummyInputFormat(), "file:///test/file3", "Source C");
MapOperator map1 = MapOperator.builder(new IdentityMap()).input(sourceA).name("Map 1").build();
ReduceOperator reduce1 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0)
.input(map1)
.name("Reduce 1")
.build();
@SuppressWarnings("unchecked")
JoinOperator match1 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(sourceB, sourceB, sourceC)
.input2(sourceC)
.name("Match 1")
.build();
CoGroupOperator cogroup1 = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
.input1(sourceA)
.input2(sourceB)
.name("CoGroup 1")
.build();
CrossOperator cross1 = CrossOperator.builder(new DummyCrossStub())
.input1(reduce1)
.input2(cogroup1)
.name("Cross 1")
.build();
CoGroupOperator cogroup2 = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
.input1(cross1)
.input2(cross1)
.name("CoGroup 2")
.build();
CoGroupOperator cogroup3 = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
.input1(map1)
.input2(match1)
.name("CoGroup 3")
.build();
MapOperator map2 = MapOperator.builder(new IdentityMap()).input(cogroup3).name("Map 2").build();
CoGroupOperator cogroup4 = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
.input1(map2)
.input2(match1)
.name("CoGroup 4")
.build();
CoGroupOperator cogroup5 = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
.input1(cogroup2)
.input2(cogroup1)
.name("CoGroup 5")
.build();
CoGroupOperator cogroup6 = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
.input1(reduce1)
.input2(cogroup4)
.name("CoGroup 6")
.build();
CoGroupOperator cogroup7 = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
.input1(cogroup5)
.input2(cogroup6)
.name("CoGroup 7")
.build();
FileDataSink sink = new FileDataSink(new DummyOutputFormat(), OUT_FILE, cogroup7);
// return the PACT plan
Plan plan = new Plan(sink, "Branching of each contract type");
OptimizedPlan oPlan = compileNoStats(plan);
NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
//Compile plan to verify that no error is thrown
jobGen.compileJobGraph(oPlan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
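/**
* Tests a plan in which several branches of the same join result are unioned into
* one input of a second join that carries a sort-merge local strategy hint.
*/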
@Test
public void testBranchingUnion() {
try {
// construct the plan
FileDataSource source1 = new FileDataSource(new DummyInputFormat(), IN_FILE);
FileDataSource source2 = new FileDataSource(new DummyInputFormat(), IN_FILE);
JoinOperator mat1 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(source1)
.input2(source2)
.name("Match 1")
.build();
MapOperator ma1 = MapOperator.builder(new IdentityMap()).input(mat1).name("Map1").build();
ReduceOperator r1 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0)
.input(ma1)
.name("Reduce 1")
.build();
ReduceOperator r2 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0)
.input(mat1)
.name("Reduce 2")
.build();
MapOperator ma2 = MapOperator.builder(new IdentityMap()).input(mat1).name("Map 2").build();
MapOperator ma3 = MapOperator.builder(new IdentityMap()).input(ma2).name("Map 3").build();
@SuppressWarnings("unchecked")
JoinOperator mat2 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(r1, r2, ma2, ma3)
.input2(ma2)
.name("Match 2")
.build();
mat2.setParameter(PactCompiler.HINT_LOCAL_STRATEGY, PactCompiler.HINT_LOCAL_STRATEGY_MERGE);
FileDataSink sink = new FileDataSink(new DummyOutputFormat(), OUT_FILE, mat2);
// return the PACT plan
Plan plan = new Plan(sink, "Branching Union");
OptimizedPlan oPlan = compileNoStats(plan);
NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
//Compile plan to verify that no error is thrown
jobGen.compileJobGraph(oPlan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
/**
* Tests the minimal branching case: a single source consumed directly by two data sinks:
* <pre>
* (SRC A)
* / \
* (SINK A) (SINK B)
* </pre>
*/
@Test
public void testBranchingWithMultipleDataSinksSmall() {
try {
// construct the plan
final String out1Path = "file:///test/1";
final String out2Path = "file:///test/2";
FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE);
FileDataSink sinkA = new FileDataSink(DummyOutputFormat.class, out1Path, sourceA);
FileDataSink sinkB = new FileDataSink(DummyOutputFormat.class, out2Path, sourceA);
List<FileDataSink> sinks = new ArrayList<FileDataSink>();
sinks.add(sinkA);
sinks.add(sinkB);
// return the PACT plan
Plan plan = new Plan(sinks, "Plans With Multiple Data Sinks");
OptimizedPlan oPlan = compileNoStats(plan);
// ---------- check the optimizer plan ----------
// number of sinks
Assert.assertEquals("Wrong number of data sinks.", 2, oPlan.getDataSinks().size());
// sinks contain all sink paths
Set<String> allSinks = new HashSet<String>();
allSinks.add(out1Path);
allSinks.add(out2Path);
for (SinkPlanNode n : oPlan.getDataSinks()) {
String path = ((FileDataSink) n.getSinkNode().getPactContract()).getFilePath();
Assert.assertTrue("Invalid data sink.", allSinks.remove(path));
}
// ---------- compile plan to nephele job graph to verify that no error is thrown ----------
NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
jobGen.compileJobGraph(oPlan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
/**
* Tests that a plan consisting of two disjoint source-to-sink pipelines is rejected by the compiler:
* <pre>
* (SINK A) (SINK B)
* / /
* (SRC A) (SRC B)
* </pre>
*/
@Test
public void testSimpleDisjointPlan() {
// construct the plan
final String out1Path = "file:///test/1";
final String out2Path = "file:///test/2";
FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE);
FileDataSource sourceB = new FileDataSource(DummyInputFormat.class, IN_FILE);
FileDataSink sinkA = new FileDataSink(DummyOutputFormat.class, out1Path, sourceA);
FileDataSink sinkB = new FileDataSink(DummyOutputFormat.class, out2Path, sourceB);
List<FileDataSink> sinks = new ArrayList<FileDataSink>();
sinks.add(sinkA);
sinks.add(sinkB);
// return the PACT plan
Plan plan = new Plan(sinks, "Disjoint plan with multiple data sinks");
try {
compileNoStats(plan);
Assert.fail("Plan must not be compilable, it contains disjoint sub-plans.");
}
catch (Exception ex) {
// as expected
}
}
/**
* Tests a disjoint plan in which each of the two sources additionally branches into two data sinks:
* <pre>
* (SINK 3) (SINK 1) (SINK 2) (SINK 4)
* \ / \ /
* (SRC A) (SRC B)
* </pre>
*
* NOTE: This case is currently not caught by the compiler. We should enable the test once it is caught.
*/
// @Test (Deactivated for now because of unsupported feature)
public void testBranchingDisjointPlan() {
// construct the plan
final String out1Path = "file:///test/1";
final String out2Path = "file:///test/2";
final String out3Path = "file:///test/3";
final String out4Path = "file:///test/4";
FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE);
FileDataSource sourceB = new FileDataSource(DummyInputFormat.class, IN_FILE);
FileDataSink sink1 = new FileDataSink(DummyOutputFormat.class, out1Path, sourceA, "1");
FileDataSink sink2 = new FileDataSink(DummyOutputFormat.class, out2Path, sourceB, "2");
FileDataSink sink3 = new FileDataSink(DummyOutputFormat.class, out3Path, sourceA, "3");
FileDataSink sink4 = new FileDataSink(DummyOutputFormat.class, out4Path, sourceB, "4");
List<FileDataSink> sinks = new ArrayList<FileDataSink>();
sinks.add(sink1);
sinks.add(sink2);
sinks.add(sink3);
sinks.add(sink4);
// return the PACT plan
Plan plan = new Plan(sinks, "Disjoint plan with multiple data sinks and branches");
try {
compileNoStats(plan);
Assert.fail("Plan must not be compilable, it contains disjoint sub-plans.");
}
catch (Exception ex) {
// as expected
}
}
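/**
* Tests that the result of a bulk iteration may branch, feeding one data sink
* directly and a second one through a post-iteration mapper.
*/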
@Test
public void testBranchAfterIteration() {
FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source");
BulkIteration iteration = new BulkIteration("Loop");
iteration.setInput(sourceA);
iteration.setMaximumNumberOfIterations(10);
MapOperator mapper = MapOperator.builder(IdentityMap.class).name("Mapper").input(iteration.getPartialSolution()).build();
iteration.setNextPartialSolution(mapper);
FileDataSink sink1 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, iteration, "Sink 1");
MapOperator postMap = MapOperator.builder(IdentityMap.class).name("Post Iteration Mapper")
.input(iteration).build();
FileDataSink sink2 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, postMap, "Sink 2");
List<FileDataSink> sinks = new ArrayList<FileDataSink>();
sinks.add(sink1);
sinks.add(sink2);
Plan plan = new Plan(sinks);
try {
compileNoStats(plan);
}
catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
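/**
* Tests a plan that branches before the iteration: the same source feeds a mapper
* inside the iteration (with the partial solution as a broadcast variable) and a
* mapper after it (with the iteration result as a broadcast variable).
*/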
@Test
public void testBranchBeforeIteration() {
FileDataSource source1 = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source 1");
FileDataSource source2 = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source 2");
BulkIteration iteration = new BulkIteration("Loop");
iteration.setInput(source2);
iteration.setMaximumNumberOfIterations(10);
MapOperator inMap = MapOperator.builder(new IdentityMap())
.input(source1)
.name("In Iteration Map")
.setBroadcastVariable("BC", iteration.getPartialSolution())
.build();
iteration.setNextPartialSolution(inMap);
MapOperator postMap = MapOperator.builder(new IdentityMap())
.input(source1)
.name("Post Iteration Map")
.setBroadcastVariable("BC", iteration)
.build();
FileDataSink sink = new FileDataSink(DummyOutputFormat.class, OUT_FILE, postMap, "Sink");
Plan plan = new Plan(sink);
try {
compileNoStats(plan);
}
catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
/**
* Test to ensure that sourceA is the same node both inside and outside of the iteration.
*
* <pre>
* (SRC A) (SRC B)
* / \ / \
* (SINK 1) (ITERATION) | (SINK 2)
* / \ /
* (SINK 3) (CROSS => NEXT PARTIAL SOLUTION)
* </pre>
*/
@Test
public void testClosure() {
FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source 1");
FileDataSource sourceB = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source 2");
FileDataSink sink1 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, sourceA, "Sink 1");
FileDataSink sink2 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, sourceB, "Sink 2");
BulkIteration iteration = new BulkIteration("Loop");
iteration.setInput(sourceA);
iteration.setMaximumNumberOfIterations(10);
CrossOperator stepFunction = CrossOperator.builder(DummyCrossStub.class).name("StepFunction").
input1(iteration.getPartialSolution()).
input2(sourceB).
build();
iteration.setNextPartialSolution(stepFunction);
FileDataSink sink3 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, iteration, "Sink 3");
List<FileDataSink> sinks = new ArrayList<FileDataSink>();
sinks.add(sink1);
sinks.add(sink2);
sinks.add(sink3);
Plan plan = new Plan(sinks);
try {
compileNoStats(plan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
/**
* Test to ensure that the sources are the same nodes inside and outside of the delta iteration:
* <pre>
* (SRC A) (SRC B) (SRC C)
* / \ / / \
* (SINK 1) (DELTA ITERATION) | (SINK 2)
* / | \ /
* (SINK 3) | (CROSS => NEXT WORKSET)
* | |
* (JOIN => SOLUTION SET DELTA)
* </pre>
*/
@Test
public void testClosureDeltaIteration() {
FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source 1");
FileDataSource sourceB = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source 2");
FileDataSource sourceC = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source 3");
FileDataSink sink1 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, sourceA, "Sink 1");
FileDataSink sink2 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, sourceC, "Sink 2");
DeltaIteration iteration = new DeltaIteration(0, "Loop");
iteration.setInitialSolutionSet(sourceA);
iteration.setInitialWorkset(sourceB);
iteration.setMaximumNumberOfIterations(10);
CrossOperator nextWorkset = CrossOperator.builder(DummyCrossStub.class).name("Next workset").
input1(iteration.getWorkset()).
input2(sourceC).
build();
JoinOperator solutionSetDelta = JoinOperator.builder(DummyMatchStub.class, LongValue.class, 0, 0).
name("Solution set delta").
input1(nextWorkset).
input2(iteration.getSolutionSet()).
build();
iteration.setNextWorkset(nextWorkset);
iteration.setSolutionSetDelta(solutionSetDelta);
FileDataSink sink3 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, iteration, "Sink 3");
List<FileDataSink> sinks = new ArrayList<FileDataSink>();
sinks.add(sink1);
sinks.add(sink2);
sinks.add(sink3);
Plan plan = new Plan(sinks);
try {
compileNoStats(plan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
/**
* Tests a delta iteration that joins against a static (loop-invariant) input:
* <pre>
* +----Iteration-------+
* | |
* /---------< >---------join-----< >---sink
* / (Solution)| / |
* / | / |
* /--map-------< >----\ / /--|
* / (Workset)| \ / / |
* src-map | join------/ |
* \ | / |
* \ +-----/--------------+
* \ /
* \--reduce-------/
* </pre>
*/
@Test
public void testDeltaIterationWithStaticInput() {
FileDataSource source = new FileDataSource(DummyInputFormat.class, IN_FILE, "source");
MapOperator mappedSource = MapOperator.builder(IdentityMap.class).
input(source).
name("Identity mapped source").
build();
ReduceOperator reducedSource = ReduceOperator.builder(IdentityReduce.class).
input(source).
name("Identity reduce source").
build();
DeltaIteration iteration = new DeltaIteration(0,"Loop");
iteration.setMaximumNumberOfIterations(10);
iteration.setInitialSolutionSet(source);
iteration.setInitialWorkset(mappedSource);
JoinOperator nextWorkset = JoinOperator.builder(DummyNonPreservingMatchStub.class, IntValue.class, 0,0).
input1(iteration.getWorkset()).
input2(reducedSource).
name("Next work set").
build();
JoinOperator solutionSetDelta = JoinOperator.builder(DummyNonPreservingMatchStub.class, IntValue.class, 0, 0).
input1(iteration.getSolutionSet()).
input2(nextWorkset).
name("Solution set delta").
build();
iteration.setNextWorkset(nextWorkset);
iteration.setSolutionSetDelta(solutionSetDelta);
FileDataSink sink = new FileDataSink(DummyOutputFormat.class, OUT_FILE, iteration, "Iteration sink");
List<FileDataSink> sinks = new ArrayList<FileDataSink>();
sinks.add(sink);
Plan plan = new Plan(sinks);
try {
compileNoStats(plan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
/**
* Tests a bulk iteration that joins against a static (loop-invariant) input:
* <pre>
* +---------Iteration-------+
* | |
* /--map--< >----\ |
* / | \ /-------< >---sink
* src-map | join------/ |
* \ | / |
* \ +-----/-------------------+
* \ /
* \--reduce--/
* </pre>
*/
@Test
public void testIterationWithStaticInput() {
FileDataSource source = new FileDataSource(DummyInputFormat.class, IN_FILE, "source");
MapOperator mappedSource = MapOperator.builder(IdentityMap.class).
input(source).
name("Identity mapped source").
build();
ReduceOperator reducedSource = ReduceOperator.builder(IdentityReduce.class).
input(source).
name("Identity reduce source").
build();
BulkIteration iteration = new BulkIteration("Loop");
iteration.setInput(mappedSource);
iteration.setMaximumNumberOfIterations(10);
JoinOperator nextPartialSolution = JoinOperator.builder(DummyMatchStub.class, IntValue.class, 0,0).
input1(iteration.getPartialSolution()).
input2(reducedSource).
name("Next partial solution").
build();
iteration.setNextPartialSolution(nextPartialSolution);
FileDataSink sink = new FileDataSink(DummyOutputFormat.class, OUT_FILE, iteration, "Iteration sink");
List<FileDataSink> sinks = new ArrayList<FileDataSink>();
sinks.add(sink);
Plan plan = new Plan(sinks);
try {
compileNoStats(plan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
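/**
* Tests a Java API plan in which branched data sets serve both as join inputs and
* as broadcast variables of the same join.
*/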
@Test
public void testBranchingBroadcastVariable() {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<String> input1 = env.readTextFile(IN_FILE).name("source1");
DataSet<String> input2 = env.readTextFile(IN_FILE).name("source2");
DataSet<String> input3 = env.readTextFile(IN_FILE).name("source3");
DataSet<String> result1 = input1
.map(new IdentityMapper<String>())
.reduceGroup(new Top1GroupReducer<String>())
.withBroadcastSet(input3, "bc");
DataSet<String> result2 = input2
.map(new IdentityMapper<String>())
.reduceGroup(new Top1GroupReducer<String>())
.withBroadcastSet(input3, "bc");
result1.join(result2)
.where(new IdentityKeyExtractor<String>())
.equalTo(new IdentityKeyExtractor<String>())
.with(new JoinFunction<String, String, String>() {
@Override
public String join(String first, String second) {
return null;
}
})
.withBroadcastSet(input3, "bc1")
.withBroadcastSet(input1, "bc2")
.withBroadcastSet(result1, "bc3")
.print();
Plan plan = env.createProgramPlan();
try {
compileNoStats(plan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
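/**
* Tests a Java API plan in which a data set created outside an iteration is used
* as a broadcast variable both before and inside the iteration.
*/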
@Test
public void testBCVariableClosure() {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<String> input = env.readTextFile(IN_FILE).name("source1");
DataSet<String> reduced = input
.map(new IdentityMapper<String>())
.reduceGroup(new Top1GroupReducer<String>());
DataSet<String> initialSolution = input.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "bc");
IterativeDataSet<String> iteration = initialSolution.iterate(100);
iteration.closeWith(iteration.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "red"))
.print();
Plan plan = env.createProgramPlan();
try {
compileNoStats(plan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
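/**
* Tests a Java API plan with three independent bulk iterations over the same input,
* each receiving the same reduced data set as a broadcast variable.
*/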
@Test
public void testMultipleIterations() {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<String> input = env.readTextFile(IN_FILE).name("source1");
DataSet<String> reduced = input
.map(new IdentityMapper<String>())
.reduceGroup(new Top1GroupReducer<String>());
IterativeDataSet<String> iteration1 = input.iterate(100);
IterativeDataSet<String> iteration2 = input.iterate(20);
IterativeDataSet<String> iteration3 = input.iterate(17);
iteration1.closeWith(iteration1.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "bc1")).print();
iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()).withBroadcastSet(reduced, "bc2")).print();
iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()).withBroadcastSet(reduced, "bc3")).print();
Plan plan = env.createProgramPlan();
try {
compileNoStats(plan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
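/**
* Tests a Java API plan in which the same input branches into three independent
* bulk iterations.
*/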
@Test
public void testMultipleIterationsWithClosureBCVars() {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<String> input = env.readTextFile(IN_FILE).name("source1");
IterativeDataSet<String> iteration1 = input.iterate(100);
IterativeDataSet<String> iteration2 = input.iterate(20);
IterativeDataSet<String> iteration3 = input.iterate(17);
iteration1.closeWith(iteration1.map(new IdentityMapper<String>())).print();
iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>())).print();
iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>())).print();
Plan plan = env.createProgramPlan();
try {
compileNoStats(plan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
}