/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.pact.compiler;
import org.junit.Assert;
import org.junit.Test;
import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.java.record.operators.FileDataSink;
import eu.stratosphere.api.java.record.operators.FileDataSource;
import eu.stratosphere.api.java.record.operators.JoinOperator;
import eu.stratosphere.api.java.record.operators.MapOperator;
import eu.stratosphere.api.java.record.operators.ReduceOperator;
import eu.stratosphere.compiler.plan.Channel;
import eu.stratosphere.compiler.plan.DualInputPlanNode;
import eu.stratosphere.compiler.plan.OptimizedPlan;
import eu.stratosphere.compiler.plan.PlanNode;
import eu.stratosphere.compiler.plan.SingleInputPlanNode;
import eu.stratosphere.compiler.plan.SinkPlanNode;
import eu.stratosphere.compiler.plantranslate.NepheleJobGraphGenerator;
import eu.stratosphere.pact.compiler.util.DummyInputFormat;
import eu.stratosphere.pact.compiler.util.DummyMatchStub;
import eu.stratosphere.pact.compiler.util.DummyOutputFormat;
import eu.stratosphere.pact.compiler.util.IdentityMap;
import eu.stratosphere.pact.compiler.util.IdentityReduce;
import eu.stratosphere.pact.runtime.shipping.ShipStrategyType;
import eu.stratosphere.pact.runtime.task.util.LocalStrategy;
import eu.stratosphere.types.IntValue;
import eu.stratosphere.util.Visitor;
/**
* Tests in this class:
* <ul>
* <li>Tests that check the correct handling of the properties and strategies in the case where the degree of
* parallelism between tasks is increased or decreased.
* </ul>
*/
@SuppressWarnings("serial")
public class DOPChangeTest extends CompilerTestBase {
/**
* Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
*
* Increases DOP between 1st reduce and 2nd map, so the hash partitioning from 1st reduce is not reusable.
* Expected to re-establish the partitioning between reduce and map via hash partitioning, since even a random
* redistribution would require a full network transfer anyway.
*/
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism1() {
final int degOfPar = DEFAULT_PARALLELISM;
// construct the plan
FileDataSource source = new FileDataSource(new DummyInputFormat(), IN_FILE, "Source");
source.setDegreeOfParallelism(degOfPar);
MapOperator map1 = MapOperator.builder(new IdentityMap()).name("Map1").build();
map1.setDegreeOfParallelism(degOfPar);
map1.setInput(source);
ReduceOperator reduce1 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0).name("Reduce 1").build();
reduce1.setDegreeOfParallelism(degOfPar);
reduce1.setInput(map1);
MapOperator map2 = MapOperator.builder(new IdentityMap()).name("Map2").build();
map2.setDegreeOfParallelism(degOfPar * 2);
map2.setInput(reduce1);
ReduceOperator reduce2 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0).name("Reduce 2").build();
reduce2.setDegreeOfParallelism(degOfPar * 2);
reduce2.setInput(map2);
FileDataSink sink = new FileDataSink(new DummyOutputFormat(), OUT_FILE, "Sink");
sink.setDegreeOfParallelism(degOfPar * 2);
sink.setInput(reduce2);
Plan plan = new Plan(sink, "Test Increasing Degree Of Parallelism");
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
// when reducer 1 distributes its data across the instances of map2, it needs to employ hash partitioning,
// because map2 has twice as many instances and records with the same key must end up at the same mapper
// and, subsequently, the same reducer
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
ShipStrategyType redIn = red2Node.getInput().getShipStrategy();
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, mapIn);
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, redIn);
}
/**
* Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
*
* Increases DOP between 2nd map and 2nd reduce, so the hash partitioning from 1st reduce is not reusable.
* Expected to re-establish partitioning between map and reduce (hash).
*/
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism2() {
final int degOfPar = DEFAULT_PARALLELISM;
// construct the plan
FileDataSource source = new FileDataSource(new DummyInputFormat(), IN_FILE, "Source");
source.setDegreeOfParallelism(degOfPar);
MapOperator map1 = MapOperator.builder(new IdentityMap()).name("Map1").build();
map1.setDegreeOfParallelism(degOfPar);
map1.setInput(source);
ReduceOperator reduce1 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0).name("Reduce 1").build();
reduce1.setDegreeOfParallelism(degOfPar);
reduce1.setInput(map1);
MapOperator map2 = MapOperator.builder(new IdentityMap()).name("Map2").build();
map2.setDegreeOfParallelism(degOfPar);
map2.setInput(reduce1);
ReduceOperator reduce2 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0).name("Reduce 2").build();
reduce2.setDegreeOfParallelism(degOfPar * 2);
reduce2.setInput(map2);
FileDataSink sink = new FileDataSink(new DummyOutputFormat(), OUT_FILE, "Sink");
sink.setDegreeOfParallelism(degOfPar * 2);
sink.setInput(reduce2);
Plan plan = new Plan(sink, "Test Increasing Degree Of Parallelism");
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
// the partitioning established by reducer 1 survives the forward connection to map2 (same DOP), but
// reducer 2 runs with twice the DOP, so its input must be hash partitioned again to bring records with
// the same key to the same reducer
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, mapIn);
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, reduceIn);
}
/**
* Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
*
* Increases DOP between 1st reduce and 2nd map, such that more tasks are on one instance.
* Expected to re-establish partitioning between reduce and map via a local hash.
*/
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {
final int degOfPar = 2 * DEFAULT_PARALLELISM;
// construct the plan
FileDataSource source = new FileDataSource(new DummyInputFormat(), IN_FILE, "Source");
source.setDegreeOfParallelism(degOfPar);
MapOperator map1 = MapOperator.builder(new IdentityMap()).name("Map1").build();
map1.setDegreeOfParallelism(degOfPar);
map1.setInput(source);
ReduceOperator reduce1 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0).name("Reduce 1").build();
reduce1.setDegreeOfParallelism(degOfPar);
reduce1.setInput(map1);
MapOperator map2 = MapOperator.builder(new IdentityMap()).name("Map2").build();
map2.setDegreeOfParallelism(degOfPar * 2);
map2.setInput(reduce1);
ReduceOperator reduce2 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0).name("Reduce 2").build();
reduce2.setDegreeOfParallelism(degOfPar * 2);
reduce2.setInput(map2);
FileDataSink sink = new FileDataSink(new DummyOutputFormat(), OUT_FILE, "Sink");
sink.setDegreeOfParallelism(degOfPar * 2);
sink.setInput(reduce2);
Plan plan = new Plan(sink, "Test Increasing Degree Of Parallelism");
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
// when reducer 1 distributes its data across the instances of map2, it can employ a local hash
// partitioning, because map2 runs twice as many tasks on the same instances; records with the same key
// still end up at the same mapper and, subsequently, the same reducer
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_LOCAL_HASH, mapIn);
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, reduceIn);
}
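/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 *
 * Decreases DOP between 1st reduce and 2nd map, so the hash partitioning from 1st reduce is not reusable.
 * Expected to re-establish the sorted grouping for the 2nd reduce via a local sort.
 */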
@Test
public void checkPropertyHandlingWithDecreasingDegreeOfParallelism() {
final int degOfPar = DEFAULT_PARALLELISM;
// construct the plan
FileDataSource source = new FileDataSource(new DummyInputFormat(), IN_FILE, "Source");
source.setDegreeOfParallelism(degOfPar * 2);
MapOperator map1 = MapOperator.builder(new IdentityMap()).name("Map1").build();
map1.setDegreeOfParallelism(degOfPar * 2);
map1.setInput(source);
ReduceOperator reduce1 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0).name("Reduce 1").build();
reduce1.setDegreeOfParallelism(degOfPar * 2);
reduce1.setInput(map1);
MapOperator map2 = MapOperator.builder(new IdentityMap()).name("Map2").build();
map2.setDegreeOfParallelism(degOfPar);
map2.setInput(reduce1);
ReduceOperator reduce2 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0).name("Reduce 2").build();
reduce2.setDegreeOfParallelism(degOfPar);
reduce2.setInput(map2);
FileDataSink sink = new FileDataSink(new DummyOutputFormat(), OUT_FILE, "Sink");
sink.setDegreeOfParallelism(degOfPar);
sink.setInput(reduce2);
Plan plan = new Plan(sink, "Test Increasing Degree Of Parallelism");
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
// with the decreased DOP, the partitioning and the sort order established by reducer 1 are no longer
// reusable, so reducer 2 must locally re-sort its input to group the records by key
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
Assert.assertEquals("The Reduce 2 Node has an invalid local strategy.", LocalStrategy.SORT, red2Node.getInput().getLocalStrategy());
}
/**
* Checks that re-partitioning happens when the inputs of a two-input contract have different DOPs.
*
* Test Plan:
* <pre>
*
* (source) -> reduce -\
* Match -> (sink)
* (source) -> reduce -/
*
* </pre>
*
*/
@Test
public void checkPropertyHandlingWithTwoInputs() {
// construct the plan
FileDataSource sourceA = new FileDataSource(new DummyInputFormat(), IN_FILE);
FileDataSource sourceB = new FileDataSource(new DummyInputFormat(), IN_FILE);
ReduceOperator redA = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0)
.input(sourceA)
.build();
ReduceOperator redB = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0)
.input(sourceB)
.build();
JoinOperator mat = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
.input1(redA)
.input2(redB)
.build();
FileDataSink sink = new FileDataSink(new DummyOutputFormat(), OUT_FILE, mat);
sourceA.setDegreeOfParallelism(5);
sourceB.setDegreeOfParallelism(7);
redA.setDegreeOfParallelism(5);
redB.setDegreeOfParallelism(7);
mat.setDegreeOfParallelism(5);
sink.setDegreeOfParallelism(5);
// assemble the PACT plan
Plan plan = new Plan(sink, "Partition on DoP Change");
OptimizedPlan oPlan = compileNoStats(plan);
NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
// compile the plan to a job graph to verify that no error is thrown
jobGen.compileJobGraph(oPlan);
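// check that the DOP mismatch between the two reduce inputs is compensated on the second input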
oPlan.accept(new Visitor<PlanNode>() {
@Override
public boolean preVisit(PlanNode visitable) {
if (visitable instanceof DualInputPlanNode) {
DualInputPlanNode node = (DualInputPlanNode) visitable;
Channel c1 = node.getInput1();
Channel c2 = node.getInput2();
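// the match runs with the same DOP as redA (5), so redA's hash partitioning can be reused via a
// FORWARD connection; redB runs with DOP 7, so its output must be re-partitioned by hash to match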
Assert.assertEquals("Incompatible shipping strategy chosen for match", ShipStrategyType.FORWARD, c1.getShipStrategy());
Assert.assertEquals("Incompatible shipping strategy chosen for match", ShipStrategyType.PARTITION_HASH, c2.getShipStrategy());
return false;
}
return true;
}
@Override
public void postVisit(PlanNode visitable) {
// DO NOTHING
}
});
}
}