// Package eu.stratosphere.pact.compiler
//
// Source Code of eu.stratosphere.pact.compiler.BranchingPlansCompilerTest

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.pact.compiler;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import junit.framework.Assert;

import org.junit.Test;

import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.api.java.IterativeDataSet;
import eu.stratosphere.api.java.functions.JoinFunction;
import eu.stratosphere.api.java.record.operators.BulkIteration;
import eu.stratosphere.api.java.record.operators.DeltaIteration;
import eu.stratosphere.api.java.record.operators.FileDataSink;
import eu.stratosphere.api.java.record.operators.FileDataSource;
import eu.stratosphere.api.java.record.operators.CoGroupOperator;
import eu.stratosphere.api.java.record.operators.CrossOperator;
import eu.stratosphere.api.java.record.operators.JoinOperator;
import eu.stratosphere.api.java.record.operators.MapOperator;
import eu.stratosphere.api.java.record.operators.ReduceOperator;
import eu.stratosphere.compiler.PactCompiler;
import eu.stratosphere.compiler.plan.OptimizedPlan;
import eu.stratosphere.compiler.plan.SinkPlanNode;
import eu.stratosphere.compiler.plantranslate.NepheleJobGraphGenerator;
import eu.stratosphere.pact.compiler.testfunctions.IdentityGroupReducer;
import eu.stratosphere.pact.compiler.testfunctions.IdentityKeyExtractor;
import eu.stratosphere.pact.compiler.testfunctions.IdentityMapper;
import eu.stratosphere.pact.compiler.testfunctions.Top1GroupReducer;
import eu.stratosphere.pact.compiler.util.DummyCoGroupStub;
import eu.stratosphere.pact.compiler.util.DummyCrossStub;
import eu.stratosphere.pact.compiler.util.DummyInputFormat;
import eu.stratosphere.pact.compiler.util.DummyMatchStub;
import eu.stratosphere.pact.compiler.util.DummyNonPreservingMatchStub;
import eu.stratosphere.pact.compiler.util.DummyOutputFormat;
import eu.stratosphere.pact.compiler.util.IdentityMap;
import eu.stratosphere.pact.compiler.util.IdentityReduce;
import eu.stratosphere.types.IntValue;
import eu.stratosphere.types.LongValue;

@SuppressWarnings("serial")
public class BranchingPlansCompilerTest extends CompilerTestBase {
 
 
  @Test
  public void testCostComputationWithMultipleDataSinks() {
    final int SINKS = 5;
 
    try {
      List<FileDataSink> sinks = new ArrayList<FileDataSink>();
 
      // construct the plan
      final String out1Path = "file:///test/1";
      final String out2Path = "file:///test/2";
 
      FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE);
 
      MapOperator mapA = MapOperator.builder(IdentityMap.class).input(sourceA).name("Map A").build();
      MapOperator mapC = MapOperator.builder(IdentityMap.class).input(mapA).name("Map C").build();
 
      FileDataSink[] sinkA = new FileDataSink[SINKS];
      FileDataSink[] sinkB = new FileDataSink[SINKS];
      for (int sink = 0; sink < SINKS; sink++) {
        sinkA[sink] = new FileDataSink(DummyOutputFormat.class, out1Path, mapA, "Sink A:" + sink);
        sinks.add(sinkA[sink]);
 
        sinkB[sink] = new FileDataSink(DummyOutputFormat.class, out2Path, mapC, "Sink B:" + sink);
        sinks.add(sinkB[sink]);
      }
 
      // return the PACT plan
      Plan plan = new Plan(sinks, "Plans With Multiple Data Sinks");
 
      OptimizedPlan oPlan = compileNoStats(plan);
 
      // ---------- compile plan to nephele job graph to verify that no error is thrown ----------
 
      NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
      jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }


  /**
   *
   * <pre>
   *                (SRC A) 
   *                   |
   *                (MAP A)
   *             /         \  
   *          (MAP B)      (MAP C)
   *           /           /     \
   *        (SINK A)    (SINK B)  (SINK C)
   * </pre>
   */
  @Test
  public void testBranchingWithMultipleDataSinks2() {
    try {
      // construct the plan
      final String out1Path = "file:///test/1";
      final String out2Path = "file:///test/2";
      final String out3Path = "file:///test/3";
 
      FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE);

      MapOperator mapA = MapOperator.builder(IdentityMap.class).input(sourceA).name("Map A").build();
      MapOperator mapB = MapOperator.builder(IdentityMap.class).input(mapA).name("Map B").build();
      MapOperator mapC = MapOperator.builder(IdentityMap.class).input(mapA).name("Map C").build();
     
      FileDataSink sinkA = new FileDataSink(DummyOutputFormat.class, out1Path, mapB, "Sink A");
      FileDataSink sinkB = new FileDataSink(DummyOutputFormat.class, out2Path, mapC, "Sink B");
      FileDataSink sinkC = new FileDataSink(DummyOutputFormat.class, out3Path, mapC, "Sink C");
     
      List<FileDataSink> sinks = new ArrayList<FileDataSink>();
      sinks.add(sinkA);
      sinks.add(sinkB);
      sinks.add(sinkC);
     
      // return the PACT plan
      Plan plan = new Plan(sinks, "Plans With Multiple Data Sinks");
     
      OptimizedPlan oPlan = compileNoStats(plan);
     
      // ---------- check the optimizer plan ----------
     
      // number of sinks
      Assert.assertEquals("Wrong number of data sinks.", 3, oPlan.getDataSinks().size());
     
      // sinks contain all sink paths
      Set<String> allSinks = new HashSet<String>();
      allSinks.add(out1Path);
      allSinks.add(out2Path);
      allSinks.add(out3Path);
     
      for (SinkPlanNode n : oPlan.getDataSinks()) {
        String path = ((FileDataSink) n.getSinkNode().getPactContract()).getFilePath();
        Assert.assertTrue("Invalid data sink.", allSinks.remove(path));
      }
     
      // ---------- compile plan to nephele job graph to verify that no error is thrown ----------
     
      NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
      jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
 

  /**
   * <pre>
   *                              SINK
   *                               |
   *                            COGROUP
   *                        +---/    \----+
   *                       /               \
   *                      /             MATCH10
   *                     /               |    \
   *                    /                |  MATCH9
   *                MATCH5               |  |   \
   *                |   \                |  | MATCH8
   *                | MATCH4             |  |  |   \
   *                |  |   \             |  |  | MATCH7
   *                |  | MATCH3          |  |  |  |   \
   *                |  |  |   \          |  |  |  | MATCH6
   *                |  |  | MATCH2       |  |  |  |  |  |
   *                |  |  |  |   \       +--+--+--+--+--+
   *                |  |  |  | MATCH1            MAP
   *                \  |  |  |  |  | /-----------/
   *                (DATA SOURCE ONE)
   * </pre>
   */
  @Test
  public void testBranchingSourceMultipleTimes() {
    try {
      // construct the plan
      FileDataSource sourceA = new FileDataSource(new DummyInputFormat(), IN_FILE);
     
      JoinOperator mat1 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
        .input1(sourceA)
        .input2(sourceA)
        .build();
      JoinOperator mat2 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
        .input1(sourceA)
        .input2(mat1)
        .build();
      JoinOperator mat3 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
        .input1(sourceA)
        .input2(mat2)
        .build();
      JoinOperator mat4 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
        .input1(sourceA)
        .input2(mat3)
        .build();
      JoinOperator mat5 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
        .input1(sourceA)
        .input2(mat4)
        .build();
     
      MapOperator ma = MapOperator.builder(new IdentityMap()).input(sourceA).build();
     
      JoinOperator mat6 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
        .input1(ma)
        .input2(ma)
        .build();
      JoinOperator mat7 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
        .input1(ma)
        .input2(mat6)
        .build();
      JoinOperator mat8 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
        .input1(ma)
        .input2(mat7)
        .build();
      JoinOperator mat9 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
        .input1(ma)
        .input2(mat8)
        .build();
      JoinOperator mat10 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
        .input1(ma)
        .input2(mat9)
        .build();
     
      CoGroupOperator co = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0, 0)
        .input1(mat5)
        .input2(mat10)
        .build();
 
      FileDataSink sink = new FileDataSink(new DummyOutputFormat(), OUT_FILE, co);
     
      // return the PACT plan
      Plan plan = new Plan(sink, "Branching Source Multiple Times");
     
      OptimizedPlan oPlan = compileNoStats(plan);
     
      NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
     
      //Compile plan to verify that no error is thrown
      jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
 
  /**
   * Branching plan with three sources and three sinks; MATCH2 is consumed
   * by two sinks directly and, via the cross, by a third one.
   *
   * <pre>
   *              (SINK A)
   *                  |    (SINK B)    (SINK C)
   *                CROSS    /          /
   *               /     \   |  +------+
   *              /       \  | /
   *          REDUCE      MATCH2
   *             |    +---/    \
   *              \  /          |
   *               MAP          |
   *                |           |
   *             COGROUP      MATCH1
   *             /     \     /     \
   *        (SRC A)    (SRC B)    (SRC C)
   * </pre>
   */
  @Test
  public void testBranchingWithMultipleDataSinks() {
    try {
      // construct the plan
      final String out1Path = "file:///test/1";
      final String out2Path = "file:///test/2";
      final String out3Path = "file:///test/3";
 
      FileDataSource sourceA = new FileDataSource(new DummyInputFormat(), IN_FILE);
      FileDataSource sourceB = new FileDataSource(new DummyInputFormat(), IN_FILE);
      FileDataSource sourceC = new FileDataSource(new DummyInputFormat(), IN_FILE);
     
      // A and B are co-grouped; the mapped result branches into the reduce
      // and into MATCH2
      CoGroupOperator co = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
        .input1(sourceA)
        .input2(sourceB)
        .build();
      MapOperator ma = MapOperator.builder(new IdentityMap()).input(co).build();
      // source B is consumed a second time here (branch at the source)
      JoinOperator mat1 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
        .input1(sourceB)
        .input2(sourceC)
        .build();
      JoinOperator mat2 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
        .input1(ma)
        .input2(mat1)
        .build();
      ReduceOperator r = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0)
        .input(ma)
        .build();
      CrossOperator c = CrossOperator.builder(new DummyCrossStub())
        .input1(r)
        .input2(mat2)
        .build();
     
      // MATCH2 feeds sinks B and C directly; the cross feeds sink A
      FileDataSink sinkA = new FileDataSink(new DummyOutputFormat(), out1Path, c);
      FileDataSink sinkB = new FileDataSink(new DummyOutputFormat(), out2Path, mat2);
      FileDataSink sinkC = new FileDataSink(new DummyOutputFormat(), out3Path, mat2);
     
      List<FileDataSink> sinks = new ArrayList<FileDataSink>();
      sinks.add(sinkA);
      sinks.add(sinkB);
      sinks.add(sinkC);
     
      // return the PACT plan
      Plan plan = new Plan(sinks, "Branching Plans With Multiple Data Sinks");
     
      OptimizedPlan oPlan = compileNoStats(plan);
     
      // ---------- check the optimizer plan ----------
     
      // number of sinks
      Assert.assertEquals("Wrong number of data sinks.", 3, oPlan.getDataSinks().size());
     
      // sinks contain all sink paths, each exactly once
      Set<String> allSinks = new HashSet<String>();
      allSinks.add(out1Path);
      allSinks.add(out2Path);
      allSinks.add(out3Path);
     
      for (SinkPlanNode n : oPlan.getDataSinks()) {
        String path = ((FileDataSink) n.getSinkNode().getPactContract()).getFilePath();
        Assert.assertTrue("Invalid data sink.", allSinks.remove(path));
      }
     
      // ---------- compile plan to nephele job graph to verify that no error is thrown ----------
     
      NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
      jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
 
  @Test
  public void testBranchEachContractType() {
    try {
      // construct the plan
      FileDataSource sourceA = new FileDataSource(new DummyInputFormat(), "file:///test/file1", "Source A");
      FileDataSource sourceB = new FileDataSource(new DummyInputFormat(), "file:///test/file2", "Source B");
      FileDataSource sourceC = new FileDataSource(new DummyInputFormat(), "file:///test/file3", "Source C");
     
      MapOperator map1 = MapOperator.builder(new IdentityMap()).input(sourceA).name("Map 1").build();
     
      ReduceOperator reduce1 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0)
        .input(map1)
        .name("Reduce 1")
        .build();
     
      @SuppressWarnings("unchecked")
      JoinOperator match1 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
        .input1(sourceB, sourceB, sourceC)
        .input2(sourceC)
        .name("Match 1")
        .build();
      ;
      CoGroupOperator cogroup1 = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
        .input1(sourceA)
        .input2(sourceB)
        .name("CoGroup 1")
        .build();
     
      CrossOperator cross1 = CrossOperator.builder(new DummyCrossStub())
        .input1(reduce1)
        .input2(cogroup1)
        .name("Cross 1")
        .build();
     
     
      CoGroupOperator cogroup2 = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
        .input1(cross1)
        .input2(cross1)
        .name("CoGroup 2")
        .build();
     
      CoGroupOperator cogroup3 = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
        .input1(map1)
        .input2(match1)
        .name("CoGroup 3")
        .build();
     
     
      MapOperator map2 = MapOperator.builder(new IdentityMap()).input(cogroup3).name("Map 2").build();
     
      CoGroupOperator cogroup4 = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
        .input1(map2)
        .input2(match1)
        .name("CoGroup 4")
        .build();
     
      CoGroupOperator cogroup5 = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
        .input1(cogroup2)
        .input2(cogroup1)
        .name("CoGroup 5")
        .build();
     
      CoGroupOperator cogroup6 = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
        .input1(reduce1)
        .input2(cogroup4)
        .name("CoGroup 6")
        .build();
     
      CoGroupOperator cogroup7 = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 0,0)
        .input1(cogroup5)
        .input2(cogroup6)
        .name("CoGroup 7")
        .build();
     
      FileDataSink sink = new FileDataSink(new DummyOutputFormat(), OUT_FILE, cogroup7);
  //    sink.addInput(sourceA);
  //    sink.addInput(co3);
  //    sink.addInput(co4);
  //    sink.addInput(co1);
     
      // return the PACT plan
      Plan plan = new Plan(sink, "Branching of each contract type");
     
      OptimizedPlan oPlan = compileNoStats(plan);
     
      NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
     
      //Compile plan to verify that no error is thrown
      jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
 

  /**
   * Two sources are joined; the join result branches into two reduces and a
   * map chain, all of which are re-united in MATCH2's unioned first input.
   */
  @Test
  public void testBranchingUnion() {
    try {
      // construct the plan
      FileDataSource source1 = new FileDataSource(new DummyInputFormat(), IN_FILE);
      FileDataSource source2 = new FileDataSource(new DummyInputFormat(), IN_FILE);
     
      JoinOperator mat1 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
        .input1(source1)
        .input2(source2)
        .name("Match 1")
        .build();
     
      MapOperator ma1 = MapOperator.builder(new IdentityMap()).input(mat1).name("Map1").build();
     
      ReduceOperator r1 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0)
        .input(ma1)
        .name("Reduce 1")
        .build();
     
      ReduceOperator r2 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0)
        .input(mat1)
        .name("Reduce 2")
        .build();
     
      MapOperator ma2 = MapOperator.builder(new IdentityMap()).input(mat1).name("Map 2").build();
     
      MapOperator ma3 = MapOperator.builder(new IdentityMap()).input(ma2).name("Map 3").build();
     
      // first input unions four branches; ma2 additionally feeds the second
      // input, so its output is consumed twice
      @SuppressWarnings("unchecked")
      JoinOperator mat2 = JoinOperator.builder(new DummyMatchStub(), IntValue.class, 0, 0)
        .input1(r1, r2, ma2, ma3)
        .input2(ma2)
        .name("Match 2")
        .build();
      // hint the compiler to use the merge local strategy for this match
      mat2.setParameter(PactCompiler.HINT_LOCAL_STRATEGY, PactCompiler.HINT_LOCAL_STRATEGY_MERGE);
     
      FileDataSink sink = new FileDataSink(new DummyOutputFormat(), OUT_FILE, mat2);
     
     
      // return the PACT plan
      Plan plan = new Plan(sink, "Branching Union");
     
      OptimizedPlan oPlan = compileNoStats(plan);
     
      NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
     
      //Compile plan to verify that no error is thrown
      jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
 
  /**
   *
   * <pre>
   *             (SRC A)    
   *             /     \     
   *        (SINK A)    (SINK B)
   * </pre>
   */
  @Test
  public void testBranchingWithMultipleDataSinksSmall() {
    try {
      // construct the plan
      final String out1Path = "file:///test/1";
      final String out2Path = "file:///test/2";
 
      FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE);
     
      FileDataSink sinkA = new FileDataSink(DummyOutputFormat.class, out1Path, sourceA);
      FileDataSink sinkB = new FileDataSink(DummyOutputFormat.class, out2Path, sourceA);
     
      List<FileDataSink> sinks = new ArrayList<FileDataSink>();
      sinks.add(sinkA);
      sinks.add(sinkB);
     
      // return the PACT plan
      Plan plan = new Plan(sinks, "Plans With Multiple Data Sinks");
     
      OptimizedPlan oPlan = compileNoStats(plan);
     
      // ---------- check the optimizer plan ----------
     
      // number of sinks
      Assert.assertEquals("Wrong number of data sinks.", 2, oPlan.getDataSinks().size());
     
      // sinks contain all sink paths
      Set<String> allSinks = new HashSet<String>();
      allSinks.add(out1Path);
      allSinks.add(out2Path);
     
      for (SinkPlanNode n : oPlan.getDataSinks()) {
        String path = ((FileDataSink) n.getSinkNode().getPactContract()).getFilePath();
        Assert.assertTrue("Invalid data sink.", allSinks.remove(path));
      }
     
      // ---------- compile plan to nephele job graph to verify that no error is thrown ----------
     
      NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
      jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
 
  /**
   *
   * <pre>
   *           (SINK A)    (SINK B)
   *             /           /
   *         (SRC A)     (SRC B)
   * </pre>
   */
  @Test
  public void testSimpleDisjointPlan() {
    // construct the plan
    final String out1Path = "file:///test/1";
    final String out2Path = "file:///test/2";

    FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE);
    FileDataSource sourceB = new FileDataSource(DummyInputFormat.class, IN_FILE);
   
    FileDataSink sinkA = new FileDataSink(DummyOutputFormat.class, out1Path, sourceA);
    FileDataSink sinkB = new FileDataSink(DummyOutputFormat.class, out2Path, sourceB);
   
    List<FileDataSink> sinks = new ArrayList<FileDataSink>();
    sinks.add(sinkA);
    sinks.add(sinkB);
   
    // return the PACT plan
    Plan plan = new Plan(sinks, "Disjoint plan with multiple data sinks");
   
    try {
      compileNoStats(plan);
      Assert.fail("Plan must not be compilable, it contains disjoint sub-plans.");
    }
    catch (Exception ex) {
      // as expected
    }
  }
 
  /**
   * Two disjoint sub-plans where each source branches into two sinks:
   *
   * <pre>
   *     (SINK 3) (SINK 1)   (SINK 2) (SINK 4)
   *         \     /             \     /
   *         (SRC A)             (SRC B)
   * </pre>
   *
   * NOTE: this case is currently not caught by the compiler. we should enable the test once it is caught.
   */
//  @Test (Deactivated for now because of unsupported feature)
  public void testBranchingDisjointPlan() {
    // construct the plan
    final String out1Path = "file:///test/1";
    final String out2Path = "file:///test/2";
    final String out3Path = "file:///test/3";
    final String out4Path = "file:///test/4";

    // sinks 1 and 3 read source A; sinks 2 and 4 read source B, so the two
    // halves of the plan are disjoint while each still branches
    FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE);
    FileDataSource sourceB = new FileDataSource(DummyInputFormat.class, IN_FILE);
   
    FileDataSink sink1 = new FileDataSink(DummyOutputFormat.class, out1Path, sourceA, "1");
    FileDataSink sink2 = new FileDataSink(DummyOutputFormat.class, out2Path, sourceB, "2");
    FileDataSink sink3 = new FileDataSink(DummyOutputFormat.class, out3Path, sourceA, "3");
    FileDataSink sink4 = new FileDataSink(DummyOutputFormat.class, out4Path, sourceB, "4");
   
   
    List<FileDataSink> sinks = new ArrayList<FileDataSink>();
    sinks.add(sink1);
    sinks.add(sink2);
    sinks.add(sink3);
    sinks.add(sink4);
   
    // return the PACT plan
    Plan plan = new Plan(sinks, "Disjoint plan with multiple data sinks and branches");
   
    try {
      compileNoStats(plan);
      Assert.fail("Plan must not be compilable, it contains disjoint sub-plans.");
    }
    catch (Exception ex) {
      // as expected
    }
  }
 
  @Test
  public void testBranchAfterIteration() {
    FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source 2");
   
    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(sourceA);
    iteration.setMaximumNumberOfIterations(10);
   
    MapOperator mapper = MapOperator.builder(IdentityMap.class).name("Mapper").input(iteration.getPartialSolution()).build();
    iteration.setNextPartialSolution(mapper);
   
    FileDataSink sink1 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, iteration, "Sink 1");
   
    MapOperator postMap = MapOperator.builder(IdentityMap.class).name("Post Iteration Mapper")
        .input(iteration).build();
   
    FileDataSink sink2 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, postMap, "Sink 2");
   
    List<FileDataSink> sinks = new ArrayList<FileDataSink>();
    sinks.add(sink1);
    sinks.add(sink2);
   
    Plan plan = new Plan(sinks);
   
    try {
      compileNoStats(plan);
    }
    catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
 
  /**
   * Source 1 is consumed both inside the iteration (as data input of the
   * in-loop map) and outside of it (post-iteration map), i.e. the plan
   * branches before the iteration.
   */
  @Test
  public void testBranchBeforeIteration() {
    FileDataSource source1 = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source 1");
    FileDataSource source2 = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source 2");
   
    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(source2);
    iteration.setMaximumNumberOfIterations(10);
   
    // in-loop map: data input is source 1, the partial solution arrives as
    // broadcast variable "BC"
    MapOperator inMap = MapOperator.builder(new IdentityMap())
                               .input(source1)
                               .name("In Iteration Map")
                               .setBroadcastVariable("BC", iteration.getPartialSolution())
                               .build();
   
    iteration.setNextPartialSolution(inMap);
   
    // post-iteration map: same data input, iteration result is broadcast
    MapOperator postMap = MapOperator.builder(new IdentityMap())
                     .input(source1)
                     .name("Post Iteration Map")
                     .setBroadcastVariable("BC", iteration)
                     .build();
   
    FileDataSink sink = new FileDataSink(DummyOutputFormat.class, OUT_FILE, postMap, "Sink");
   
    Plan plan = new Plan(sink);
   
    try {
      compileNoStats(plan);
    }
    catch (Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }

  /**
   * Test to ensure that sourceA is inside as well as outside of the iteration the same
   * node.
   *
   * <pre>
   *       (SRC A)               (SRC B)
   *      /       \             /       \
   *  (SINK 1)   (ITERATION)    |     (SINK 2)
   *             /        \     /
   *         (SINK 3)     (CROSS => NEXT PARTIAL SOLUTION)
   * </pre>
   */
  @Test
  public void testClosure() {
    FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source 1");
    FileDataSource sourceB = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source 2");

    FileDataSink sink1 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, sourceA, "Sink 1");
    FileDataSink sink2 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, sourceB, "Sink 2");

    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(sourceA);
    iteration.setMaximumNumberOfIterations(10);

    CrossOperator stepFunction = CrossOperator.builder(DummyCrossStub.class).name("StepFunction").
        input1(iteration.getPartialSolution()).
        input2(sourceB).
        build();

    iteration.setNextPartialSolution(stepFunction);

    FileDataSink sink3 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, iteration, "Sink 3");

    List<FileDataSink> sinks = new ArrayList<FileDataSink>();
    sinks.add(sink1);
    sinks.add(sink2);
    sinks.add(sink3);

    Plan plan = new Plan(sinks);

    try{
      compileNoStats(plan);
    }catch(Exception e){
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }

  /**
   * Sources A and C are consumed both inside and outside of the delta
   * iteration and must be represented by a single node each.
   *
   * <pre>
   *       (SRC A)         (SRC B)          (SRC C)
   *      /       \       /                /       \
   *  (SINK 1) (DELTA ITERATION)          |     (SINK 2)
   *             /    |   \               /
   *         (SINK 3) |   (CROSS => NEXT WORKSET)
   *                  |             |
   *                (JOIN => SOLUTION SET DELTA)
   * </pre>
   */
  @Test
  public void testClosureDeltaIteration() {
    FileDataSource sourceA = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source 1");
    FileDataSource sourceB = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source 2");
    FileDataSource sourceC = new FileDataSource(DummyInputFormat.class, IN_FILE, "Source 3");

    // sources A and C additionally feed sinks outside the iteration
    FileDataSink sink1 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, sourceA, "Sink 1");
    FileDataSink sink2 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, sourceC, "Sink 2");

    DeltaIteration iteration = new DeltaIteration(0, "Loop");
    iteration.setInitialSolutionSet(sourceA);
    iteration.setInitialWorkset(sourceB);
    iteration.setMaximumNumberOfIterations(10);

    // next workset: cross of the current workset with source C
    CrossOperator nextWorkset = CrossOperator.builder(DummyCrossStub.class).name("Next workset").
        input1(iteration.getWorkset()).
        input2(sourceC).
        build();

    // solution set delta: join of the next workset with the solution set
    JoinOperator solutionSetDelta = JoinOperator.builder(DummyMatchStub.class, LongValue.class,0,0).
        name("Next solution set.").
        input1(nextWorkset).
        input2(iteration.getSolutionSet()).
        build();

    iteration.setNextWorkset(nextWorkset);
    iteration.setSolutionSetDelta(solutionSetDelta);

    FileDataSink sink3 = new FileDataSink(DummyOutputFormat.class, OUT_FILE, iteration, "Sink 3");

    List<FileDataSink> sinks = new ArrayList<FileDataSink>();
    sinks.add(sink1);
    sinks.add(sink2);
    sinks.add(sink3);

    Plan plan = new Plan(sinks);

    try{
      compileNoStats(plan);
    }catch(Exception e){
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }

  /**
   * Delta iteration whose workset join reads a reduced view of the same
   * source that also feeds the solution set and (mapped) the workset.
   *
   * <pre>
   *                  +----Iteration-------+
   *                  |                    |
   *       /---------< >---------join-----< >---sink
   *      / (Solution)|           /        |
   *     /            |          /         |
   *    /--map-------< >----\   /       /--|
   *   /     (Workset)|      \ /       /   |
   * src-map          |     join------/    |
   *   \           |      /             |
   *    \             +-----/--------------+
   *     \                 /
   *      \--reduce-------/
   * </pre>
   */
  @Test
  public void testDeltaIterationWithStaticInput() {
    FileDataSource source = new FileDataSource(DummyInputFormat.class, IN_FILE, "source");

    MapOperator mappedSource = MapOperator.builder(IdentityMap.class).
        input(source).
        name("Identity mapped source").
        build();

    // loop-invariant (static) input to the workset join below
    ReduceOperator reducedSource = ReduceOperator.builder(IdentityReduce.class).
        input(source).
        name("Identity reduce source").
        build();

    DeltaIteration iteration = new DeltaIteration(0,"Loop");
    iteration.setMaximumNumberOfIterations(10);
    iteration.setInitialSolutionSet(source);
    iteration.setInitialWorkset(mappedSource);

    // next workset: workset joined with the static reduced source
    JoinOperator nextWorkset = JoinOperator.builder(DummyNonPreservingMatchStub.class, IntValue.class, 0,0).
        input1(iteration.getWorkset()).
        input2(reducedSource).
        name("Next work set").
        build();

    // solution set delta: solution set joined with the next workset
    JoinOperator solutionSetDelta = JoinOperator.builder(DummyNonPreservingMatchStub.class, IntValue.class, 0,
        0).
        input1(iteration.getSolutionSet()).
        input2(nextWorkset).
        name("Solution set delta").
        build();

    iteration.setNextWorkset(nextWorkset);
    iteration.setSolutionSetDelta(solutionSetDelta);

    FileDataSink sink = new FileDataSink(DummyOutputFormat.class, OUT_FILE, iteration, "Iteration sink");
    List<FileDataSink> sinks = new ArrayList<FileDataSink>();
    sinks.add(sink);

    Plan plan = new Plan(sinks);

    try{
      compileNoStats(plan);
    }catch(Exception e){
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }

  /**
   * Tests compiling a bulk iteration whose loop body reads a static (non-iterative) input.
   * <pre>
   *             +---------Iteration-------+
   *             |                         |
   *    /--map--< >----\                   |
   *   /         |      \         /-------< >---sink
   * src-map     |     join------/         |
   *   \         |      /                  |
   *    \        +-----/-------------------+
   *     \            /
   *      \--reduce--/
   * </pre>
   */
  @Test
  public void testIterationWithStaticInput() {
    FileDataSource source = new FileDataSource(DummyInputFormat.class, IN_FILE, "source");

    MapOperator mappedSource = MapOperator.builder(IdentityMap.class).
        input(source).
        name("Identity mapped source").
        build();

    ReduceOperator reducedSource = ReduceOperator.builder(IdentityReduce.class).
        input(source).
        name("Identity reduce source").
        build();

    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(mappedSource);
    iteration.setMaximumNumberOfIterations(10);

    JoinOperator nextPartialSolution = JoinOperator.builder(DummyMatchStub.class, IntValue.class, 0,0).
        input1(iteration.getPartialSolution()).
        input2(reducedSource).
        name("Next partial solution").
        build();

    iteration.setNextPartialSolution(nextPartialSolution);

    FileDataSink sink = new FileDataSink(DummyOutputFormat.class, OUT_FILE, iteration, "Iteration sink");
    List<FileDataSink> sinks = new ArrayList<FileDataSink>();
    sinks.add(sink);

    Plan plan = new Plan(sinks);

    try{
      compileNoStats(plan);
    }catch(Exception e){
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
 
  @Test
  public void testBranchingBroadcastVariable() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<String> input1 = env.readTextFile(IN_FILE).name("source1");
    DataSet<String> input2 = env.readTextFile(IN_FILE).name("source2");
    DataSet<String> input3 = env.readTextFile(IN_FILE).name("source3");
   
    DataSet<String> result1 = input1
        .map(new IdentityMapper<String>())
        .reduceGroup(new Top1GroupReducer<String>())
          .withBroadcastSet(input3, "bc");
   
    DataSet<String> result2 = input2
        .map(new IdentityMapper<String>())
        .reduceGroup(new Top1GroupReducer<String>())
          .withBroadcastSet(input3, "bc");
   
    result1.join(result2)
        .where(new IdentityKeyExtractor<String>())
        .equalTo(new IdentityKeyExtractor<String>())
        .with(new JoinFunction<String, String, String>() {
          @Override
          public String join(String first, String second) {
            return null;
          }
        })
        .withBroadcastSet(input3, "bc1")
        .withBroadcastSet(input1, "bc2")
        .withBroadcastSet(result1, "bc3")
      .print();
   
    Plan plan = env.createProgramPlan();
   
    try{
      compileNoStats(plan);
    }catch(Exception e){
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
 
  @Test
  public void testBCVariableClosure() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<String> input = env.readTextFile(IN_FILE).name("source1");
   
    DataSet<String> reduced = input
        .map(new IdentityMapper<String>())
        .reduceGroup(new Top1GroupReducer<String>());
   
   
    DataSet<String> initialSolution = input.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "bc");
   
   
    IterativeDataSet<String> iteration = initialSolution.iterate(100);
   
    iteration.closeWith(iteration.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "red"))
        .print();
   
    Plan plan = env.createProgramPlan();
   
    try{
      compileNoStats(plan);
    }catch(Exception e){
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
 
  @Test
  public void testMultipleIterations() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<String> input = env.readTextFile(IN_FILE).name("source1");
   
    DataSet<String> reduced = input
        .map(new IdentityMapper<String>())
        .reduceGroup(new Top1GroupReducer<String>());
     
    IterativeDataSet<String> iteration1 = input.iterate(100);
    IterativeDataSet<String> iteration2 = input.iterate(20);
    IterativeDataSet<String> iteration3 = input.iterate(17);
   
    iteration1.closeWith(iteration1.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "bc1")).print();
    iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()).withBroadcastSet(reduced, "bc2")).print();
    iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()).withBroadcastSet(reduced, "bc3")).print();
   
    Plan plan = env.createProgramPlan();
   
    try{
      compileNoStats(plan);
    }catch(Exception e){
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
 
  @Test
  public void testMultipleIterationsWithClosueBCVars() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<String> input = env.readTextFile(IN_FILE).name("source1");
     
    IterativeDataSet<String> iteration1 = input.iterate(100);
    IterativeDataSet<String> iteration2 = input.iterate(20);
    IterativeDataSet<String> iteration3 = input.iterate(17);
   
   
    iteration1.closeWith(iteration1.map(new IdentityMapper<String>())).print();
    iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>())).print();
    iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>())).print();
   
    Plan plan = env.createProgramPlan();
   
    try{
      compileNoStats(plan);
    }catch(Exception e){
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }
}
TOP

Related Classes of eu.stratosphere.pact.compiler.BranchingPlansCompilerTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.