Examples of org.apache.flink.api.java.ExecutionEnvironment

org.apache.flink.api.java.ExecutionEnvironment
The ExecutionEnvironment is the context in which a program is executed. A {@link LocalEnvironment} will cause execution in the current JVM, a {@link RemoteEnvironment} will cause execution on a remote setup.
The environment provides methods to control the job execution (such as setting the parallelism) and to interact with the outside world (data access).
Please note that the execution environment needs strong type information for the input and return types of all operations that are executed. This means that the environment needs to know that the return value of an operation is, for example, a Tuple of String and Integer. Because the Java compiler throws much of the generic type information away, most methods attempt to re-obtain that information using reflection. In certain cases, it may be necessary to manually supply that information to some of the methods. @see LocalEnvironment @see RemoteEnvironment

public class WorksetIterationCornerCasesTest extends CompilerTestBase {


  @Test
  public void testWorksetIterationNotDependingOnSolutionSet() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      
      DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 100).map(new Duplicator<Long>());
      
      DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 100, 1);
      
      DataSet<Tuple2<Long, Long>> iterEnd = iteration.getWorkset().map(new TestMapper<Tuple2<Long,Long>>());
      iteration.closeWith(iterEnd, iterEnd).print();
      
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
      
      WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
      assertTrue(wipn.getSolutionSetPlanNode().getOutgoingChannels().isEmpty());

View Full Code Here

    }
  }
  
  private Plan getTestPlanRightStatic(String strategy) {
    
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
    
    DataSet<Tuple3<Long, Long, Long>> bigInput = env.readCsvFile("file://bigFile").types(Long.class, Long.class, Long.class).name("bigFile");
    
    DataSet<Tuple3<Long, Long, Long>> smallInput = env.readCsvFile("file://smallFile").types(Long.class, Long.class, Long.class).name("smallFile");
    
    IterativeDataSet<Tuple3<Long, Long, Long>> iteration = bigInput.iterate(10);
    
    Configuration joinStrategy = new Configuration();
    joinStrategy.setString(PactCompiler.HINT_SHIP_STRATEGY, PactCompiler.HINT_SHIP_STRATEGY_REPARTITION_HASH);
    
    if(strategy != "") {
      joinStrategy.setString(PactCompiler.HINT_LOCAL_STRATEGY, strategy);
    }
    
    DataSet<Tuple3<Long, Long, Long>> inner = iteration.join(smallInput).where(0).equalTo(0).with(new DummyJoiner()).name("DummyJoiner").withParameters(joinStrategy);


    DataSet<Tuple3<Long, Long, Long>> output = iteration.closeWith(inner);
    
    output.print();
    
    return env.createProgramPlan();
    
  }

View Full Code Here

    
  }
  
  private Plan getTestPlanLeftStatic(String strategy) {
    
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
    
    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> bigInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L),
        new Tuple3<Long, Long, Long>(1L, 2L, 3L),new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Big");
    
    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> smallInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Small");
    
    IterativeDataSet<Tuple3<Long, Long, Long>> iteration = bigInput.iterate(10);
    
    Configuration joinStrategy = new Configuration();
    joinStrategy.setString(PactCompiler.HINT_LOCAL_STRATEGY, strategy);
    
    DataSet<Tuple3<Long, Long, Long>> inner = smallInput.join(iteration).where(0).equalTo(0).with(new DummyJoiner()).name("DummyJoiner").withParameters(joinStrategy);


    DataSet<Tuple3<Long, Long, Long>> output = iteration.closeWith(inner);
    
    output.print();
    
    return env.createProgramPlan();
    
  }

View Full Code Here


  
  public static void main(String[] args) throws Exception {
    final int numVertices = 100;
    
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    
    // enumerate some sample edges and assign an initial uniform probability (rank)
    DataSet<Tuple2<Long, Double>> intialRanks = env.generateSequence(1, numVertices)
                .map(new MapFunction<Long, Tuple2<Long, Double>>() {
                  public Tuple2<Long, Double> map(Long value) {
                    return new Tuple2<Long, Double>(value, 1.0/numVertices);
                  }
                });
    
    // generate some random edges. the transition probability on each edge is 1/num-out-edges of the source vertex
    DataSet<Tuple3<Long, Long, Double>> edgesWithProbability = env.generateSequence(1, numVertices)
                .flatMap(new FlatMapFunction<Long, Tuple3<Long, Long, Double>>() {
                  public void flatMap(Long value, Collector<Tuple3<Long, Long, Double>> out) {
                    int numOutEdges = (int) (Math.random() * (numVertices / 2));
                    for (int i = 0; i < numOutEdges; i++) {
                      long target = (long) (Math.random() * numVertices) + 1;
                      out.collect(new Tuple3<Long, Long, Double>(value, target, 1.0/numOutEdges));
                    }
                  }
                });
    
    DataSet<Tuple2<Long, Double>> result = intialRanks.runOperation(
      VertexCentricIteration.withValuedEdges(edgesWithProbability,
            new VertexRankUpdater(numVertices, BETA), new RankMessenger(), 20));
    
    result.print();
    env.execute("Spargel PageRank");
  }

View Full Code Here


@SuppressWarnings({"serial", "unchecked"})
public class SpargelConnectedComponents {


  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    
    DataSet<Long> vertexIds = env.generateSequence(0, 10);
    DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(0L, 2L), new Tuple2<Long, Long>(2L, 4L), new Tuple2<Long, Long>(4L, 8L),
                              new Tuple2<Long, Long>(1L, 5L), new Tuple2<Long, Long>(3L, 7L), new Tuple2<Long, Long>(3L, 9L));
    
    DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());
    
    DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100));
    
    result.print();
    env.execute("Spargel Connected Components");
  }

View Full Code Here


  
  public static void main(String[] args) throws Exception {
    final int NUM_VERTICES = 100;
    
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    
    // a list of vertices
    DataSet<Long> vertices = env.generateSequence(1, NUM_VERTICES);
    
    // generate some random edges. the transition probability on each edge is 1/num-out-edges of the source vertex
    DataSet<Tuple3<Long, Long, Double>> edgesWithProbability = env.generateSequence(1, NUM_VERTICES)
                .flatMap(new FlatMapFunction<Long, Tuple3<Long, Long, Double>>() {
                  public void flatMap(Long value, Collector<Tuple3<Long, Long, Double>> out) {
                    int numOutEdges = (int) (Math.random() * (NUM_VERTICES / 2));
                    for (int i = 0; i < numOutEdges; i++) {
                      long target = (long) (Math.random() * NUM_VERTICES) + 1;
                      out.collect(new Tuple3<Long, Long, Double>(value, target, 1.0/numOutEdges));
                    }
                  }
                });
    
    // ---------- start of the algorithm ---------------
    
    // count the number of vertices
    DataSet<Long> count = vertices
      .map(new MapFunction<Long, Long>() {
        public Long map(Long value) {
          return 1L;
        }
      })
      .reduce(new ReduceFunction<Long>() {
        public Long reduce(Long value1, Long value2) {
          return value1 + value2;
        }
      });
    
    // enumerate some sample edges and assign an initial uniform probability (rank)
    DataSet<Tuple2<Long, Double>> intialRanks = vertices
      .map(new RichMapFunction<Long, Tuple2<Long, Double>>() {
        
        private long numVertices;
        
        @Override
        public void open(Configuration parameters) {
          numVertices = getRuntimeContext().<Long>getBroadcastVariable("count").iterator().next();
        }
        
        public Tuple2<Long, Double> map(Long value) {
          return new Tuple2<Long, Double>(value, 1.0/numVertices);
        }
      }).withBroadcastSet(count, "count");
    


    VertexCentricIteration<Long, Double, Double, Double> iteration = VertexCentricIteration.withValuedEdges(edgesWithProbability,
        new VertexRankUpdater(BETA), new RankMessenger(), 20);
    iteration.addBroadcastSetForUpdateFunction("count", count);
    
    
    DataSet<Tuple2<Long, Double>> result = intialRanks.runOperation(iteration);
    
    result.print();
    env.execute("Spargel PageRank");
  }

View Full Code Here

  public void testUnionNewApiAssembly() {
    final int NUM_INPUTS = 4;
    
    // construct the plan it will be multiple flat maps, all unioned
    // and the "unioned" dataSet will be grouped
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    
    DataSet<String> source = env.readTextFile(IN_FILE);
    DataSet<Tuple2<String, Integer>> lastUnion = source.flatMap(new DummyFlatMap());
  
    for (int i = 1; i< NUM_INPUTS; i++){
      lastUnion = lastUnion.union(source.flatMap(new DummyFlatMap()));
    }
    
    DataSet<Tuple2<String, Integer>> result = lastUnion.groupBy(0).aggregate(Aggregations.SUM, 1);
    result.writeAsText(OUT_FILE);
  
    // return the plan
    Plan plan = env.createProgramPlan("Test union on new java-api");
    OptimizedPlan oPlan = compileNoStats(plan);
    NepheleJobGraphGenerator jobGen = new NepheleJobGraphGenerator();
    
    // Compile plan to verify that no error is thrown
    jobGen.compileJobGraph(oPlan);

View Full Code Here

    }
  }


  @Test
  public void testCoGroupSolutionSet() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple1<Integer>> raw = env.readCsvFile(IN_FILE).types(Integer.class);


    DeltaIteration<Tuple1<Integer>, Tuple1<Integer>> iteration = raw.iterateDelta(raw, 1000, 0);


    DataSet<Tuple1<Integer>> test = iteration.getWorkset().map(new SimpleMap());
    DataSet<Tuple1<Integer>> delta = iteration.getSolutionSet().coGroup(test).where(0).equalTo(0).with(new SimpleCGroup());
    DataSet<Tuple1<Integer>> feedback = iteration.getWorkset().map(new SimpleMap());
    DataSet<Tuple1<Integer>> result = iteration.closeWith(delta, feedback);


    result.print();


    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = null;
    try {
      oPlan = compileNoStats(plan);
    } catch(CompilerException e) {
      Assert.fail(e.getMessage());

View Full Code Here

        
        /*
         * UDF Join on tuples with key field positions
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs = 
            ds1.join(ds2)
            .where(1)
            .equalTo(1)
            .with(new T3T5FlatJoin());
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hi,Hallo\n" +
            "Hello,Hallo Welt\n" +
            "Hello world,Hallo Welt\n";
        
      }
      case 2: {
        
        /*
         * UDF Join on tuples with multiple key field positions
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs = 
            ds1.join(ds2)
               .where(0,1)
               .equalTo(0,4)
               .with(new T3T5FlatJoin());
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hi,Hallo\n" +
            "Hello,Hallo Welt\n" +
            "Hello world,Hallo Welt wie gehts?\n" +
            "Hello world,ABC\n" +
            "I am fine.,HIJ\n" +
            "I am fine.,IJK\n";
        
      }
      case 3: {
        
        /*
         * Default Join on tuples
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<Tuple3<Integer, Long, String>,Tuple5<Integer, Long, Integer, String, Long>>> joinDs = 
            ds1.join(ds2)
               .where(0)
               .equalTo(2);
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "(1,1,Hi),(2,2,1,Hallo Welt,2)\n" +
            "(2,2,Hello),(2,3,2,Hallo Welt wie,1)\n" +
            "(3,2,Hello world),(3,4,3,Hallo Welt wie gehts?,2)\n";
      
      }
      case 4: {
        
        /*
         * Join with Huge
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs = ds1.joinWithHuge(ds2)
                              .where(1)
                              .equalTo(1)
                              .with(new T3T5FlatJoin());
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hi,Hallo\n" +
            "Hello,Hallo Welt\n" +
            "Hello world,Hallo Welt\n";
        
      }
      case 5: {
        
        /*
         * Join with Tiny
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs = 
            ds1.joinWithTiny(ds2)
               .where(1)
               .equalTo(1)
               .with(new T3T5FlatJoin());
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hi,Hallo\n" +
            "Hello,Hallo Welt\n" +
            "Hello world,Hallo Welt\n";
        
      }
      
      case 6: {
        
        /*
         * Join that returns the left input object
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> joinDs = 
            ds1.join(ds2)
               .where(1)
               .equalTo(1)
               .with(new LeftReturningJoin());
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1,Hi\n" +
            "2,2,Hello\n" +
            "3,2,Hello world\n";
      }
      case 7: {
        
        /*
         * Join that returns the right input object
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> joinDs = 
            ds1.join(ds2)
               .where(1)
               .equalTo(1)
               .with(new RightReturningJoin());
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1,0,Hallo,1\n" +
            "2,2,1,Hallo Welt,2\n" +
            "2,2,1,Hallo Welt,2\n";
      }
      case 8: {
        
        /*
         * Join with broadcast set
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Integer> intDs = CollectionDataSets.getIntegerDataSet(env);
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.getSmall5TupleDataSet(env);
        DataSet<Tuple3<String, String, Integer>> joinDs = 
            ds1.join(ds2)
               .where(1)
               .equalTo(4)
               .with(new T3T5BCJoin())
               .withBroadcastSet(intDs, "ints");
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hi,Hallo,55\n" +
            "Hi,Hallo Welt wie,55\n" +
            "Hello,Hallo Welt,55\n" +
            "Hello world,Hallo Welt,55\n";
      }
      case 9: {
      
        /*
         * Join on a tuple input with key field selector and a custom type input with key extractor
         */


        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();


        DataSet<CustomType> ds1 = CollectionDataSets.getSmallCustomTypeDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs =
            ds1.join(ds2)
               .where(new KeySelector<CustomType, Integer>() {
                    @Override
                    public Integer getKey(CustomType value) {
                      return value.myInt;
                    }
                  }
               )
               .equalTo(0)
               .with(new CustT3Join());


        joinDs.writeAsCsv(resultPath);
        env.execute();


        // return expected result
        return "Hi,Hi\n" +
            "Hello,Hello\n" +
            "Hello world,Hello\n";


        }
      case 10: {
        
        /*
         * Project join on a tuple input 1
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple6<String, Long, String, Integer, Long, Long>> joinDs = 
            ds1.join(ds2)
               .where(1)
               .equalTo(1)
               .projectFirst(2,1)
               .projectSecond(3)
               .projectFirst(0)
               .projectSecond(4,1)
               .types(String.class, Long.class, String.class, Integer.class, Long.class, Long.class);
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hi,1,Hallo,1,1,1\n" +
            "Hello,2,Hallo Welt,2,2,2\n" +
            "Hello world,2,Hallo Welt,3,2,2\n";
        
      }
      case 11: {
        
        /*
         * Project join on a tuple input 2
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple6<String, String, Long, Long, Long, Integer>> joinDs = 
            ds1.join(ds2)
               .where(1)
               .equalTo(1)
               .projectSecond(3)
               .projectFirst(2,1)
               .projectSecond(4,1)
               .projectFirst(0)
               .types(String.class, String.class, Long.class, Long.class, Long.class, Integer.class);
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hallo,Hi,1,1,1,1\n" +
            "Hallo Welt,Hello,2,2,2,2\n" +
            "Hallo Welt,Hello world,2,2,2,3\n";
      }
        
      case 12: {
        
        /*
         * Join on a tuple input with key field selector and a custom type input with key extractor
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<Tuple2<String, String>> joinDs = 
            ds1.join(ds2)
               .where(1).equalTo(new KeySelector<CustomType, Long>() {
                     @Override
                     public Long getKey(CustomType value) {
                       return value.myLong;
                     }
                   })
               .with(new T3CustJoin());
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hi,Hello\n" +
            "Hello,Hello world\n" +
            "Hello world,Hello world\n";
            
      }
      
      case 13: {
        
        /*
         * (Default) Join on two custom type inputs with key extractors
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<CustomType> ds1 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getSmallCustomTypeDataSet(env);
        
        DataSet<Tuple2<CustomType, CustomType>> joinDs = 
          ds1.join(ds2)
             .where(
                 new KeySelector<CustomType, Integer>() {
                   @Override
                   public Integer getKey(CustomType value) {
                     return value.myInt;
                   }
                 }
                )
            .equalTo(
                new KeySelector<CustomType, Integer>() {
                     @Override
                     public Integer getKey(CustomType value) {
                       return value.myInt;
                     }
                   }
                );
                                        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,0,Hi,1,0,Hi\n" +
            "2,1,Hello,2,1,Hello\n" +
            "2,1,Hello,2,2,Hello world\n" +
            "2,2,Hello world,2,1,Hello\n" +
            "2,2,Hello world,2,2,Hello world\n";
  
      }
      case 14: {
        /*
         * UDF Join on tuples with tuple-returning key selectors
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs = 
            ds1.join(ds2)
               .where(new KeySelector<Tuple3<Integer,Long,String>, Tuple2<Integer, Long>>() {
                private static final long serialVersionUID = 1L;
                
                @Override
                public Tuple2<Integer, Long> getKey(Tuple3<Integer,Long,String> t) {
                  return new Tuple2<Integer, Long>(t.f0, t.f1);
                }
              })
               .equalTo(new KeySelector<Tuple5<Integer,Long,Integer,String,Long>, Tuple2<Integer, Long>>() {
                private static final long serialVersionUID = 1L;
                
                @Override
                public Tuple2<Integer, Long> getKey(Tuple5<Integer,Long,Integer,String,Long> t) {
                  return new Tuple2<Integer, Long>(t.f0, t.f4);
                }
              })
               .with(new T3T5FlatJoin());
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hi,Hallo\n" +
            "Hello,Hallo Welt\n" +
            "Hello world,Hallo Welt wie gehts?\n" +
            "Hello world,ABC\n" +
            "I am fine.,HIJ\n" +
            "I am fine.,IJK\n";
      }
      /**
       *  Joins with POJOs
       */
      case 15: {
        /*
         * Join nested pojo against tuple (selected using a string)
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long> >> joinDs = 
            ds1.join(ds2).where("nestedPojo.longNumber").equalTo("f6");
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" +
             "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" +
             "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";
      }
      
      case 16: {
        /*
         * Join nested pojo against tuple (selected as an integer)
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long> >> joinDs = 
            ds1.join(ds2).where("nestedPojo.longNumber").equalTo(6); // <--- difference!
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" +
             "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" +
             "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";
      }
      case 17: {
        /*
         * selecting multiple fields using expression language
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long> >> joinDs = 
            ds1.join(ds2).where("nestedPojo.longNumber", "number", "str").equalTo("f6","f0","f1");
        
        joinDs.writeAsCsv(resultPath);
        env.setDegreeOfParallelism(1);
        env.execute();
        
        // return expected result
        return "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" +
             "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" +
             "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";
        
      }
      case 18: {
        /*
         * nested into tuple
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long> >> joinDs = 
            ds1.join(ds2).where("nestedPojo.longNumber", "number","nestedTupleWithCustom.f0").equalTo("f6","f0","f2");
        
        joinDs.writeAsCsv(resultPath);
        env.setDegreeOfParallelism(1);
        env.execute();
        
        // return expected result
        return "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" +
             "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" +
             "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";
        
      }
      case 19: {
        /*
         * nested into tuple into pojo
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long> >> joinDs = 
            ds1.join(ds2).where("nestedTupleWithCustom.f0","nestedTupleWithCustom.f1.myInt","nestedTupleWithCustom.f1.myLong").equalTo("f2","f3","f4");
        
        joinDs.writeAsCsv(resultPath);
        env.setDegreeOfParallelism(1);
        env.execute();
        
        // return expected result
        return "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" +
             "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" +
             "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";
        
      }
      case 20: {
        /*
         * Non-POJO test to verify that full-tuple keys are working.
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds1 = CollectionDataSets.getSmallNestedTupleDataSet(env);
        DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds2 = CollectionDataSets.getSmallNestedTupleDataSet(env);
        DataSet<Tuple2<Tuple2<Tuple2<Integer, Integer>, String>, Tuple2<Tuple2<Integer, Integer>, String> >> joinDs = 
            ds1.join(ds2).where(0).equalTo("f0.f0", "f0.f1"); // key is now Tuple2<Integer, Integer>
        
        joinDs.writeAsCsv(resultPath);
        env.setDegreeOfParallelism(1);
        env.execute();
        
        // return expected result
        return "((1,1),one),((1,1),one)\n" +
             "((2,2),two),((2,2),two)\n" +
             "((3,3),three),((3,3),three)\n";
        
      }
      case 21: {
        /*
         * Non-POJO test to verify "nested" tuple-element selection.
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds1 = CollectionDataSets.getSmallNestedTupleDataSet(env);
        DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds2 = CollectionDataSets.getSmallNestedTupleDataSet(env);
        DataSet<Tuple2<Tuple2<Tuple2<Integer, Integer>, String>, Tuple2<Tuple2<Integer, Integer>, String> >> joinDs = 
            ds1.join(ds2).where("f0.f0").equalTo("f0.f0"); // key is now Integer from Tuple2<Integer, Integer>
        
        joinDs.writeAsCsv(resultPath);
        env.setDegreeOfParallelism(1);
        env.execute();
        
        // return expected result
        return "((1,1),one),((1,1),one)\n" +
             "((2,2),two),((2,2),two)\n" +
             "((3,3),three),((3,3),three)\n";
        
      }
      case 22: {
        /*
         * full pojo with full tuple
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Long, Integer, Integer, Long, String, Integer, String>> ds2 = CollectionDataSets.getSmallTuplebasedDataSetMatchingPojo(env);
        DataSet<Tuple2<POJO, Tuple7<Long, Integer, Integer, Long, String, Integer, String> >> joinDs = 
            ds1.join(ds2).where("*").equalTo("*");
        
        joinDs.writeAsCsv(resultPath);
        env.setDegreeOfParallelism(1);
        env.execute();
        
        // return expected result
        return "1 First (10,100,1000,One) 10000,(10000,10,100,1000,One,1,First)\n"+
            "2 Second (20,200,2000,Two) 20000,(20000,20,200,2000,Two,2,Second)\n"+
            "3 Third (30,300,3000,Three) 30000,(30000,30,300,3000,Three,3,Third)\n";

View Full Code Here

        
        /*
         * check correctness of groupReduce on tuples with key field selector
         */


          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();


          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple2<Integer, Long>> reduceDs = ds.
              groupBy(1).reduceGroup(new Tuple3GroupReduce());


          reduceDs.writeAsCsv(resultPath);
          env.execute();


          // return expected result
          return "1,1\n" +
              "5,2\n" +
              "15,3\n" +
              "34,4\n" +
              "65,5\n" +
              "111,6\n";
        }
        case 2: {
        
        /*
         * check correctness of groupReduce on tuples with multiple key field selector
         */


          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();


          DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
          DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds.
              groupBy(4, 0).reduceGroup(new Tuple5GroupReduce());


          reduceDs.writeAsCsv(resultPath);
          env.execute();


          // return expected result
          return "1,1,0,P-),1\n" +
              "2,3,0,P-),1\n" +
              "2,2,0,P-),2\n" +
              "3,9,0,P-),2\n" +
              "3,6,0,P-),3\n" +
              "4,17,0,P-),1\n" +
              "4,17,0,P-),2\n" +
              "5,11,0,P-),1\n" +
              "5,29,0,P-),2\n" +
              "5,25,0,P-),3\n";
        }
        case 3: {
        
        /*
         * check correctness of groupReduce on tuples with key field selector and group sorting
         */


          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);


          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
              groupBy(1).sortGroup(2, Order.ASCENDING).reduceGroup(new Tuple3SortedGroupReduce());


          reduceDs.writeAsCsv(resultPath);
          env.execute();


          // return expected result
          return "1,1,Hi\n" +
              "5,2,Hello-Hello world\n" +
              "15,3,Hello world, how are you?-I am fine.-Luke Skywalker\n" +
              "34,4,Comment#1-Comment#2-Comment#3-Comment#4\n" +
              "65,5,Comment#5-Comment#6-Comment#7-Comment#8-Comment#9\n" +
              "111,6,Comment#10-Comment#11-Comment#12-Comment#13-Comment#14-Comment#15\n";


        }
        case 4: {
        /*
         * check correctness of groupReduce on tuples with key extractor
         */


          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();


          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple2<Integer, Long>> reduceDs = ds.
              groupBy(new KeySelector<Tuple3<Integer, Long, String>, Long>() {
                private static final long serialVersionUID = 1L;


                @Override
                public Long getKey(Tuple3<Integer, Long, String> in) {
                  return in.f1;
                }
              }).reduceGroup(new Tuple3GroupReduce());


          reduceDs.writeAsCsv(resultPath);
          env.execute();


          // return expected result
          return "1,1\n" +
              "5,2\n" +
              "15,3\n" +
              "34,4\n" +
              "65,5\n" +
              "111,6\n";


        }
        case 5: {
        
        /*
         * check correctness of groupReduce on custom type with type extractor
         */


          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();


          DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
          DataSet<CustomType> reduceDs = ds.
              groupBy(new KeySelector<CustomType, Integer>() {
                private static final long serialVersionUID = 1L;


                @Override
                public Integer getKey(CustomType in) {
                  return in.myInt;
                }
              }).reduceGroup(new CustomTypeGroupReduce());


          reduceDs.writeAsText(resultPath);
          env.execute();


          // return expected result
          return "1,0,Hello!\n" +
              "2,3,Hello!\n" +
              "3,12,Hello!\n" +
              "4,30,Hello!\n" +
              "5,60,Hello!\n" +
              "6,105,Hello!\n";
        }
        case 6: {
        
        /*
         * check correctness of all-groupreduce for tuples
         */


          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();


          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.reduceGroup(new AllAddingTuple3GroupReduce());


          reduceDs.writeAsCsv(resultPath);
          env.execute();


          // return expected result
          return "231,91,Hello World\n";
        }
        case 7: {
        /*
         * check correctness of all-groupreduce for custom types
         */


          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();


          DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
          DataSet<CustomType> reduceDs = ds.reduceGroup(new AllAddingCustomTypeGroupReduce());


          reduceDs.writeAsText(resultPath);
          env.execute();


          // return expected result
          return "91,210,Hello!";
        }
        case 8: {
        
        /*
         * check correctness of groupReduce with broadcast set
         */


          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();


          DataSet<Integer> intDs = CollectionDataSets.getIntegerDataSet(env);


          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
              groupBy(1).reduceGroup(new BCTuple3GroupReduce()).withBroadcastSet(intDs, "ints");


          reduceDs.writeAsCsv(resultPath);
          env.execute();


          // return expected result
          return "1,1,55\n" +
              "5,2,55\n" +
              "15,3,55\n" +
              "34,4,55\n" +
              "65,5,55\n" +
              "111,6,55\n";
        }
        case 9: {
        
        /*
         * check correctness of groupReduce if UDF returns input objects multiple times and changes it in between
         */


          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();


          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
              groupBy(1).reduceGroup(new InputReturningTuple3GroupReduce());


          reduceDs.writeAsCsv(resultPath);
          env.execute();


          // return expected result
          return "11,1,Hi!\n" +
              "21,1,Hi again!\n" +
              "12,2,Hi!\n" +
              "22,2,Hi again!\n" +
              "13,2,Hi!\n" +
              "23,2,Hi again!\n";
        }
        case 10: {
        
        /*
         * check correctness of groupReduce on custom type with key extractor and combine
         */


          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();


          DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
          DataSet<CustomType> reduceDs = ds.
              groupBy(new KeySelector<CustomType, Integer>() {
                private static final long serialVersionUID = 1L;


                @Override
                public Integer getKey(CustomType in) {
                  return in.myInt;
                }
              }).reduceGroup(new CustomTypeGroupReduceWithCombine());


          reduceDs.writeAsText(resultPath);
          env.execute();


          // return expected result
          if (collectionExecution) {
            return null;


          } else {
            return "1,0,test1\n" +
                "2,3,test2\n" +
                "3,12,test3\n" +
                "4,30,test4\n" +
                "5,60,test5\n" +
                "6,105,test6\n";
          }
        }
        case 11: {
        
        /*
         * check correctness of groupReduce on tuples with combine
         */


          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(2); // important because it determines how often the combiner is called


          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple2<Integer, String>> reduceDs = ds.
              groupBy(1).reduceGroup(new Tuple3GroupReduceWithCombine());


          reduceDs.writeAsCsv(resultPath);
          env.execute();


          // return expected result
          if (collectionExecution) {
            return null;


          } else {
            return "1,test1\n" +
                "5,test2\n" +
                "15,test3\n" +
                "34,test4\n" +
                "65,test5\n" +
                "111,test6\n";
          }
        }
        // all-groupreduce with combine
        case 12: {
        
        /*
         * check correctness of all-groupreduce for tuples with combine
         */


          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();


          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env)
              .map(new IdentityMapper<Tuple3<Integer, Long, String>>()).setParallelism(4);


          Configuration cfg = new Configuration();
          cfg.setString(PactCompiler.HINT_SHIP_STRATEGY, PactCompiler.HINT_SHIP_STRATEGY_REPARTITION);
          DataSet<Tuple2<Integer, String>> reduceDs = ds.reduceGroup(new Tuple3AllGroupReduceWithCombine())
              .withParameters(cfg);


          reduceDs.writeAsCsv(resultPath);
          env.execute();


          // return expected result
          if (collectionExecution) {
            return null;
          } else {
            return "322,testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest\n";
          }
        }
        case 13: {
        
        /*
         * check correctness of groupReduce with descending group sort
         */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);


          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
              groupBy(1).sortGroup(2, Order.DESCENDING).reduceGroup(new Tuple3SortedGroupReduce());


          reduceDs.writeAsCsv(resultPath);
          env.execute();


          // return expected result
          return "1,1,Hi\n" +
              "5,2,Hello world-Hello\n" +
              "15,3,Luke Skywalker-I am fine.-Hello world, how are you?\n" +
              "34,4,Comment#4-Comment#3-Comment#2-Comment#1\n" +
              "65,5,Comment#9-Comment#8-Comment#7-Comment#6-Comment#5\n" +
              "111,6,Comment#15-Comment#14-Comment#13-Comment#12-Comment#11-Comment#10\n";


        }
        case 14: {
          /*
           * check correctness of groupReduce on tuples with tuple-returning key selector
           */


            final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();


            DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
            DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds.
                groupBy(
                    new KeySelector<Tuple5<Integer,Long,Integer,String,Long>, Tuple2<Integer, Long>>() {
                      private static final long serialVersionUID = 1L;
        
                      @Override
                      public Tuple2<Integer, Long> getKey(Tuple5<Integer,Long,Integer,String,Long> t) {
                        return new Tuple2<Integer, Long>(t.f0, t.f4);
                      }
                    }).reduceGroup(new Tuple5GroupReduce());


            reduceDs.writeAsCsv(resultPath);
            env.execute();


            // return expected result
            return "1,1,0,P-),1\n" +
                "2,3,0,P-),1\n" +
                "2,2,0,P-),2\n" +
                "3,9,0,P-),2\n" +
                "3,6,0,P-),3\n" +
                "4,17,0,P-),1\n" +
                "4,17,0,P-),2\n" +
                "5,11,0,P-),1\n" +
                "5,29,0,P-),2\n" +
                "5,25,0,P-),3\n";
        }
        case 15: {
          /*
           * check that input of combiner is also sorted for combinable groupReduce with group sorting
           */


          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);


          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
              groupBy(1).sortGroup(0, Order.ASCENDING).reduceGroup(new OrderCheckingCombinableReduce());


          reduceDs.writeAsCsv(resultPath);
          env.execute();


          // return expected result
          return "1,1,Hi\n" +
              "2,2,Hello\n" +
              "4,3,Hello world, how are you?\n" +
              "7,4,Comment#1\n" +
              "11,5,Comment#5\n" +
              "16,6,Comment#10\n";
          
        }
        case 16: {
          /*
           * Deep nesting test
           * + null value in pojo
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          
          DataSet<CrazyNested> ds = CollectionDataSets.getCrazyNestedDataSet(env);
          DataSet<Tuple2<String, Integer>> reduceDs = ds.groupBy("nest_Lvl1.nest_Lvl2.nest_Lvl3.nest_Lvl4.f1nal")
              .reduceGroup(new GroupReduceFunction<CollectionDataSets.CrazyNested, Tuple2<String, Integer>>() {
                private static final long serialVersionUID = 1L;


                @Override
                public void reduce(Iterable<CrazyNested> values,
                    Collector<Tuple2<String, Integer>> out)
                    throws Exception {
                  int c = 0; String n = null;
                  for(CrazyNested v : values) {
                    c++; // haha
                    n = v.nest_Lvl1.nest_Lvl2.nest_Lvl3.nest_Lvl4.f1nal;
                  }
                  out.collect(new Tuple2<String, Integer>(n,c));
                }});
          
          reduceDs.writeAsCsv(resultPath);
          env.execute();
          
          // return expected result
          return "aa,1\nbb,2\ncc,3\n";
        } 
        case 17: {
          /*
           * Test Pojo extending from tuple WITH custom fields
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          
          DataSet<FromTupleWithCTor> ds = CollectionDataSets.getPojoExtendingFromTuple(env);
          DataSet<Integer> reduceDs = ds.groupBy("special", "f2")
              .reduceGroup(new GroupReduceFunction<FromTupleWithCTor, Integer>() {
                private static final long serialVersionUID = 1L;
                @Override
                public void reduce(Iterable<FromTupleWithCTor> values,
                    Collector<Integer> out)
                    throws Exception {
                  int c = 0;
                  for(FromTuple v : values) {
                    c++;
                  }
                  out.collect(c);
                }});
          
          reduceDs.writeAsText(resultPath);
          env.execute();
          
          // return expected result
          return "3\n2\n";
        } 
        case 18: {
          /*
           * Test Pojo containing a Writable and Tuples
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          
          DataSet<PojoContainingTupleAndWritable> ds = CollectionDataSets.getPojoContainingTupleAndWritable(env);
          DataSet<Integer> reduceDs = ds.groupBy("hadoopFan", "theTuple.*") // full tuple selection
              .reduceGroup(new GroupReduceFunction<PojoContainingTupleAndWritable, Integer>() {
                private static final long serialVersionUID = 1L;
                @Override
                public void reduce(Iterable<PojoContainingTupleAndWritable> values,
                    Collector<Integer> out)
                    throws Exception {
                  int c = 0;
                  for(PojoContainingTupleAndWritable v : values) {
                    c++;
                  }
                  out.collect(c);
                }});
          
          reduceDs.writeAsText(resultPath);
          env.execute();
          
          // return expected result
          return "1\n5\n";
        } 
        case 19: {
          /*
           * Test Tuple containing pojos and regular fields
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          
          DataSet<Tuple3<Integer,CrazyNested, POJO>> ds = CollectionDataSets.getTupleContainingPojos(env);
          DataSet<Integer> reduceDs = ds.groupBy("f0", "f1.*") // nested full tuple selection
              .reduceGroup(new GroupReduceFunction<Tuple3<Integer,CrazyNested, POJO>, Integer>() {
                private static final long serialVersionUID = 1L;
                @Override
                public void reduce(Iterable<Tuple3<Integer,CrazyNested, POJO>> values,
                    Collector<Integer> out)
                    throws Exception {
                  int c = 0;
                  for(Tuple3<Integer,CrazyNested, POJO> v : values) {
                    c++;
                  }
                  out.collect(c);
                }});
          
          reduceDs.writeAsText(resultPath);
          env.execute();
          
          // return expected result
          return "3\n1\n";
        }
        case 20: {
          /*
           * Test string-based definition on group sort, based on test:
           * check correctness of groupReduce with descending group sort
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);


          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
              groupBy(1).sortGroup("f2", Order.DESCENDING).reduceGroup(new Tuple3SortedGroupReduce());


          reduceDs.writeAsCsv(resultPath);
          env.execute();


          // return expected result
          return "1,1,Hi\n" +
              "5,2,Hello world-Hello\n" +
              "15,3,Luke Skywalker-I am fine.-Hello world, how are you?\n" +
              "34,4,Comment#4-Comment#3-Comment#2-Comment#1\n" +
              "65,5,Comment#9-Comment#8-Comment#7-Comment#6-Comment#5\n" +
              "111,6,Comment#15-Comment#14-Comment#13-Comment#12-Comment#11-Comment#10\n";


        }
        case 21: {
          /*
           * Test int-based definition on group sort, for (full) nested Tuple
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);


          DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds = CollectionDataSets.getGroupSortedNestedTupleDataSet(env);
          DataSet<String> reduceDs = ds.groupBy("f1").sortGroup(0, Order.DESCENDING).reduceGroup(new NestedTupleReducer());
          reduceDs.writeAsText(resultPath);
          env.execute();


          // return expected result
          return "a--(2,1)-(1,3)-(1,2)-\n" +
              "b--(2,2)-\n"+
              "c--(4,9)-(3,6)-(3,3)-\n";
        }
        case 22: {
          /*
           * Test int-based definition on group sort, for (partial) nested Tuple ASC
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);


          DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds = CollectionDataSets.getGroupSortedNestedTupleDataSet(env);
          // f0.f0 is first integer
          DataSet<String> reduceDs = ds.groupBy("f1")
              .sortGroup("f0.f0", Order.ASCENDING)
              .sortGroup("f0.f1", Order.ASCENDING)
              .reduceGroup(new NestedTupleReducer());
          reduceDs.writeAsText(resultPath);
          env.execute();
          
          // return expected result
          return "a--(1,2)-(1,3)-(2,1)-\n" +
              "b--(2,2)-\n"+
              "c--(3,3)-(3,6)-(4,9)-\n";
        }
        case 23: {
          /*
           * Test string-based definition on group sort, for (partial) nested Tuple DESC
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);


          DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds = CollectionDataSets.getGroupSortedNestedTupleDataSet(env);
          // f0.f0 is first integer
          DataSet<String> reduceDs = ds.groupBy("f1").sortGroup("f0.f0", Order.DESCENDING).reduceGroup(new NestedTupleReducer());
          reduceDs.writeAsText(resultPath);
          env.execute();
          
          // return expected result
          return "a--(2,1)-(1,3)-(1,2)-\n" +
              "b--(2,2)-\n"+
              "c--(4,9)-(3,3)-(3,6)-\n";
        }
        case 24: {
          /*
           * Test string-based definition on group sort, for two grouping keys
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);


          DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds = CollectionDataSets.getGroupSortedNestedTupleDataSet(env);
          // f0.f0 is first integer
          DataSet<String> reduceDs = ds.groupBy("f1").sortGroup("f0.f0", Order.DESCENDING).sortGroup("f0.f1", Order.DESCENDING).reduceGroup(new NestedTupleReducer());
          reduceDs.writeAsText(resultPath);
          env.execute();
          
          // return expected result
          return "a--(2,1)-(1,3)-(1,2)-\n" +
              "b--(2,2)-\n"+
              "c--(4,9)-(3,6)-(3,3)-\n";
        }
        case 25: {
          /*
           * Test string-based definition on group sort, for two grouping keys with Pojos
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);


          DataSet<PojoContainingTupleAndWritable> ds = CollectionDataSets.getGroupSortedPojoContainingTupleAndWritable(env);
          // f0.f0 is first integer
          DataSet<String> reduceDs = ds.groupBy("hadoopFan").sortGroup("theTuple.f0", Order.DESCENDING).sortGroup("theTuple.f1", Order.DESCENDING)
              .reduceGroup(new GroupReduceFunction<CollectionDataSets.PojoContainingTupleAndWritable, String>() {
                @Override
                public void reduce(
                    Iterable<PojoContainingTupleAndWritable> values,
                    Collector<String> out) throws Exception {
                  boolean once = false;
                  StringBuilder concat = new StringBuilder();
                  for(PojoContainingTupleAndWritable value : values) {
                    if(!once) {
                      concat.append(value.hadoopFan.get());
                      concat.append("---");
                      once = true;
                    }
                    concat.append(value.theTuple);
                    concat.append("-");
                  }
                  out.collect(concat.toString());
                }
          });
          reduceDs.writeAsText(resultPath);
          env.execute();
          
          // return expected result
          return "1---(10,100)-\n" +
              "2---(30,600)-(30,400)-(30,200)-(20,201)-(20,200)-\n";
        }
        case 26: {
          /*
           * Test grouping with pojo containing multiple pojos (was a bug)
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);


          DataSet<CollectionDataSets.PojoWithMultiplePojos> ds = CollectionDataSets.getPojoWithMultiplePojos(env);
          // f0.f0 is first integer
          DataSet<String> reduceDs = ds.groupBy("p2.a2")
              .reduceGroup(new GroupReduceFunction<CollectionDataSets.PojoWithMultiplePojos, String>() {
                @Override
                public void reduce(
                    Iterable<CollectionDataSets.PojoWithMultiplePojos> values,
                    Collector<String> out) throws Exception {
                  StringBuilder concat = new StringBuilder();
                  for(CollectionDataSets.PojoWithMultiplePojos value : values) {
                    concat.append(value.p2.a2);
                  }
                  out.collect(concat.toString());
                }
              });
          reduceDs.writeAsText(resultPath);
          env.execute();


          // return expected result
          return "b\nccc\nee\n";
        }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.flink.api.java.ExecutionEnvironment

org.apache.flink.api.avro.AvroOutputFormatTest

org.apache.flink.api.common.cache.DistributedCache.DistributedCacheEntry

org.apache.flink.api.common.operators.CollectionExecutionIterationTest

org.apache.flink.api.common.operators.CollectionExecutionWithBroadcastVariableTest

org.apache.flink.api.java.functions.SemanticPropertiesProjectionTest

org.apache.flink.api.java.functions.SemanticPropertiesTranslationTest

org.apache.flink.api.java.io.AvroInputFormatTypeExtractionTest

org.apache.flink.api.java.io.CsvReader

org.apache.flink.api.java.io.jdbc.example.JDBCExample

org.apache.flink.api.java.io.TextInputFormat

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.