Examples of eu.stratosphere.api.java.ExecutionEnvironment

eu.stratosphere.api.java.ExecutionEnvironment
The ExecutionEnvironment is the context in which a program is executed. A {@link LocalEnvironment} will cause execution in the current JVM, a {@link RemoteEnvironment} will cause execution on a remote setup.
The environment provides methods to control the job execution (such as setting the parallelism) and to interact with the outside world (data access).
Please note that the execution environment needs strong type information for the input and return types of all operations that are executed. This means that the environment needs to know that the return value of an operation is, for example, a Tuple of String and Integer. Because the Java compiler throws much of the generic type information away, most methods attempt to re-obtain that information using reflection. In certain cases, it may be necessary to manually supply that information to some of the methods. @see LocalEnvironment @see RemoteEnvironment

  //
  
  public static void main(String[] args) throws Exception {
    
    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    
    // get input data
    DataSet<String> text = env.fromElements(
        "To be, or not to be,--that is the question:--",
        "Whether 'tis nobler in the mind to suffer",
        "The slings and arrows of outrageous fortune",
        "Or to take arms against a sea of troubles,"
        );
    
    DataSet<Tuple2<String, Integer>> counts = 
        // split up the lines in pairs (2-tuples) containing: (word,1)
        text.flatMap(new LineSplitter())
        // group by the tuple field "0" and sum up tuple field "1"
        .groupBy(0)
        .aggregate(Aggregations.SUM, 1);


    // emit result
    counts.print();
    
    // execute program
    env.execute("WordCount Example");
  }

View Full Code Here

 */
public class Job {
  
  public static void main(String[] args) throws Exception {
    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    
    
    /**
     * Here, you can start creating your execution plan for Stratosphere.
     * 
     * Start with getting some data from the environment, like
     *   env.readTextFile(textPath);
     * 
     * then, transform the resulting DataSet<String> using operations
     * like
     *   .filter()
     *   .flatMap()
     *   .join()
     *   .group()
     * and many more.
     * Have a look at the programming guide for the Java API:
     * 
     * http://stratosphere.eu/docs/0.5/programming_guides/java.html
     * 
     * and the examples
     * 
     * http://stratosphere.eu/docs/0.5/programming_guides/examples.html
     * 
     */
    
    // execute program
    env.execute("Stratosphere Java API Skeleton");
  }

View Full Code Here

        
        /*
         * UDF Join on tuples with key field positions
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs = 
            ds1.join(ds2)
            .where(1)
            .equalTo(1)
            .with(new T3T5Join());
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hi,Hallo\n" +
            "Hello,Hallo Welt\n" +
            "Hello world,Hallo Welt\n";
        
      }
      case 2: {
        
        /*
         * UDF Join on tuples with multiple key field positions
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs = 
            ds1.join(ds2)
               .where(0,1)
               .equalTo(0,4)
               .with(new T3T5Join());
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hi,Hallo\n" +
            "Hello,Hallo Welt\n" +
            "Hello world,Hallo Welt wie gehts?\n" +
            "Hello world,ABC\n" +
            "I am fine.,HIJ\n" +
            "I am fine.,IJK\n";
        
      }
      case 3: {
        
        /*
         * Default Join on tuples
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<Tuple3<Integer, Long, String>,Tuple5<Integer, Long, Integer, String, Long>>> joinDs = 
            ds1.join(ds2)
               .where(0)
               .equalTo(2);
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "(1, 1, Hi),(2, 2, 1, Hallo Welt, 2)\n" +
            "(2, 2, Hello),(2, 3, 2, Hallo Welt wie, 1)\n" +
            "(3, 2, Hello world),(3, 4, 3, Hallo Welt wie gehts?, 2)\n";
      
      }
      case 4: {
        
        /*
         * Join with Huge
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs = ds1.joinWithHuge(ds2)
                              .where(1)
                              .equalTo(1)
                              .with(new T3T5Join());
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hi,Hallo\n" +
            "Hello,Hallo Welt\n" +
            "Hello world,Hallo Welt\n";
        
      }
      case 5: {
        
        /*
         * Join with Tiny
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs = 
            ds1.joinWithTiny(ds2)
               .where(1)
               .equalTo(1)
               .with(new T3T5Join());
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hi,Hallo\n" +
            "Hello,Hallo Welt\n" +
            "Hello world,Hallo Welt\n";
        
      }
      
      case 6: {
        
        /*
         * Join that returns the left input object
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> joinDs = 
            ds1.join(ds2)
               .where(1)
               .equalTo(1)
               .with(new LeftReturningJoin());
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1,Hi\n" +
            "2,2,Hello\n" +
            "3,2,Hello world\n";
      }
      case 7: {
        
        /*
         * Join that returns the right input object
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> joinDs = 
            ds1.join(ds2)
               .where(1)
               .equalTo(1)
               .with(new RightReturningJoin());
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1,0,Hallo,1\n" +
            "2,2,1,Hallo Welt,2\n" +
            "2,2,1,Hallo Welt,2\n";
      }
      case 8: {
        
        /*
         * Join with broadcast set
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Integer> intDs = CollectionDataSets.getIntegerDataSet(env);
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.getSmall5TupleDataSet(env);
        DataSet<Tuple3<String, String, Integer>> joinDs = 
            ds1.join(ds2)
               .where(1)
               .equalTo(4)
               .with(new T3T5BCJoin())
               .withBroadcastSet(intDs, "ints");
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hi,Hallo,55\n" +
            "Hi,Hallo Welt wie,55\n" +
            "Hello,Hallo Welt,55\n" +
            "Hello world,Hallo Welt,55\n";
      }
      case 9: {
      
      /*
       * Join on a tuple input with key field selector and a custom type input with key extractor
       */
      
      final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();


      DataSet<CustomType> ds1 = CollectionDataSets.getSmallCustomTypeDataSet(env);
      DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);
      DataSet<Tuple2<String, String>> joinDs = 
          ds1.join(ds2)
             .where(new KeySelector<CustomType, Integer>() {
                   @Override
                   public Integer getKey(CustomType value) {
                     return value.myInt;
                   }
                 }
                 )
             .equalTo(0)
             .with(new CustT3Join());
      
      joinDs.writeAsCsv(resultPath);
      env.execute();
      
      // return expected result
      return "Hi,Hi\n" +
          "Hello,Hello\n" +
          "Hello world,Hello\n";
      
      }
      case 10: {
        
        /*
         * Project join on a tuple input 1
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple6<String, Long, String, Integer, Long, Long>> joinDs = 
            ds1.join(ds2)
               .where(1)
               .equalTo(1)
               .projectFirst(2,1)
               .projectSecond(3)
               .projectFirst(0)
               .projectSecond(4,1)
               .types(String.class, Long.class, String.class, Integer.class, Long.class, Long.class);
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hi,1,Hallo,1,1,1\n" +
            "Hello,2,Hallo Welt,2,2,2\n" +
            "Hello world,2,Hallo Welt,3,2,2\n";
        
      }
      case 11: {
        
        /*
         * Project join on a tuple input 2
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple6<String, String, Long, Long, Long, Integer>> joinDs = 
            ds1.join(ds2)
               .where(1)
               .equalTo(1)
               .projectSecond(3)
               .projectFirst(2,1)
               .projectSecond(4,1)
               .projectFirst(0)
               .types(String.class, String.class, Long.class, Long.class, Long.class, Integer.class);
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hallo,Hi,1,1,1,1\n" +
            "Hallo Welt,Hello,2,2,2,2\n" +
            "Hallo Welt,Hello world,2,2,2,3\n";
      }
        
      case 12: {
        
        /*
         * Join on a tuple input with key field selector and a custom type input with key extractor
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<Tuple2<String, String>> joinDs = 
            ds1.join(ds2)
               .where(1).equalTo(new KeySelector<CustomType, Long>() {
                     @Override
                     public Long getKey(CustomType value) {
                       return value.myLong;
                     }
                   })
               .with(new T3CustJoin());
        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "Hi,Hello\n" +
            "Hello,Hello world\n" +
            "Hello world,Hello world\n";
            
      }
      
      case 13: {
        
        /*
         * (Default) Join on two custom type inputs with key extractors
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<CustomType> ds1 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getSmallCustomTypeDataSet(env);
        
        DataSet<Tuple2<CustomType, CustomType>> joinDs = 
          ds1.join(ds2)
             .where(
                 new KeySelector<CustomType, Integer>() {
                   @Override
                   public Integer getKey(CustomType value) {
                     return value.myInt;
                   }
                 }
                )
            .equalTo(
                new KeySelector<CustomType, Integer>() {
                     @Override
                     public Integer getKey(CustomType value) {
                       return value.myInt;
                     }
                   }
                );
                                        
        joinDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,0,Hi,1,0,Hi\n" +
            "2,1,Hello,2,1,Hello\n" +
            "2,1,Hello,2,2,Hello world\n" +

View Full Code Here

        
        /*
         * check correctness of groupReduce on tuples with key field selector
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple2<Integer, Long>> reduceDs = ds.
            groupBy(1).reduceGroup(new Tuple3GroupReduce());
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1\n" +
            "5,2\n" +
            "15,3\n" +
            "34,4\n" +
            "65,5\n" +
            "111,6\n";
      }
      case 2: {
        
        /*
         * check correctness of groupReduce on tuples with multiple key field selector
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds.
            groupBy(4,0).reduceGroup(new Tuple5GroupReduce());
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1,0,P-),1\n" +
            "2,3,0,P-),1\n" +
            "2,2,0,P-),2\n" +
            "3,9,0,P-),2\n" +
            "3,6,0,P-),3\n" +
            "4,17,0,P-),1\n" +
            "4,17,0,P-),2\n" +
            "5,11,0,P-),1\n" +
            "5,29,0,P-),2\n" +
            "5,25,0,P-),3\n";
      }
      case 3: {
        
        /*
         * check correctness of groupReduce on tuples with key field selector and group sorting
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(1);
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).sortGroup(2,Order.ASCENDING).reduceGroup(new Tuple3SortedGroupReduce());
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1,Hi\n" +
            "5,2,Hello-Hello world\n" +
            "15,3,Hello world, how are you?-I am fine.-Luke Skywalker\n" +
            "34,4,Comment#1-Comment#2-Comment#3-Comment#4\n" +
            "65,5,Comment#5-Comment#6-Comment#7-Comment#8-Comment#9\n" +
            "111,6,Comment#10-Comment#11-Comment#12-Comment#13-Comment#14-Comment#15\n";
                
      }
      case 4: {
        /*
         * check correctness of groupReduce on tuples with key extractor
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple2<Integer, Long>> reduceDs = ds.
            groupBy(new KeySelector<Tuple3<Integer,Long,String>, Long>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Long getKey(Tuple3<Integer, Long, String> in) {
                    return in.f1;
                  }
                }).reduceGroup(new Tuple3GroupReduce());
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1\n" +
            "5,2\n" +
            "15,3\n" +
            "34,4\n" +
            "65,5\n" +
            "111,6\n";
        
      }
      case 5: {
        
        /*
         * check correctness of groupReduce on custom type with type extractor
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> reduceDs = ds.
            groupBy(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).reduceGroup(new CustomTypeGroupReduce());
        
        reduceDs.writeAsText(resultPath);
        env.execute();
        
        // return expected result
        return "1,0,Hello!\n" +
            "2,3,Hello!\n" +
            "3,12,Hello!\n" +
            "4,30,Hello!\n" +
            "5,60,Hello!\n" +
            "6,105,Hello!\n";
      }
      case 6: {
        
        /*
         * check correctness of all-groupreduce for tuples
         */


        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.reduceGroup(new AllAddingTuple3GroupReduce());
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "231,91,Hello World\n";
      }
      case 7: {
        /*
         * check correctness of all-groupreduce for custom types
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> reduceDs = ds.reduceGroup(new AllAddingCustomTypeGroupReduce());
        
        reduceDs.writeAsText(resultPath);
        env.execute();
        
        // return expected result
        return "91,210,Hello!";
      }
      case 8: {
        
        /*
         * check correctness of groupReduce with broadcast set
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Integer> intDs = CollectionDataSets.getIntegerDataSet(env);
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).reduceGroup(new BCTuple3GroupReduce()).withBroadcastSet(intDs, "ints");
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1,55\n" +
            "5,2,55\n" +
            "15,3,55\n" +
            "34,4,55\n" +
            "65,5,55\n" +
            "111,6,55\n";
      }
      case 9: {
        
        /*
         * check correctness of groupReduce if UDF returns input objects multiple times and changes it in between
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).reduceGroup(new InputReturningTuple3GroupReduce());
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "11,1,Hi!\n" +
            "21,1,Hi again!\n" +
            "12,2,Hi!\n" +
            "22,2,Hi again!\n" +
            "13,2,Hi!\n" +
            "23,2,Hi again!\n";
      }
      case 10: {
        
        /*
         * check correctness of groupReduce on custom type with key extractor and combine
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> reduceDs = ds.
            groupBy(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).reduceGroup(new CustomTypeGroupReduceWithCombine());
        
        reduceDs.writeAsText(resultPath);
        env.execute();
        
        // return expected result
        return "1,0,test1\n" +
            "2,3,test2\n" +
            "3,12,test3\n" +
            "4,30,test4\n" +
            "5,60,test5\n" +
            "6,105,test6\n";
      }
      case 11: {
        
        /*
         * check correctness of groupReduce on tuples with combine
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(2); // important because it determines how often the combiner is called
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple2<Integer, String>> reduceDs = ds.
            groupBy(1).reduceGroup(new Tuple3GroupReduceWithCombine());
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,test1\n" +
            "5,test2\n" +
            "15,test3\n" +
            "34,test4\n" +
            "65,test5\n" +
            "111,test6\n";
      }
      // all-groupreduce with combine
      case 12: {
        
        /*
         * check correctness of all-groupreduce for tuples with combine
         */


        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env)
              .map(new IdentityMapper<Tuple3<Integer,Long,String>>()).setParallelism(4);
        
        Configuration cfg = new Configuration();
        cfg.setString(PactCompiler.HINT_SHIP_STRATEGY, PactCompiler.HINT_SHIP_STRATEGY_REPARTITION);
        DataSet<Tuple2<Integer, String>> reduceDs = ds.reduceGroup(new Tuple3AllGroupReduceWithCombine())
            .withParameters(cfg);
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "322,testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest\n";
      }
      // descending sort not working
      case 13: {
        
        /*
         * check correctness of groupReduce on tuples with key field selector and group sorting
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(1);
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).sortGroup(2,Order.DESCENDING).reduceGroup(new Tuple3SortedGroupReduce());
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1,Hi\n" +
            "5,2,Hello world-Hello\n" +
            "15,3,Luke Skywalker-I am fine.-Hello world, how are you?\n" +

View Full Code Here

    resultPath = getTempFilePath("results");
  }
  
  @Override
  protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    
    DataSet<Long> vertexIds = env.generateSequence(1, NUM_VERTICES);
    DataSet<String> edgeString = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"));
    
    DataSet<Tuple2<Long, Long>> edges = edgeString.map(new EdgeParser());
    
    DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());
    DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100));
    
    result.writeAsCsv(resultPath, "\n", " ");
    env.execute("Spargel Connected Components");
  }

View Full Code Here

public class BulkIterationWithAllReducerITCase extends JavaProgramTestBase {


  @Override
  protected void testProgram() throws Exception {
    
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(1);
    
    DataSet<Integer> data = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8);
    
    IterativeDataSet<Integer> iteration = data.iterate(10);
    
    DataSet<Integer> result = data.reduceGroup(new PickOneAllReduce()).withBroadcastSet(iteration, "bc");
    
    final List<Integer> resultList = new ArrayList<Integer>();
    iteration.closeWith(result).output(new LocalCollectionOutputFormat<Integer>(resultList));
    
    env.execute();
    
    Assert.assertEquals(8, resultList.get(0).intValue());
  }

View Full Code Here

      case 1: {
        /*
         * Test non-passing flatmap
         */
    
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<String> ds = CollectionDataSets.getStringDataSet(env);
        DataSet<String> nonPassingFlatMapDs = ds.
            flatMap(new FlatMapFunction<String, String>() {
              private static final long serialVersionUID = 1L;


              @Override
              public void flatMap(String value, Collector<String> out) throws Exception {
                if ( value.contains("bananas") ) {
                  out.collect(value);
                }
              }
            });
        
        nonPassingFlatMapDs.writeAsText(resultPath);
        env.execute();
        
        // return expected result
        return   "\n";
      }
      case 2: {
        /*
         * Test data duplicating flatmap
         */
    
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<String> ds = CollectionDataSets.getStringDataSet(env);
        DataSet<String> duplicatingFlatMapDs = ds.
            flatMap(new FlatMapFunction<String, String>() {
              private static final long serialVersionUID = 1L;


              @Override
              public void flatMap(String value, Collector<String> out) throws Exception {
                  out.collect(value);
                  out.collect(value.toUpperCase());
              }
            });
        
        duplicatingFlatMapDs.writeAsText(resultPath);
        env.execute();
        
        // return expected result
        return   "Hi\n" + "HI\n" +
            "Hello\n" + "HELLO\n" +
            "Hello world\n" + "HELLO WORLD\n" +
            "Hello world, how are you?\n" + "HELLO WORLD, HOW ARE YOU?\n" +
            "I am fine.\n" + "I AM FINE.\n" +
            "Luke Skywalker\n" + "LUKE SKYWALKER\n" +
            "Random comment\n" + "RANDOM COMMENT\n" +
            "LOL\n" + "LOL\n";
      }
      case 3: {
        /*
         * Test flatmap with varying number of emitted tuples
         */
    
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> varyingTuplesMapDs = ds.
            flatMap(new FlatMapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {
              private static final long serialVersionUID = 1L;


              @Override
              public void flatMap(Tuple3<Integer, Long, String> value,
                  Collector<Tuple3<Integer, Long, String>> out) throws Exception {
                final int numTuples = value.f0 % 3; 
                for ( int i = 0; i < numTuples; i++ ) {
                  out.collect(value);
                }
              }
            });
        
        varyingTuplesMapDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return  "1,1,Hi\n" +
            "2,2,Hello\n" + "2,2,Hello\n" +
            "4,3,Hello world, how are you?\n" +
            "5,3,I am fine.\n" + "5,3,I am fine.\n" +
            "7,4,Comment#1\n" +
            "8,4,Comment#2\n" + "8,4,Comment#2\n" + 
            "10,4,Comment#4\n" +
            "11,5,Comment#5\n" + "11,5,Comment#5\n" +
            "13,5,Comment#7\n" +
            "14,5,Comment#8\n" + "14,5,Comment#8\n" +
            "16,6,Comment#10\n" +
            "17,6,Comment#11\n" + "17,6,Comment#11\n" +
            "19,6,Comment#13\n" +
            "20,6,Comment#14\n" + "20,6,Comment#14\n";
      }
      case 4: {
        /*
         * Test type conversion flatmapper (Custom -> Tuple)
         */
    
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> typeConversionFlatMapDs = ds.
            flatMap(new FlatMapFunction<CustomType, Tuple3<Integer, Long, String>>() {
              private static final long serialVersionUID = 1L;
              private final Tuple3<Integer, Long, String> outTuple = 
                  new Tuple3<Integer, Long, String>();
              
              @Override
              public void flatMap(CustomType value, Collector<Tuple3<Integer, Long, String>> out)
                  throws Exception {
                outTuple.setField(value.myInt, 0);
                outTuple.setField(value.myLong, 1);
                outTuple.setField(value.myString, 2);
                out.collect(outTuple);
              }
            });
        
        typeConversionFlatMapDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return   "1,0,Hi\n" +
            "2,1,Hello\n" +
            "2,2,Hello world\n" +
            "3,3,Hello world, how are you?\n" +
            "3,4,I am fine.\n" +
            "3,5,Luke Skywalker\n" +
            "4,6,Comment#1\n" +
            "4,7,Comment#2\n" +
            "4,8,Comment#3\n" +
            "4,9,Comment#4\n" +
            "5,10,Comment#5\n" +
            "5,11,Comment#6\n" +
            "5,12,Comment#7\n" +
            "5,13,Comment#8\n" +
            "5,14,Comment#9\n" +
            "6,15,Comment#10\n" +
            "6,16,Comment#11\n" +
            "6,17,Comment#12\n" +
            "6,18,Comment#13\n" +
            "6,19,Comment#14\n" +
            "6,20,Comment#15\n";
      }
      case 5: {
        /*
         * Test type conversion flatmapper (Tuple -> Basic)
         */
    
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<String> typeConversionFlatMapDs = ds.
            flatMap(new FlatMapFunction<Tuple3<Integer, Long, String>, String>() {
              private static final long serialVersionUID = 1L;
              
              @Override
              public void flatMap(Tuple3<Integer, Long, String> value, 
                  Collector<String> out) throws Exception {
                out.collect(value.f2);
              }
            });
        
        typeConversionFlatMapDs.writeAsText(resultPath);
        env.execute();
        
        // return expected result
        return   "Hi\n" + "Hello\n" + "Hello world\n" +
            "Hello world, how are you?\n" +
            "I am fine.\n" + "Luke Skywalker\n" +
            "Comment#1\n" +  "Comment#2\n" +
            "Comment#3\n" +  "Comment#4\n" +
            "Comment#5\n" +  "Comment#6\n" +
            "Comment#7\n" + "Comment#8\n" +
            "Comment#9\n" +  "Comment#10\n" +
            "Comment#11\n" + "Comment#12\n" +
            "Comment#13\n" + "Comment#14\n" +
            "Comment#15\n";
      }
      case 6: {
        /*
         * Test flatmapper if UDF returns input object 
         * multiple times and changes it in between
         */
    
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> inputObjFlatMapDs = ds.
            flatMap(new FlatMapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {
              private static final long serialVersionUID = 1L;
              
              @Override
              public void flatMap( Tuple3<Integer, Long, String> value,
                  Collector<Tuple3<Integer, Long, String>> out) throws Exception {
                final int numTuples = value.f0 % 4;
                for ( int i = 0; i < numTuples; i++ ) {
                  value.setField(i, 0);
                  out.collect(value);
                }              
              }
            });
        
        inputObjFlatMapDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return  "0,1,Hi\n" +
            "0,2,Hello\n" + "1,2,Hello\n" +
            "0,2,Hello world\n" + "1,2,Hello world\n" + "2,2,Hello world\n" +
            "0,3,I am fine.\n" +
            "0,3,Luke Skywalker\n" + "1,3,Luke Skywalker\n" +
            "0,4,Comment#1\n" + "1,4,Comment#1\n" + "2,4,Comment#1\n" +
            "0,4,Comment#3\n" +
            "0,4,Comment#4\n" + "1,4,Comment#4\n" +
            "0,5,Comment#5\n" + "1,5,Comment#5\n" + "2,5,Comment#5\n" +
            "0,5,Comment#7\n" +
            "0,5,Comment#8\n" + "1,5,Comment#8\n" +
            "0,5,Comment#9\n" + "1,5,Comment#9\n" + "2,5,Comment#9\n" +
            "0,6,Comment#11\n" +
            "0,6,Comment#12\n" + "1,6,Comment#12\n" +
            "0,6,Comment#13\n" + "1,6,Comment#13\n" + "2,6,Comment#13\n" +
            "0,6,Comment#15\n";
      }
      case 7: {
        /*
         * Test flatmap with broadcast set 
         */
          
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Integer> ints = CollectionDataSets.getIntegerDataSet(env);
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> bcFlatMapDs = ds.
            flatMap(new FlatMapFunction<Tuple3<Integer,Long,String>, Tuple3<Integer,Long,String>>() {
              private static final long serialVersionUID = 1L;
              private final Tuple3<Integer, Long, String> outTuple = 
                  new Tuple3<Integer, Long, String>();
              private Integer f2Replace = 0;
              
              @Override
              public void open(Configuration config) {
                Collection<Integer> ints = this.getRuntimeContext().getBroadcastVariable("ints");
                int sum = 0;
                for(Integer i : ints) {
                  sum += i;
                }
                f2Replace = sum;
              }
              
              @Override
              public void flatMap(Tuple3<Integer, Long, String> value,
                  Collector<Tuple3<Integer, Long, String>> out) throws Exception {
                outTuple.setFields(f2Replace, value.f1, value.f2);
                out.collect(outTuple);
              }
            }).withBroadcastSet(ints, "ints");
        bcFlatMapDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return   "55,1,Hi\n" +
            "55,2,Hello\n" +
            "55,2,Hello world\n" +

View Full Code Here

      case 1: {
        /*
         * Reduce on tuples with key field selector
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).reduce(new Tuple3Reduce("B-)"));
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1,Hi\n" +
            "5,2,B-)\n" +
            "15,3,B-)\n" +
            "34,4,B-)\n" +
            "65,5,B-)\n" +
            "111,6,B-)\n";
      }
      case 2: {
        /*
         * Reduce on tuples with multiple key field selectors
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds.
            groupBy(4,0).reduce(new Tuple5Reduce());
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1,0,Hallo,1\n" +
            "2,3,2,Hallo Welt wie,1\n" +
            "2,2,1,Hallo Welt,2\n" +
            "3,9,0,P-),2\n" +
            "3,6,5,BCD,3\n" +
            "4,17,0,P-),1\n" +
            "4,17,0,P-),2\n" +
            "5,11,10,GHI,1\n" +
            "5,29,0,P-),2\n" +
            "5,25,0,P-),3\n";
      } 
      case 3: {
        /*
         * Reduce on tuples with key extractor
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(new KeySelector<Tuple3<Integer,Long,String>, Long>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Long getKey(Tuple3<Integer, Long, String> in) {
                    return in.f1;
                  }
                }).reduce(new Tuple3Reduce("B-)"));
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1,Hi\n" +
            "5,2,B-)\n" +
            "15,3,B-)\n" +
            "34,4,B-)\n" +
            "65,5,B-)\n" +
            "111,6,B-)\n";
        
      }
      case 4: {
        /*
         * Reduce on custom type with key extractor
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> reduceDs = ds.
            groupBy(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).reduce(new CustomTypeReduce());
        
        reduceDs.writeAsText(resultPath);
        env.execute();
        
        // return expected result
        return "1,0,Hi\n" +
            "2,3,Hello!\n" +
            "3,12,Hello!\n" +
            "4,30,Hello!\n" +
            "5,60,Hello!\n" +
            "6,105,Hello!\n";
      }
      case 5: {
        /*
         * All-reduce for tuple
         */


        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            reduce(new AllAddingTuple3Reduce());
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "231,91,Hello World\n";
      }
      case 6: {
        /*
         * All-reduce for custom types
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> reduceDs = ds.
            reduce(new AllAddingCustomTypeReduce());
        
        reduceDs.writeAsText(resultPath);
        env.execute();
        
        // return expected result
        return "91,210,Hello!";
      }
      case 7: {
        
        /*
         * Reduce with broadcast set
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Integer> intDs = CollectionDataSets.getIntegerDataSet(env);
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).reduce(new BCTuple3Reduce()).withBroadcastSet(intDs, "ints");
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1,Hi\n" +
            "5,2,55\n" +
            "15,3,55\n" +
            "34,4,55\n" +
            "65,5,55\n" +
            "111,6,55\n";
      }
      case 8: {
        /*
         * Reduce with UDF that returns the second input object (check mutable object handling)
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).reduce(new InputReturningTuple3Reduce());
        
        reduceDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1,Hi\n" +
            "5,2,Hi again!\n" +
            "15,3,Hi again!\n" +

View Full Code Here

        
        /*
         * CoGroup on tuples with key field selector
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<Integer, Integer>> coGroupDs = ds.coGroup(ds2).where(0).equalTo(0).with(new Tuple5CoGroup());
        
        coGroupDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,0\n" +
            "2,6\n" +
            "3,24\n" +
            "4,60\n" +
            "5,120\n";
      }
      case 2: {
        
        /*
         * CoGroup on two custom type inputs with key extractors
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> coGroupDs = ds.coGroup(ds2).where(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).equalTo(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).with(new CustomTypeCoGroup());
        
        coGroupDs.writeAsText(resultPath);
        env.execute();
        
        // return expected result
        return "1,0,test\n" +
            "2,6,test\n" +
            "3,24,test\n" +
            "4,60,test\n" +
            "5,120,test\n" +
            "6,210,test\n";
      }
      case 3: {
        
        /*
         * check correctness of cogroup if UDF returns left input objects
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> coGroupDs = ds.coGroup(ds2).where(0).equalTo(0).with(new Tuple3ReturnLeft());
        
        coGroupDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1,Hi\n" +
            "2,2,Hello\n" +
            "3,2,Hello world\n" +
            "4,3,Hello world, how are you?\n" +
            "5,3,I am fine.\n";
        
      }
      case 4: {
        
        /*
         * check correctness of cogroup if UDF returns right input objects
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> coGroupDs = ds.coGroup(ds2).where(0).equalTo(0).with(new Tuple5ReturnRight());
        
        coGroupDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,1,0,Hallo,1\n" +
            "2,2,1,Hallo Welt,2\n" +
            "2,3,2,Hallo Welt wie,1\n" +
            "3,4,3,Hallo Welt wie gehts?,2\n" +
            "3,5,4,ABC,2\n" +
            "3,6,5,BCD,3\n";
        
      }
      case 5: {
        
        /*
         * CoGroup with broadcast set: two 5-tuple inputs are co-grouped on
         * field position 0 and the integer data set is handed to the
         * Tuple5CoGroupBC UDF as broadcast set "ints".
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        // data set that is broadcast to the co-group function
        DataSet<Integer> intDs = CollectionDataSets.getIntegerDataSet(env);
        
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple3<Integer, Integer, Integer>> coGroupDs = ds.coGroup(ds2).where(0).equalTo(0).with(new Tuple5CoGroupBC()).withBroadcastSet(intDs, "ints");
        
        coGroupDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "1,0,55\n" +
            "2,6,55\n" +
            "3,24,55\n" +
            "4,60,55\n" +
            "5,120,55\n";
      }
      case 6: {
        
        /*
         * CoGroup on a tuple input with key field selector and a custom type input with key extractor
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> coGroupDs = ds.coGroup(ds2).where(2).equalTo(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).with(new MixedCoGroup());
        
        coGroupDs.writeAsCsv(resultPath);
        env.execute();
        
        // return expected result
        return "0,1,test\n" +
            "1,2,test\n" +
            "2,5,test\n" +
            "3,15,test\n" +
            "4,33,test\n" +
            "5,63,test\n" +
            "6,109,test\n" +
            "7,4,test\n" + 
            "8,4,test\n" + 
            "9,4,test\n" + 
            "10,5,test\n" + 
            "11,5,test\n" + 
            "12,5,test\n" + 
            "13,5,test\n" +
            "14,5,test\n"; 
            
      }
      case 7: {
        
        /*
         * CoGroup on a custom type input with key extractor and a tuple input with key field selector
         */
        
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> coGroupDs = ds2.coGroup(ds).where(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).equalTo(2).with(new MixedCoGroup2());
        
        coGroupDs.writeAsText(resultPath);
        env.execute();
        
        // return expected result
        return "0,1,test\n" +
            "1,2,test\n" +
            "2,5,test\n" +

View Full Code Here

    
  @Test
  public void testMultiSolutionSetJoinPlan() {
    try {
      
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      
      @SuppressWarnings("unchecked")
      DataSet<Tuple2<Long, Double>> inputData = env.fromElements(new Tuple2<Long, Double>(1L, 1.0));
      DataSet<Tuple2<Long, Double>> result = constructPlan(inputData, 10);
      
      // add two sinks, to test the case of branching after an iteration
      result.print();
      result.print();
    
      Plan p = env.createProgramPlan();
      
      OptimizedPlan optPlan = compileNoStats(p);
      
      OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of eu.stratosphere.api.java.ExecutionEnvironment

$.Job

$.WordCountJob

eu.stratosphere.api.avro.testjar.AvroExternalJarProgram

eu.stratosphere.api.java.functions.SemanticPropertiesTranslationTest

eu.stratosphere.api.java.io.CsvReader

eu.stratosphere.api.java.io.jdbc.example.JDBCExample

eu.stratosphere.api.java.io.TextInputFormat

eu.stratosphere.api.java.io.TextValueInputFormat

eu.stratosphere.api.java.operator.AggregateOperatorTest

eu.stratosphere.api.java.operator.CoGroupOperatorTest

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.