Package org.apache.flink.api.java

Examples of org.apache.flink.api.java.ExecutionEnvironment
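
Each example below follows the same basic lifecycle: obtain an ExecutionEnvironment, build a DataSet program through transformations, attach a sink, and trigger execution with execute(). For orientation, here is a minimal, self-contained sketch of that lifecycle (the class name, values, and output path are illustrative placeholders, not taken from any of the examples below):

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

// Minimal sketch of the ExecutionEnvironment lifecycle shared by the examples
// on this page. Class name and output path are placeholders.
public class ExecutionEnvironmentSketch {

  public static void main(String[] args) throws Exception {
    // Entry point of a Flink batch program: returns a local or a cluster
    // environment, depending on how the program is invoked.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Build a DataSet and apply a simple transformation.
    DataSet<Long> numbers = env.generateSequence(1, 10);
    DataSet<Long> doubled = numbers.map(new MapFunction<Long, Long>() {
      private static final long serialVersionUID = 1L;
      @Override
      public Long map(Long value) {
        return value * 2;
      }
    });

    // Attach a sink and run the program; nothing executes until execute() is called.
    doubled.writeAsText("/tmp/doubled-output"); // placeholder output path
    env.execute("ExecutionEnvironment sketch");
  }
}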


      switch(progId) {
      case 1: {
        /*
         * Test standard counting with combiner
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, IntWritable>>() {
              private static final long serialVersionUID = 1L;
              Tuple2<IntWritable,IntWritable> outT = new Tuple2<IntWritable,IntWritable>();
              @Override
              public Tuple2<IntWritable, IntWritable> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                outT.f0 = new IntWritable(v.f0.get() / 6);
                outT.f1 = new IntWritable(1);
                return outT;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> counts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
                new SumReducer(), new SumReducer()));
       
        counts.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(0,5)\n" +
            "(1,6)\n" +
            "(2,6)\n" +
            "(3,4)\n";
      }
      case 2: {
        /*
         * Test ungrouped Hadoop reducer
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, IntWritable>>() {
              private static final long serialVersionUID = 1L;
              Tuple2<IntWritable,IntWritable> outT = new Tuple2<IntWritable,IntWritable>();
              @Override
              public Tuple2<IntWritable, IntWritable> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                outT.f0 = new IntWritable(0);
                outT.f1 = v.f0;
                return outT;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> sum = ds.
            reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
                new SumReducer(), new SumReducer()));
       
        sum.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(0,231)\n";
      }
      case 3: {
        /* Test combiner */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, IntWritable>>() {
              private static final long serialVersionUID = 1L;
              Tuple2<IntWritable,IntWritable> outT = new Tuple2<IntWritable,IntWritable>();
              @Override
              public Tuple2<IntWritable, IntWritable> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                outT.f0 = v.f0;
                outT.f1 = new IntWritable(1);
                return outT;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> counts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
                new SumReducer(), new KeyChangingReducer()));
       
        counts.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(0,5)\n" +
            "(1,6)\n" +
            "(2,5)\n" +
            "(3,5)\n";
      }
      case 4: {
        /*
         * Test configuration via JobConf
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        JobConf conf = new JobConf();
        conf.set("my.cntPrefix", "Hello");
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, Text>>() {
              private static final long serialVersionUID = 1L;
              @Override
              public Tuple2<IntWritable, Text> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                v.f0 = new IntWritable(v.f0.get() % 5);
                return v;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> hellos = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                new ConfigurableCntReducer(), conf));
       
        hellos.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(0,0)\n" +
            "(1,0)\n" +
            "(2,1)\n" +


public class SpargelCompilerTest extends CompilerTestBase {

  @Test
  public void testSpargelCompiler() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
      // compose test program
      {
        DataSet<Long> vertexIds = env.generateSequence(1, 2);
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
       
        DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());
        DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100));
       
        result.print();
      }
     
      Plan p = env.createProgramPlan("Spargel Connected Components");
      OptimizedPlan op = compileNoStats(p);
     
      // check the sink
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

  @Test
  public void testSpargelCompilerWithBroadcastVariable() {
    try {
      final String BC_VAR_NAME = "borat variable";
     
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
      // compose test program
      {
        DataSet<Long> bcVar = env.fromElements(1L);
       
        DataSet<Long> vertexIds = env.generateSequence(1, 2);
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
       
        DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());
       
        VertexCentricIteration<Long, Long, Long, ?> vcIter = VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100);
        vcIter.addBroadcastSetForMessagingFunction(BC_VAR_NAME, bcVar);
        vcIter.addBroadcastSetForUpdateFunction(BC_VAR_NAME, bcVar);
       
        DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(vcIter);
       
        result.print();
      }
     
      Plan p = env.createProgramPlan("Spargel Connected Components");
      OptimizedPlan op = compileNoStats(p);
     
      // check the sink
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

  @Test
  public void testDeltaIterationNotDependingOnSolutionSet() {
    try {
      final List<Tuple2<Long, Long>> result = new ArrayList<Tuple2<Long,Long>>();
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(1);
     
      DataSet<Tuple2<Long, Long>> input = env.generateSequence(0, 9).map(new Duplicator<Long>());
     
      DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 5, 1);
     
      iteration.closeWith(iteration.getWorkset(), iteration.getWorkset().map(new TestMapper()))
        .output(new LocalCollectionOutputFormat<Tuple2<Long,Long>>(result));
     
      env.execute();
     
      boolean[] present = new boolean[50];
      for (Tuple2<Long, Long> t : result) {
        present[t.f0.intValue()] = true;
      }

    }
   
    final String inputPath = args[0];
    final String outputPath = args[1];
   
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(1);
   
    // Set up the Hadoop Input Format
    Job job = Job.getInstance();
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
    TextInputFormat.addInputPath(job, new Path(inputPath));
   
    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
   
    // Tokenize the line and convert from Writable "Text" to String for better handling
    DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());
   
    // Sum up the words
    DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);
   
    // Convert String back to Writable "Text" for use with Hadoop Output Format
    DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());
   
    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
    hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
    hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both keys, since this test
    // is executed with both the hadoop1 and the hadoop2 profile
    TextOutputFormat.setOutputPath(job, new Path(outputPath));
   
    // Output & Execute
    hadoopResult.output(hadoopOutputFormat);
    env.execute("Word Count");
  }

      final int NUM_ITERATIONS = 13;
     
      final int ITERATION_DOP = 77;
     
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      DataSet<Long> bcMessaging = env.fromElements(1L);
      DataSet<Long> bcUpdate = env.fromElements(1L);
     
      DataSet<Tuple2<String, Double>> result;
     
      // ------------ construct the test program ------------------
      {
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<String, Double>("abc", 3.44));
 
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<String, String>("a", "c"));
       
       
        VertexCentricIteration<String, Double, Long, ?> vertexIteration =
            VertexCentricIteration.withPlainEdges(edges, new UpdateFunction(), new MessageFunctionNoEdgeValue(), NUM_ITERATIONS);
        vertexIteration.addBroadcastSetForMessagingFunction(BC_SET_MESSAGES_NAME, bcMessaging);

    resultPath = getTempFilePath("results");
  }
 
  @Override
  protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<Long> vertexIds = env.generateSequence(1, NUM_VERTICES);
    DataSet<String> edgeString = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"));
   
    DataSet<Tuple2<Long, Long>> edges = edgeString.map(new EdgeParser());
   
    DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());
    DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100));
   
    result.writeAsCsv(resultPath, "\n", " ");
    env.execute("Spargel Connected Components");
  }

      final int NUM_ITERATIONS = 13;
     
      final int ITERATION_DOP = 77;
     
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      DataSet<Long> bcVar = env.fromElements(1L);
     
      DataSet<Tuple2<String, Double>> result;
     
      // ------------ construct the test program ------------------
      {
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<String, Double>("abc", 3.44));
 
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<String, String>("a", "c"));
       
       
        VertexCentricIteration<String, Double, Long, ?> vertexIteration =
            VertexCentricIteration.withPlainEdges(edges, new UpdateFunction(), new MessageFunctionNoEdgeValue(), NUM_ITERATIONS);
        vertexIteration.addBroadcastSetForMessagingFunction(BC_SET_MESSAGES_NAME, bcVar);

    }
   
    final String inputPath = args[0];
    final String outputPath = args[1];
   
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    // Set up the Hadoop Input Format
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, new JobConf());
    TextInputFormat.addInputPath(hadoopInputFormat.getJobConf(), new Path(inputPath));
   
    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
   
    DataSet<Tuple2<Text, LongWritable>> words =
        text.flatMap(new HadoopMapFunction<LongWritable, Text, Text, LongWritable>(new Tokenizer()))
          .groupBy(0).reduceGroup(new HadoopReduceCombineFunction<Text, LongWritable, Text, LongWritable>(new Counter(), new Counter()));
   
    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, LongWritable> hadoopOutputFormat =
        new HadoopOutputFormat<Text, LongWritable>(new TextOutputFormat<Text, LongWritable>(), new JobConf());
    hadoopOutputFormat.getJobConf().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(hadoopOutputFormat.getJobConf(), new Path(outputPath));
   
    // Output & Execute
    words.output(hadoopOutputFormat).setParallelism(1);
    env.execute("Hadoop Compat WordCount");
  }

    final String centersPath = args[1];
    final String outputPath = args[2];
    final int numIterations = Integer.parseInt(args[3]);


    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(4);

    // get input data
    DataSet<Point> points = env.readCsvFile(pointsPath)
        .fieldDelimiter('|')
        .includeFields(true, true)
        .types(Double.class, Double.class)
        .map(new TuplePointConverter());

    DataSet<Centroid> centroids = env.readCsvFile(centersPath)
        .fieldDelimiter('|')
        .includeFields(true, true, true)
        .types(Integer.class, Double.class, Double.class)
        .map(new TupleCentroidConverter());

    // set number of bulk iterations for KMeans algorithm
    IterativeDataSet<Centroid> loop = centroids.iterate(numIterations);

    DataSet<Centroid> newCentroids = points
      // compute closest centroid for each point
      .map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")
      // count and sum point coordinates for each centroid
      .map(new CountAppender())
      // !test if key expressions are working!
      .groupBy("field0").reduce(new CentroidAccumulator())
      // compute new centroids from point counts and coordinate sums
      .map(new CentroidAverager());

    // feed new centroids back into next iteration
    DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

    DataSet<Tuple2<Integer, Point>> clusteredPoints = points
        // assign points to final clusters
        .map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");

    // emit result
    clusteredPoints.writeAsCsv(outputPath, "\n", " ");

    return env.createProgramPlan();
  }
