Package org.apache.flink.api.java

Examples of org.apache.flink.api.java.ExecutionEnvironment
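
Each example below follows the same basic lifecycle: obtain an ExecutionEnvironment, build a DataSet program through transformations, attach a sink, and trigger execution with execute(). For orientation, here is a minimal, self-contained sketch of that lifecycle (the class name, values, and output path are illustrative placeholders, not taken from any of the examples below):

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

// Minimal sketch of the ExecutionEnvironment lifecycle shared by the examples
// on this page. Class name and output path are placeholders.
public class ExecutionEnvironmentSketch {

  public static void main(String[] args) throws Exception {
    // Entry point of a Flink batch program: returns a local or a cluster
    // environment, depending on how the program is invoked.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Build a DataSet and apply a simple transformation.
    DataSet<Long> numbers = env.generateSequence(1, 10);
    DataSet<Long> doubled = numbers.map(new MapFunction<Long, Long>() {
      private static final long serialVersionUID = 1L;
      @Override
      public Long map(Long value) {
        return value * 2;
      }
    });

    // Attach a sink and run the program; nothing executes until execute() is called.
    doubled.writeAsText("/tmp/doubled-output"); // placeholder output path
    env.execute("ExecutionEnvironment sketch");
  }
}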


      switch(progId) {
      case 1: {
        /*
         * Test standard counting with combiner
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, IntWritable>>() {
              private static final long serialVersionUID = 1L;
              Tuple2<IntWritable,IntWritable> outT = new Tuple2<IntWritable,IntWritable>();
              @Override
              public Tuple2<IntWritable, IntWritable> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                outT.f0 = new IntWritable(v.f0.get() / 6);
                outT.f1 = new IntWritable(1);
                return outT;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> counts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
                new SumReducer(), new SumReducer()));
       
        counts.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(0,5)\n" +
            "(1,6)\n" +
            "(2,6)\n" +
            "(3,4)\n";
      }
      case 2: {
        /*
         * Test ungrouped Hadoop reducer
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, IntWritable>>() {
              private static final long serialVersionUID = 1L;
              Tuple2<IntWritable,IntWritable> outT = new Tuple2<IntWritable,IntWritable>();
              @Override
              public Tuple2<IntWritable, IntWritable> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                outT.f0 = new IntWritable(0);
                outT.f1 = v.f0;
                return outT;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> sum = ds.
            reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
                new SumReducer(), new SumReducer()));
       
        sum.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(0,231)\n";
      }
      case 3: {
        /* Test combiner */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, IntWritable>>() {
              private static final long serialVersionUID = 1L;
              Tuple2<IntWritable,IntWritable> outT = new Tuple2<IntWritable,IntWritable>();
              @Override
              public Tuple2<IntWritable, IntWritable> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                outT.f0 = v.f0;
                outT.f1 = new IntWritable(1);
                return outT;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> counts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
                new SumReducer(), new KeyChangingReducer()));
       
        counts.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(0,5)\n" +
            "(1,6)\n" +
            "(2,5)\n" +
            "(3,5)\n";
      }
      case 4: {
        /*
         * Test configuration via JobConf
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        JobConf conf = new JobConf();
        conf.set("my.cntPrefix", "Hello");
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, Text>>() {
              private static final long serialVersionUID = 1L;
              @Override
              public Tuple2<IntWritable, Text> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                v.f0 = new IntWritable(v.f0.get() % 5);
                return v;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> hellos = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                new ConfigurableCntReducer(), conf));
       
        hellos.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(0,0)\n" +
            "(1,0)\n" +
            "(2,1)\n" +


public class SpargelCompilerTest extends CompilerTestBase {

  @Test
  public void testSpargelCompiler() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
      // compose test program
      {
        DataSet<Long> vertexIds = env.generateSequence(1, 2);
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
       
        DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());
        DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100));
       
        result.print();
      }
     
      Plan p = env.createProgramPlan("Spargel Connected Components");
      OptimizedPlan op = compileNoStats(p);
     
      // check the sink
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

  @Test
  public void testSpargelCompilerWithBroadcastVariable() {
    try {
      final String BC_VAR_NAME = "borat variable";
     
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
      // compose test program
      {
        DataSet<Long> bcVar = env.fromElements(1L);
       
        DataSet<Long> vertexIds = env.generateSequence(1, 2);
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
       
        DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());
       
        VertexCentricIteration<Long, Long, Long, ?> vcIter = VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100);
        vcIter.addBroadcastSetForMessagingFunction(BC_VAR_NAME, bcVar);
        vcIter.addBroadcastSetForUpdateFunction(BC_VAR_NAME, bcVar);
       
        DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(vcIter);
       
        result.print();
      }
     
      Plan p = env.createProgramPlan("Spargel Connected Components");
      OptimizedPlan op = compileNoStats(p);
     
      // check the sink
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

  @Test
  public void testDeltaIterationNotDependingOnSolutionSet() {
    try {
      final List<Tuple2<Long, Long>> result = new ArrayList<Tuple2<Long,Long>>();
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(1);
     
      DataSet<Tuple2<Long, Long>> input = env.generateSequence(0, 9).map(new Duplicator<Long>());
     
      DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 5, 1);
     
      iteration.closeWith(iteration.getWorkset(), iteration.getWorkset().map(new TestMapper()))
        .output(new LocalCollectionOutputFormat<Tuple2<Long,Long>>(result));
     
      env.execute();
     
      boolean[] present = new boolean[50];
      for (Tuple2<Long, Long> t : result) {
        present[t.f0.intValue()] = true;
      }

    }
   
    final String inputPath = args[0];
    final String outputPath = args[1];
   
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(1);
   
    // Set up the Hadoop Input Format
    Job job = Job.getInstance();
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
    TextInputFormat.addInputPath(job, new Path(inputPath));
   
    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
   
    // Tokenize the line and convert from Writable "Text" to String for better handling
    DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());
   
    // Sum up the words
    DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);
   
    // Convert String back to Writable "Text" for use with Hadoop Output Format
    DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());
   
    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
    hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
    hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " "); // set the value for both keys, since this test
    // is executed with both the hadoop1 and the hadoop2 profile
    TextOutputFormat.setOutputPath(job, new Path(outputPath));
   
    // Output & Execute
    hadoopResult.output(hadoopOutputFormat);
    env.execute("Word Count");
  }

      final int NUM_ITERATIONS = 13;
     
      final int ITERATION_DOP = 77;
     
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      DataSet<Long> bcMessaging = env.fromElements(1L);
      DataSet<Long> bcUpdate = env.fromElements(1L);
     
      DataSet<Tuple2<String, Double>> result;
     
      // ------------ construct the test program ------------------
      {
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<String, Double>("abc", 3.44));
 
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<String, String>("a", "c"));
       
       
        VertexCentricIteration<String, Double, Long, ?> vertexIteration =
            VertexCentricIteration.withPlainEdges(edges, new UpdateFunction(), new MessageFunctionNoEdgeValue(), NUM_ITERATIONS);
        vertexIteration.addBroadcastSetForMessagingFunction(BC_SET_MESSAGES_NAME, bcMessaging);

    resultPath = getTempFilePath("results");
  }
 
  @Override
  protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<Long> vertexIds = env.generateSequence(1, NUM_VERTICES);
    DataSet<String> edgeString = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"));
   
    DataSet<Tuple2<Long, Long>> edges = edgeString.map(new EdgeParser());
   
    DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());
    DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100));
   
    result.writeAsCsv(resultPath, "\n", " ");
    env.execute("Spargel Connected Components");
  }

      final int NUM_ITERATIONS = 13;
     
      final int ITERATION_DOP = 77;
     
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      DataSet<Long> bcVar = env.fromElements(1L);
     
      DataSet<Tuple2<String, Double>> result;
     
      // ------------ construct the test program ------------------
      {
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<String, Double>("abc", 3.44));
 
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<String, String>("a", "c"));
       
       
        VertexCentricIteration<String, Double, Long, ?> vertexIteration =
            VertexCentricIteration.withPlainEdges(edges, new UpdateFunction(), new MessageFunctionNoEdgeValue(), NUM_ITERATIONS);
        vertexIteration.addBroadcastSetForMessagingFunction(BC_SET_MESSAGES_NAME, bcVar);

    }
   
    final String inputPath = args[0];
    final String outputPath = args[1];
   
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    // Set up the Hadoop Input Format
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, new JobConf());
    TextInputFormat.addInputPath(hadoopInputFormat.getJobConf(), new Path(inputPath));
   
    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
   
    DataSet<Tuple2<Text, LongWritable>> words =
        text.flatMap(new HadoopMapFunction<LongWritable, Text, Text, LongWritable>(new Tokenizer()))
          .groupBy(0).reduceGroup(new HadoopReduceCombineFunction<Text, LongWritable, Text, LongWritable>(new Counter(), new Counter()));
   
    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, LongWritable> hadoopOutputFormat =
        new HadoopOutputFormat<Text, LongWritable>(new TextOutputFormat<Text, LongWritable>(), new JobConf());
    hadoopOutputFormat.getJobConf().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(hadoopOutputFormat.getJobConf(), new Path(outputPath));
   
    // Output & Execute
    words.output(hadoopOutputFormat).setParallelism(1);
    env.execute("Hadoop Compat WordCount");
  }

    final String centersPath = args[1];
    final String outputPath = args[2];
    final int numIterations = Integer.parseInt(args[3]);


    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(4);

    // get input data
    DataSet<Point> points = env.readCsvFile(pointsPath)
        .fieldDelimiter('|')
        .includeFields(true, true)
        .types(Double.class, Double.class)
        .map(new TuplePointConverter());

    DataSet<Centroid> centroids = env.readCsvFile(centersPath)
        .fieldDelimiter('|')
        .includeFields(true, true, true)
        .types(Integer.class, Double.class, Double.class)
        .map(new TupleCentroidConverter());

    // set number of bulk iterations for KMeans algorithm
    IterativeDataSet<Centroid> loop = centroids.iterate(numIterations);

    DataSet<Centroid> newCentroids = points
      // compute closest centroid for each point
      .map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")
      // count and sum point coordinates for each centroid
      .map(new CountAppender())
      // !test if key expressions are working!
      .groupBy("field0").reduce(new CentroidAccumulator())
      // compute new centroids from point counts and coordinate sums
      .map(new CentroidAverager());

    // feed new centroids back into next iteration
    DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

    DataSet<Tuple2<Integer, Point>> clusteredPoints = points
        // assign points to final clusters
        .map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");

    // emit result
    clusteredPoints.writeAsCsv(outputPath, "\n", " ");

    return env.createProgramPlan();
  }
