Package org.apache.flink.api.java

Examples of org.apache.flink.api.java.ExecutionEnvironment
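The snippets below are drawn from Flink's optimizer test suite and use the pre-1.0 DataSet API, in which parallelism was still configured via setDegreeOfParallelism (later renamed setParallelism). As a minimal, self-contained sketch of the ExecutionEnvironment entry point, assuming that same API generation:

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class ExecutionEnvironmentExample {

  public static void main(String[] args) throws Exception {
    // Local environment when run from an IDE, cluster environment when submitted.
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(4); // setParallelism(int) in later releases

    DataSet<Long> numbers = env.generateSequence(1, 100);

    // In this API generation, print() registers a data sink;
    // the job runs when execute() is called.
    numbers.print();
    env.execute("ExecutionEnvironment example");
  }
}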


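This test helper builds a delta-iteration plan over Tuple3<Long, Long, Long> inputs: the workset is joined with an invariant input and then with the solution set. The joinPreservesSolutionSet flag controls whether the solution-set join declares its fields as forwarded, and mapBeforeSolutionDelta inserts an identity mapper in front of the solution-set delta: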
 
  private Plan getJavaTestPlan(boolean joinPreservesSolutionSet, boolean mapBeforeSolutionDelta) {
   
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
   
    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> solutionSetInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Solution Set");
    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> worksetInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Workset");
    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> invariantInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Invariant Input");
   
    // delta iteration: at most 100 supersteps, solution set keyed on tuple fields 1 and 2
    DeltaIteration<Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>> iter = solutionSetInput.iterateDelta(worksetInput, 100, 1, 2);
   
   
    DataSet<Tuple3<Long, Long, Long>> joinedWithSolutionSet = iter.getWorkset().join(invariantInput)
      .where(1, 2)
      .equalTo(1, 2)
      .with(new RichJoinFunction<Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>>() {
        @Override
        public Tuple3<Long, Long, Long> join(Tuple3<Long, Long, Long> first, Tuple3<Long, Long, Long> second) {
          return first;
        }
      })
      .name(JOIN_WITH_INVARIANT_NAME)
   
    .join(iter.getSolutionSet())
      .where(1, 0)
      .equalTo(1, 2)
      .with(new RichJoinFunction<Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>>() {
        @Override
        public Tuple3<Long, Long, Long> join(Tuple3<Long, Long, Long> first, Tuple3<Long, Long, Long> second) {
          return second;
        }
      })
      .name(JOIN_WITH_SOLUTION_SET)
      // declare solution-set fields as constant (forwarded) only when the join preserves them
      .withConstantSetSecond(joinPreservesSolutionSet ? new String[] {"0->0", "1->1", "2->2"} : null);
     
    DataSet<Tuple3<Long, Long, Long>> nextWorkset = joinedWithSolutionSet.groupBy(1, 2)
      .reduceGroup(new RichGroupReduceFunction<Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>>() {
        @Override
        public void reduce(Iterable<Tuple3<Long, Long, Long>> values, Collector<Tuple3<Long, Long, Long>> out) {}
      })
      .name(NEXT_WORKSET_REDUCER_NAME)
      .withConstantSet("1->1", "2->2", "0->0");
   
   
    DataSet<Tuple3<Long, Long, Long>> nextSolutionSet = mapBeforeSolutionDelta ?
        joinedWithSolutionSet.map(new RichMapFunction<Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>>() {
          @Override
          public Tuple3<Long, Long, Long> map(Tuple3<Long, Long, Long> value) { return value; }
        })
        .name(SOLUTION_DELTA_MAPPER_NAME).withConstantSet("0->0", "1->1", "2->2") :
        joinedWithSolutionSet;
   
    iter.closeWith(nextSolutionSet, nextWorkset)
      .print();
   
    return env.createProgramPlan();
  }
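This compiler test feeds the result of a bulk iteration through a mapper into a delta iteration (the doBulkIteration and doDeltaIteration helpers are not shown in this listing) and asserts that the plan's single data sink is fed by a WorksetIterationPlanNode: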
 
  @Test
  public void testTwoIterationsWithMapperInbetween() throws Exception {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
     
      DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
     
      DataSet<Tuple2<Long, Long>> bulkResult = doBulkIteration(verticesWithInitialId, edges);
     
      DataSet<Tuple2<Long, Long>> mappedBulk = bulkResult.map(new DummyMap());
     
      DataSet<Tuple2<Long, Long>> depResult = doDeltaIteration(mappedBulk, edges);
     
      depResult.print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      assertEquals(1, op.getDataSinks().size());
      assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);
     
      // remainder elided in the original listing; assumed standard closing:
    }
    catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
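The same check, with the two iterations chained directly and no mapper in between: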
 
  @Test
  public void testTwoIterationsDirectlyChained() throws Exception {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
     
      DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
     
      DataSet<Tuple2<Long, Long>> bulkResult = doBulkIteration(verticesWithInitialId, edges);
     
      DataSet<Tuple2<Long, Long>> depResult = doDeltaIteration(bulkResult, edges);
     
      depResult.print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      assertEquals(1, op.getDataSinks().size());
      assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);
     
      // remainder elided in the original listing; assumed standard closing:
    }
    catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
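Two delta (workset) iterations chained directly: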
 
  @Test
  public void testTwoWorksetIterationsDirectlyChained() throws Exception {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
     
      DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
     
      DataSet<Tuple2<Long, Long>> firstResult = doDeltaIteration(verticesWithInitialId, edges);
     
      DataSet<Tuple2<Long, Long>> secondResult = doDeltaIteration(firstResult, edges);
     
      secondResult.print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      assertEquals(1, op.getDataSinks().size());
      assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);
     
      // remainder elided in the original listing; assumed standard closing:
    }
    catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
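A bulk iteration over CSV inputs; the test name indicates that the optimizer is expected to push work out of the iteration, and the sink must be fed by a BulkIterationPlanNode: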
 
  @Test
  public void testIterationPushingWorkOut() throws Exception {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<Long, Long>> input1 = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());
     
      DataSet<Tuple2<Long, Long>> input2 = env.readCsvFile("/some/file/path").types(Long.class, Long.class);
     
      doBulkIteration(input1, input2).print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      assertEquals(1, op.getDataSinks().size());
      assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof BulkIterationPlanNode);
     
      // remainder elided in the original listing; assumed standard closing:
    }
    catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
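DeltaIterationDependenciesTest verifies that the compiler rejects a plan whose next workset does not depend on the current workset: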

public class DeltaIterationDependenciesTest extends CompilerTestBase {

  @Test
  public void testExceptionWhenNewWorksetNotDependentOnWorkset() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

      DataSet<Tuple2<Long, Long>> input = env.fromElements(new Tuple2<Long, Long>(0L, 0L));

      DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration = input.iterateDelta(input, 10, 0);

      DataSet<Tuple2<Long, Long>> delta = deltaIteration.getSolutionSet().join(deltaIteration.getWorkset())
                            .where(0).equalTo(0)
                            .projectFirst(1).projectSecond(1).types(Long.class, Long.class);

      DataSet<Tuple2<Long, Long>> nextWorkset = deltaIteration.getSolutionSet().join(input)
                            .where(0).equalTo(0)
                            .projectFirst(1).projectSecond(1).types(Long.class, Long.class);
     

      DataSet<Tuple2<Long, Long>> result = deltaIteration.closeWith(delta, nextWorkset);

      result.print();
     
      Plan p = env.createProgramPlan();
      try {
        compileNoStats(p);
        fail("Should not be able to compile, since the next workset does not depend on the workset");
      }
      catch (CompilerException e) {
        // expected: the compiler must reject this plan
        // (remainder elided in the original listing; assumed standard closing below)
      }
    }
    catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
}
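IterationCompilerTest covers corner cases of iteration compilation. The first test closes a bulk iteration with its own, unmodified input: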

public class IterationCompilerTest extends CompilerTestBase {

  @Test
  public void testIdentityIteration() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(43);
     
      IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100);
      iteration.closeWith(iteration).print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      new NepheleJobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
      // remainder elided in the original listing; assumed standard closing:
      e.printStackTrace();
      fail(e.getMessage());
    }
  }

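A workset iteration whose solution-set delta and next workset are both the unmodified workset: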
 
  @Test
  public void testEmptyWorksetIteration() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(43);
     
      DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
          .map(new MapFunction<Long, Tuple2<Long, Long>>() {
            @Override
            public Tuple2<Long, Long> map(Long value){ return null; }
          });
         
         
      DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);
      iter.closeWith(iter.getWorkset(), iter.getWorkset())
        .print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      new NepheleJobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
      // remainder elided in the original listing; assumed standard closing:
      e.printStackTrace();
      fail(e.getMessage());
    }
  }

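A bulk iteration whose root is a union of two mapped branches: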
 
  @Test
  public void testIterationWithUnionRoot() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(43);
     
      IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100);
     
      iteration.closeWith(
          iteration.map(new IdentityMapper<Long>()).union(iteration.map(new IdentityMapper<Long>())))
          .print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      new NepheleJobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
      // remainder elided in the original listing; assumed standard closing:
      e.printStackTrace();
      fail(e.getMessage());
    }
  }

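The workset variant, with unions at both the solution-set delta and the next workset: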
 
  @Test
  public void testWorksetIterationWithUnionRoot() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(43);
     
      DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
          .map(new MapFunction<Long, Tuple2<Long, Long>>() {
            @Override
            public Tuple2<Long, Long> map(Long value){ return null; }
          });
         
         
      DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);
      iter.closeWith(
          iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
              .union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())),
          iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
              .union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())))
        .print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      new NepheleJobGraphGenerator().compileJobGraph(op);
    }
    catch (Exception e) {
      // remainder elided in the original listing; assumed standard closing:
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
}
