Package org.apache.flink.api.java

Examples of org.apache.flink.api.java.ExecutionEnvironment


public class PipelineBreakerTest extends CompilerTestBase {

  @Test
  public void testPipelineBreakerWithBroadcastVariable() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(64);
     
      DataSet<Long> source = env.generateSequence(1, 10).map(new IdentityMapper<Long>());
     
      DataSet<Long> result = source.map(new IdentityMapper<Long>())
                    .map(new IdentityMapper<Long>())
                      .withBroadcastSet(source, "bc");
     
      result.print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();
     
      // ... (remainder of the test is truncated in the original snippet)
  }
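
A minimal sketch (not taken from the test above; the class name BroadcastReceivingMapper is hypothetical) of how a broadcast set registered under "bc" is consumed at runtime: a rich function looks the variable up by name in open(), and Flink materializes the complete data set on every parallel task instance. Import paths assume a recent DataSet API release.

import java.util.List;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;

public class BroadcastReceivingMapper extends RichMapFunction<Long, Long> {

  private List<Long> broadcast;

  @Override
  public void open(Configuration parameters) throws Exception {
    // fetches the data set registered via withBroadcastSet(source, "bc")
    this.broadcast = getRuntimeContext().getBroadcastVariable("bc");
  }

  @Override
  public Long map(Long value) {
    return value + broadcast.size();
  }
}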
 
  @Test
  public void testPipelineBreakerBroadcastedAllReduce() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(64);
     
      DataSet<Long> sourceWithMapper = env.generateSequence(1, 10).map(new IdentityMapper<Long>());
     
      DataSet<Long> bcInput1 = sourceWithMapper
                    .map(new IdentityMapper<Long>())
                    .reduce(new SelectOneReducer<Long>());
      DataSet<Long> bcInput2 = env.generateSequence(1, 10);
     
      DataSet<Long> result = sourceWithMapper
          .map(new IdentityMapper<Long>())
              .withBroadcastSet(bcInput1, "bc1")
              .withBroadcastSet(bcInput2, "bc2");
     
      result.print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();
     
      // ... (remainder of the test is truncated in the original snippet)

  }
 
  @Test
  public void testPipelineBreakerBroadcastedPartialSolution() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(64);
     
     
      DataSet<Long> initialSource = env.generateSequence(1, 10);
      IterativeDataSet<Long> iteration = initialSource.iterate(100);
     
     
      DataSet<Long> sourceWithMapper = env.generateSequence(1, 10).map(new IdentityMapper<Long>());
     
      DataSet<Long> bcInput1 = sourceWithMapper
                    .map(new IdentityMapper<Long>())
                    .reduce(new SelectOneReducer<Long>());
     
      DataSet<Long> result = sourceWithMapper
          .map(new IdentityMapper<Long>())
              .withBroadcastSet(iteration, "bc2")
              .withBroadcastSet(bcInput1, "bc1");
             
     
      iteration.closeWith(result).print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      BulkIterationPlanNode iterationPlanNode = (BulkIterationPlanNode) sink.getInput().getSource();
      SingleInputPlanNode mapper = (SingleInputPlanNode) iterationPlanNode.getRootOfStepFunction();
      // ... (remainder of the test is truncated in the original snippet)
  }
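
The test above only takes a bulk iteration through the optimizer. For reference, a minimal runnable sketch of the same iterate()/closeWith() pattern (nothing here is taken from the truncated test body, and the usual DataSet API imports are assumed): ten supersteps that each increment every element.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// start a bulk iteration that runs at most ten supersteps
IterativeDataSet<Long> loop = env.generateSequence(1, 10).iterate(10);

// the step function: increment every element once per superstep
DataSet<Long> stepResult = loop.map(new MapFunction<Long, Long>() {
  @Override
  public Long map(Long value) {
    return value + 1;
  }
});

// feed the step result back into the iteration and emit the final result
loop.closeWith(stepResult).print();
env.execute("bulk iteration sketch");
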
  @Test
  public void testPipelineBreakerWithCross() {
    try {
      {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(64);
       
        DataSet<Long> initialSource = env.generateSequence(1, 10);
       
        Configuration conf = new Configuration();
        conf.setString(PactCompiler.HINT_LOCAL_STRATEGY, PactCompiler.HINT_LOCAL_STRATEGY_NESTEDLOOP_BLOCKED_OUTER_FIRST);
        initialSource
          .map(new IdentityMapper<Long>())
          .cross(initialSource).withParameters(conf)
          .print();
       
       
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        DualInputPlanNode mapper = (DualInputPlanNode) sink.getInput().getSource();
       
        assertTrue(mapper.getInput1().getTempMode().breaksPipeline());
      }
     
      {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(64);
       
        DataSet<Long> initialSource = env.generateSequence(1, 10);
       
        Configuration conf = new Configuration();
        conf.setString(PactCompiler.HINT_LOCAL_STRATEGY, PactCompiler.HINT_LOCAL_STRATEGY_NESTEDLOOP_BLOCKED_OUTER_SECOND);
        initialSource
          .map(new IdentityMapper<Long>())
          .cross(initialSource).withParameters(conf)
          .print();
       
       
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
       
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        DualInputPlanNode mapper = (DualInputPlanNode) sink.getInput().getSource();
       
        assertTrue(mapper.getInput2().getTempMode().breaksPipeline());
      }
     
      {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(64);
       
        DataSet<Long> initialSource = env.generateSequence(1, 10);
       
        Configuration conf = new Configuration();
        conf.setString(PactCompiler.HINT_LOCAL_STRATEGY, PactCompiler.HINT_LOCAL_STRATEGY_NESTEDLOOP_STREAMED_OUTER_FIRST);
        initialSource
          .map(new IdentityMapper<Long>())
          .cross(initialSource).withParameters(conf)
          .print();
       
       
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
       
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        DualInputPlanNode mapper = (DualInputPlanNode) sink.getInput().getSource();
       
        assertTrue(mapper.getInput1().getTempMode().breaksPipeline());
      }
     
      {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(64);
       
        DataSet<Long> initialSource = env.generateSequence(1, 10);
       
        Configuration conf = new Configuration();
        conf.setString(PactCompiler.HINT_LOCAL_STRATEGY, PactCompiler.HINT_LOCAL_STRATEGY_NESTEDLOOP_STREAMED_OUTER_SECOND);
        initialSource
          .map(new IdentityMapper<Long>())
          .cross(initialSource).withParameters(conf)
          .print();
       
       
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
       
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        DualInputPlanNode mapper = (DualInputPlanNode) sink.getInput().getSource();
       
        assertTrue(mapper.getInput2().getTempMode().breaksPipeline());
      }
    }
    catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
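
The Configuration passed through withParameters(conf) above carries optimizer hints (PactCompiler.HINT_LOCAL_STRATEGY) that the compiler reads when fixing the nested-loop variant. The same mechanism hands the Configuration to rich user functions via open(). A minimal sketch of that second use, with a hypothetical "threshold" key:

public class ThresholdFilter extends RichFilterFunction<Long> {

  private long threshold;

  @Override
  public void open(Configuration parameters) {
    // reads a value set on the operator, e.g. conf.setLong("threshold", 5L)
    this.threshold = parameters.getLong("threshold", 0L);
  }

  @Override
  public boolean filter(Long value) {
    return value > threshold;
  }
}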
 
  @Test
  public void testBranchingBroadcastVariable() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<String> input1 = env.readTextFile(IN_FILE).name("source1");
    DataSet<String> input2 = env.readTextFile(IN_FILE).name("source2");
    DataSet<String> input3 = env.readTextFile(IN_FILE).name("source3");
   
    DataSet<String> result1 = input1
        .map(new IdentityMapper<String>())
        .reduceGroup(new Top1GroupReducer<String>())
          .withBroadcastSet(input3, "bc");
   
    DataSet<String> result2 = input2
        .map(new IdentityMapper<String>())
        .reduceGroup(new Top1GroupReducer<String>())
          .withBroadcastSet(input3, "bc");
   
    result1.join(result2)
        .where(new IdentityKeyExtractor<String>())
        .equalTo(new IdentityKeyExtractor<String>())
        .with(new RichJoinFunction<String, String, String>() {
          @Override
          public String join(String first, String second) {
            return null;
          }
        })
        .withBroadcastSet(input3, "bc1")
        .withBroadcastSet(input1, "bc2")
        .withBroadcastSet(result1, "bc3")
      .print();
   
    Plan plan = env.createProgramPlan();
   
    try {
      compileNoStats(plan);
    }
    catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
}

From the TPCH Query 10 example program (the snippet below starts mid-method):

   
    if(!parseParameters(args)) {
      return;
    }
   
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get customer data set: (custkey, name, address, nationkey, acctbal)
    DataSet<Tuple5<Integer, String, String, Integer, Double>> customers = getCustomerDataSet(env);

    // get orders data set: (orderkey, custkey, orderdate)
    DataSet<Tuple3<Integer, Integer, String>> orders = getOrdersDataSet(env);

    // get lineitem data set: (orderkey, extendedprice, discount, returnflag)
    DataSet<Tuple4<Integer, Double, Double, String>> lineitems = getLineitemDataSet(env);

    // get nation data set: (nationkey, name)
    DataSet<Tuple2<Integer, String>> nations = getNationsDataSet(env);

    // orders filtered by year: (orderkey, custkey)
    DataSet<Tuple2<Integer, Integer>> ordersFilteredByYear =
        // filter by year
        orders.filter(order -> Integer.parseInt(order.f2.substring(0, 4)) > 1990)
        // project fields out that are no longer required
        .project(0,1).types(Integer.class, Integer.class);

    // lineitems filtered by flag: (orderkey, extendedprice, discount)
    DataSet<Tuple3<Integer, Double, Double>> lineitemsFilteredByFlag =
        // filter by flag
        lineitems.filter(lineitem -> lineitem.f3.equals("R"))
        // project fields out that are no longer required
        .project(0,1,2).types(Integer.class, Double.class, Double.class);

    // join orders with lineitems: (custkey, extendedprice, discount)
    DataSet<Tuple3<Integer, Double, Double>> lineitemsOfCustomerKey =
        ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag)
                  .where(0).equalTo(0)
                  .projectFirst(1).projectSecond(1,2)
                  .types(Integer.class, Double.class, Double.class);

    // aggregate for revenue: (custkey, revenue)
    DataSet<Tuple2<Integer, Double>> revenueOfCustomerKey = lineitemsOfCustomerKey
        // calculate the revenue for each item
        // revenue per item = l_extendedprice * (1 - l_discount)
        .map(i -> new Tuple2<>(i.f0, i.f1 * (1 - i.f2)))
        // aggregate the revenues per item to revenue per customer
        .groupBy(0).sum(1);

    // join customer with nation (custkey, name, address, nationname, acctbal)
    DataSet<Tuple5<Integer, String, String, String, Double>> customerWithNation = customers
            .joinWithTiny(nations)
            .where(3).equalTo(0)
            .projectFirst(0,1,2).projectSecond(1).projectFirst(4)
            .types(Integer.class, String.class, String.class, String.class, Double.class);

    // join customer (with nation) with revenue (custkey, name, address, nationname, acctbal, revenue)
    DataSet<Tuple6<Integer, String, String, String, Double, Double>> customerWithRevenue =
        customerWithNation.join(revenueOfCustomerKey)
        .where(0).equalTo(0)
        .projectFirst(0,1,2,3,4).projectSecond(1)
        .types(Integer.class, String.class, String.class, String.class, Double.class, Double.class);

    // emit result
    customerWithRevenue.writeAsCsv(outputPath);
   
    // execute program
    env.execute("TPCH Query 10 Example");
   
  }
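
A note on the joinWithHuge and joinWithTiny calls above: they produce the same result as a plain join but declare the relative size of the argument to the optimizer, which can then broadcast the small side instead of repartitioning both inputs. A minimal sketch contrasting the three variants (data and key fields are illustrative only, usual imports assumed):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

DataSet<Tuple2<Integer, String>> small = env.fromElements(new Tuple2<>(1, "a"));
DataSet<Tuple2<Integer, String>> big = env.fromElements(new Tuple2<>(1, "b"));

// no hint: the optimizer relies on its own size estimates
small.join(big).where(0).equalTo(0).print();

// hint: the argument (big) is much larger than this input
small.joinWithHuge(big).where(0).equalTo(0).print();

// hint: the argument (small) is much smaller than this input
big.joinWithTiny(small).where(0).equalTo(0).print();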

From the WordCount example program (the snippet below starts mid-method):

    if(!parseParameters(args)) {
      return;
    }
   
    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    // get input data
    DataSet<String> text = getTextDataSet(env);
   
    DataSet<Tuple2<String, Integer>> counts =
        // normalize and split each line
        text.map(line -> line.toLowerCase().split("\\W+"))
        // convert the split line into pairs (2-tuples) containing: (word, 1)
        .flatMap((String[] tokens, Collector<Tuple2<String, Integer>> out) -> {
          // emit the pairs with non-zero-length words
          Arrays.stream(tokens)
          .filter(t -> t.length() > 0)
          .forEach(t -> out.collect(new Tuple2<>(t, 1)));
        })
        // group by the tuple field "0" and sum up tuple field "1"
        .groupBy(0)
        .sum(1);

    // emit result
    if(fileOutput) {
      counts.writeAsCsv(outputPath, "\n", " ");
    } else {
      counts.print();
    }
   
    // execute program
    env.execute("WordCount Example");
  }
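
For comparison, a sketch of the same pipeline with an anonymous FlatMapFunction in place of the map/flatMap lambda pair. With a class, Flink can extract the output type from the signature, which is why the lambda version above needs the explicitly typed Collector parameter:

DataSet<Tuple2<String, Integer>> counts = text
    .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
      @Override
      public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
        // normalize, split, and emit a (word, 1) pair per non-empty token
        for (String token : line.toLowerCase().split("\\W+")) {
          if (token.length() > 0) {
            out.collect(new Tuple2<>(token, 1));
          }
        }
      }
    })
    .groupBy(0)
    .sum(1);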

public class PartitionPushdownTest extends CompilerTestBase {

  @Test
  public void testPartitioningNotPushedDown() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
     
      input
        .groupBy(0, 1).sum(2)
        .groupBy(0).sum(1)
        .print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      SinkPlanNode sink = op.getDataSinks().iterator().next();
     
      SingleInputPlanNode agg2Reducer = (SingleInputPlanNode) sink.getInput().getSource();
      // ... (remainder of the test is truncated in the original snippet)

  }
 
  @Test
  public void testPartitioningReused() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
     
      input
        .groupBy(0).sum(1)
        .groupBy(0, 1).sum(2)
        .print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      SinkPlanNode sink = op.getDataSinks().iterator().next();
     
      SingleInputPlanNode agg2Reducer = (SingleInputPlanNode) sink.getInput().getSource();
      // ... (remainder of the test is truncated in the original snippet)
  }
}
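
Why the two orders differ (a reading of the plans these tests compile, not stated in the truncated snippets): a hash partitioning on (0, 1) does not co-locate records that agree only on field 0, so in the first test the second aggregation needs its own shuffle and the partitioning cannot be pushed down; a partitioning on field 0 alone, however, already co-locates all records that agree on (0, 1), so in the second test the optimizer can reuse it.

From another compiler test, this one covering delta (workset) iterations: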
 
 
  @Test
  public void testRejectPlanIfSolutionSetKeysAndJoinKeysDontMatch() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Long, Long, Long>> solutionSetInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Solution Set");
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Long, Long, Long>> worksetInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Workset");
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Long, Long, Long>> invariantInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Invariant Input");
     
      DeltaIteration<Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>> iter = solutionSetInput.iterateDelta(worksetInput, 100, 1, 2);
     
     
      DataSet<Tuple3<Long, Long, Long>> result =
          // ... (remainder of the test is truncated in the original snippet)
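
For context on the truncated test, a minimal sketch of the delta-iteration pattern it exercises (hypothetical code, not the missing test body, usual imports assumed): the solution set is declared with key fields in iterateDelta(), and a join against iter.getSolutionSet() must use exactly those fields as join keys, which is the condition the test expects the compiler to enforce.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

DataSet<Tuple2<Long, Long>> solutionSetInput = env.fromElements(new Tuple2<>(1L, 2L));
DataSet<Tuple2<Long, Long>> worksetInput = env.fromElements(new Tuple2<>(1L, 2L));

// declare field 0 as the solution set key, with at most 100 supersteps
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter =
    solutionSetInput.iterateDelta(worksetInput, 100, 0);

// the join against the solution set uses the declared key field 0
DataSet<Tuple2<Long, Long>> delta = iter.getWorkset()
    .join(iter.getSolutionSet())
    .where(0).equalTo(0)
    .with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
      @Override
      public Tuple2<Long, Long> join(Tuple2<Long, Long> fromWorkset, Tuple2<Long, Long> fromSolution) {
        return fromWorkset;
      }
    });

// the delta updates the solution set and also becomes the next workset
iter.closeWith(delta, delta).print();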
