Package org.apache.flink.api.java

Examples of org.apache.flink.api.java.ExecutionEnvironment


public class PipelineBreakerTest extends CompilerTestBase {

  @Test
  public void testPipelineBreakerWithBroadcastVariable() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(64);
     
      DataSet<Long> source = env.generateSequence(1, 10).map(new IdentityMapper<Long>());
     
      DataSet<Long> result = source.map(new IdentityMapper<Long>())
                    .map(new IdentityMapper<Long>())
                      .withBroadcastSet(source, "bc");
     
      result.print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();
     
      // ... (remainder of the test is truncated in the original snippet)
  }
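
A minimal sketch (not taken from the test above; the class name BroadcastReceivingMapper is hypothetical) of how a broadcast set registered under "bc" is consumed at runtime: a rich function looks the variable up by name in open(), and Flink materializes the complete data set on every parallel task instance. Import paths assume a recent DataSet API release.

import java.util.List;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;

public class BroadcastReceivingMapper extends RichMapFunction<Long, Long> {

  private List<Long> broadcast;

  @Override
  public void open(Configuration parameters) throws Exception {
    // fetches the data set registered via withBroadcastSet(source, "bc")
    this.broadcast = getRuntimeContext().getBroadcastVariable("bc");
  }

  @Override
  public Long map(Long value) {
    return value + broadcast.size();
  }
}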
 
  @Test
  public void testPipelineBreakerBroadcastedAllReduce() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(64);
     
      DataSet<Long> sourceWithMapper = env.generateSequence(1, 10).map(new IdentityMapper<Long>());
     
      DataSet<Long> bcInput1 = sourceWithMapper
                    .map(new IdentityMapper<Long>())
                    .reduce(new SelectOneReducer<Long>());
      DataSet<Long> bcInput2 = env.generateSequence(1, 10);
     
      DataSet<Long> result = sourceWithMapper
          .map(new IdentityMapper<Long>())
              .withBroadcastSet(bcInput1, "bc1")
              .withBroadcastSet(bcInput2, "bc2");
     
      result.print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();
     
      // ... (remainder of the test is truncated in the original snippet)

  }
 
  @Test
  public void testPipelineBreakerBroadcastedPartialSolution() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(64);
     
     
      DataSet<Long> initialSource = env.generateSequence(1, 10);
      IterativeDataSet<Long> iteration = initialSource.iterate(100);
     
     
      DataSet<Long> sourceWithMapper = env.generateSequence(1, 10).map(new IdentityMapper<Long>());
     
      DataSet<Long> bcInput1 = sourceWithMapper
                    .map(new IdentityMapper<Long>())
                    .reduce(new SelectOneReducer<Long>());
     
      DataSet<Long> result = sourceWithMapper
          .map(new IdentityMapper<Long>())
              .withBroadcastSet(iteration, "bc2")
              .withBroadcastSet(bcInput1, "bc1");
             
     
      iteration.closeWith(result).print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      BulkIterationPlanNode iterationPlanNode = (BulkIterationPlanNode) sink.getInput().getSource();
      SingleInputPlanNode mapper = (SingleInputPlanNode) iterationPlanNode.getRootOfStepFunction();
      // ... (remainder of the test is truncated in the original snippet)
  }
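
The test above only takes a bulk iteration through the optimizer. For reference, a minimal runnable sketch of the same iterate()/closeWith() pattern (nothing here is taken from the truncated test body, and the usual DataSet API imports are assumed): ten supersteps that each increment every element.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// start a bulk iteration that runs at most ten supersteps
IterativeDataSet<Long> loop = env.generateSequence(1, 10).iterate(10);

// the step function: increment every element once per superstep
DataSet<Long> stepResult = loop.map(new MapFunction<Long, Long>() {
  @Override
  public Long map(Long value) {
    return value + 1;
  }
});

// feed the step result back into the iteration and emit the final result
loop.closeWith(stepResult).print();
env.execute("bulk iteration sketch");
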
  @Test
  public void testPipelineBreakerWithCross() {
    try {
      {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(64);
       
        DataSet<Long> initialSource = env.generateSequence(1, 10);
       
        Configuration conf = new Configuration();
        conf.setString(PactCompiler.HINT_LOCAL_STRATEGY, PactCompiler.HINT_LOCAL_STRATEGY_NESTEDLOOP_BLOCKED_OUTER_FIRST);
        initialSource
          .map(new IdentityMapper<Long>())
          .cross(initialSource).withParameters(conf)
          .print();
       
       
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        DualInputPlanNode mapper = (DualInputPlanNode) sink.getInput().getSource();
       
        assertTrue(mapper.getInput1().getTempMode().breaksPipeline());
      }
     
      {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(64);
       
        DataSet<Long> initialSource = env.generateSequence(1, 10);
       
        Configuration conf = new Configuration();
        conf.setString(PactCompiler.HINT_LOCAL_STRATEGY, PactCompiler.HINT_LOCAL_STRATEGY_NESTEDLOOP_BLOCKED_OUTER_SECOND);
        initialSource
          .map(new IdentityMapper<Long>())
          .cross(initialSource).withParameters(conf)
          .print();
       
       
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
       
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        DualInputPlanNode mapper = (DualInputPlanNode) sink.getInput().getSource();
       
        assertTrue(mapper.getInput2().getTempMode().breaksPipeline());
      }
     
      {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(64);
       
        DataSet<Long> initialSource = env.generateSequence(1, 10);
       
        Configuration conf = new Configuration();
        conf.setString(PactCompiler.HINT_LOCAL_STRATEGY, PactCompiler.HINT_LOCAL_STRATEGY_NESTEDLOOP_STREAMED_OUTER_FIRST);
        initialSource
          .map(new IdentityMapper<Long>())
          .cross(initialSource).withParameters(conf)
          .print();
       
       
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
       
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        DualInputPlanNode mapper = (DualInputPlanNode) sink.getInput().getSource();
       
        assertTrue(mapper.getInput1().getTempMode().breaksPipeline());
      }
     
      {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(64);
       
        DataSet<Long> initialSource = env.generateSequence(1, 10);
       
        Configuration conf = new Configuration();
        conf.setString(PactCompiler.HINT_LOCAL_STRATEGY, PactCompiler.HINT_LOCAL_STRATEGY_NESTEDLOOP_STREAMED_OUTER_SECOND);
        initialSource
          .map(new IdentityMapper<Long>())
          .cross(initialSource).withParameters(conf)
          .print();
       
       
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
       
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        DualInputPlanNode mapper = (DualInputPlanNode) sink.getInput().getSource();
       
        assertTrue(mapper.getInput2().getTempMode().breaksPipeline());
      }
    }
    catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
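
The Configuration passed through withParameters(conf) above carries optimizer hints (PactCompiler.HINT_LOCAL_STRATEGY) that the compiler reads when fixing the nested-loop variant. The same mechanism hands the Configuration to rich user functions via open(). A minimal sketch of that second use, with a hypothetical "threshold" key:

public class ThresholdFilter extends RichFilterFunction<Long> {

  private long threshold;

  @Override
  public void open(Configuration parameters) {
    // reads a value set on the operator, e.g. conf.setLong("threshold", 5L)
    this.threshold = parameters.getLong("threshold", 0L);
  }

  @Override
  public boolean filter(Long value) {
    return value > threshold;
  }
}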
 
  @Test
  public void testBranchingBroadcastVariable() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<String> input1 = env.readTextFile(IN_FILE).name("source1");
    DataSet<String> input2 = env.readTextFile(IN_FILE).name("source2");
    DataSet<String> input3 = env.readTextFile(IN_FILE).name("source3");
   
    DataSet<String> result1 = input1
        .map(new IdentityMapper<String>())
        .reduceGroup(new Top1GroupReducer<String>())
          .withBroadcastSet(input3, "bc");
   
    DataSet<String> result2 = input2
        .map(new IdentityMapper<String>())
        .reduceGroup(new Top1GroupReducer<String>())
          .withBroadcastSet(input3, "bc");
   
    result1.join(result2)
        .where(new IdentityKeyExtractor<String>())
        .equalTo(new IdentityKeyExtractor<String>())
        .with(new RichJoinFunction<String, String, String>() {
          @Override
          public String join(String first, String second) {
            return null;
          }
        })
        .withBroadcastSet(input3, "bc1")
        .withBroadcastSet(input1, "bc2")
        .withBroadcastSet(result1, "bc3")
      .print();
   
    Plan plan = env.createProgramPlan();
   
    try {
      compileNoStats(plan);
    }
    catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
}

From the TPCH Query 10 example program (the snippet below starts mid-method):

   
    if(!parseParameters(args)) {
      return;
    }
   
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get customer data set: (custkey, name, address, nationkey, acctbal)
    DataSet<Tuple5<Integer, String, String, Integer, Double>> customers = getCustomerDataSet(env);

    // get orders data set: (orderkey, custkey, orderdate)
    DataSet<Tuple3<Integer, Integer, String>> orders = getOrdersDataSet(env);

    // get lineitem data set: (orderkey, extendedprice, discount, returnflag)
    DataSet<Tuple4<Integer, Double, Double, String>> lineitems = getLineitemDataSet(env);

    // get nation data set: (nationkey, name)
    DataSet<Tuple2<Integer, String>> nations = getNationsDataSet(env);

    // orders filtered by year: (orderkey, custkey)
    DataSet<Tuple2<Integer, Integer>> ordersFilteredByYear =
        // filter by year
        orders.filter(order -> Integer.parseInt(order.f2.substring(0, 4)) > 1990)
        // project fields out that are no longer required
        .project(0,1).types(Integer.class, Integer.class);

    // lineitems filtered by flag: (orderkey, extendedprice, discount)
    DataSet<Tuple3<Integer, Double, Double>> lineitemsFilteredByFlag =
        // filter by flag
        lineitems.filter(lineitem -> lineitem.f3.equals("R"))
        // project fields out that are no longer required
        .project(0,1,2).types(Integer.class, Double.class, Double.class);

    // join orders with lineitems: (custkey, extendedprice, discount)
    DataSet<Tuple3<Integer, Double, Double>> lineitemsOfCustomerKey =
        ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag)
                  .where(0).equalTo(0)
                  .projectFirst(1).projectSecond(1,2)
                  .types(Integer.class, Double.class, Double.class);

    // aggregate for revenue: (custkey, revenue)
    DataSet<Tuple2<Integer, Double>> revenueOfCustomerKey = lineitemsOfCustomerKey
        // calculate the revenue for each item
        // revenue per item = l_extendedprice * (1 - l_discount)
        .map(i -> new Tuple2<>(i.f0, i.f1 * (1 - i.f2)))
        // aggregate the revenues per item to revenue per customer
        .groupBy(0).sum(1);

    // join customer with nation (custkey, name, address, nationname, acctbal)
    DataSet<Tuple5<Integer, String, String, String, Double>> customerWithNation = customers
            .joinWithTiny(nations)
            .where(3).equalTo(0)
            .projectFirst(0,1,2).projectSecond(1).projectFirst(4)
            .types(Integer.class, String.class, String.class, String.class, Double.class);

    // join customer (with nation) with revenue (custkey, name, address, nationname, acctbal, revenue)
    DataSet<Tuple6<Integer, String, String, String, Double, Double>> customerWithRevenue =
        customerWithNation.join(revenueOfCustomerKey)
        .where(0).equalTo(0)
        .projectFirst(0,1,2,3,4).projectSecond(1)
        .types(Integer.class, String.class, String.class, String.class, Double.class, Double.class);

    // emit result
    customerWithRevenue.writeAsCsv(outputPath);
   
    // execute program
    env.execute("TPCH Query 10 Example");
   
  }
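
A note on the joinWithHuge and joinWithTiny calls above: they produce the same result as a plain join but declare the relative size of the argument to the optimizer, which can then broadcast the small side instead of repartitioning both inputs. A minimal sketch contrasting the three variants (data and key fields are illustrative only, usual imports assumed):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

DataSet<Tuple2<Integer, String>> small = env.fromElements(new Tuple2<>(1, "a"));
DataSet<Tuple2<Integer, String>> big = env.fromElements(new Tuple2<>(1, "b"));

// no hint: the optimizer relies on its own size estimates
small.join(big).where(0).equalTo(0).print();

// hint: the argument (big) is much larger than this input
small.joinWithHuge(big).where(0).equalTo(0).print();

// hint: the argument (small) is much smaller than this input
big.joinWithTiny(small).where(0).equalTo(0).print();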

From the WordCount example program (the snippet below starts mid-method):

    if(!parseParameters(args)) {
      return;
    }
   
    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    // get input data
    DataSet<String> text = getTextDataSet(env);
   
    DataSet<Tuple2<String, Integer>> counts =
        // normalize and split each line
        text.map(line -> line.toLowerCase().split("\\W+"))
        // convert the split line into pairs (2-tuples) containing: (word, 1)
        .flatMap((String[] tokens, Collector<Tuple2<String, Integer>> out) -> {
          // emit the pairs with non-zero-length words
          Arrays.stream(tokens)
          .filter(t -> t.length() > 0)
          .forEach(t -> out.collect(new Tuple2<>(t, 1)));
        })
        // group by the tuple field "0" and sum up tuple field "1"
        .groupBy(0)
        .sum(1);

    // emit result
    if(fileOutput) {
      counts.writeAsCsv(outputPath, "\n", " ");
    } else {
      counts.print();
    }
   
    // execute program
    env.execute("WordCount Example");
  }
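
For comparison, a sketch of the same pipeline with an anonymous FlatMapFunction in place of the map/flatMap lambda pair. With a class, Flink can extract the output type from the signature, which is why the lambda version above needs the explicitly typed Collector parameter:

DataSet<Tuple2<String, Integer>> counts = text
    .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
      @Override
      public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
        // normalize, split, and emit a (word, 1) pair per non-empty token
        for (String token : line.toLowerCase().split("\\W+")) {
          if (token.length() > 0) {
            out.collect(new Tuple2<>(token, 1));
          }
        }
      }
    })
    .groupBy(0)
    .sum(1);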

public class PartitionPushdownTest extends CompilerTestBase {

  @Test
  public void testPartitioningNotPushedDown() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
     
      input
        .groupBy(0, 1).sum(2)
        .groupBy(0).sum(1)
        .print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      SinkPlanNode sink = op.getDataSinks().iterator().next();
     
      SingleInputPlanNode agg2Reducer = (SingleInputPlanNode) sink.getInput().getSource();
      // ... (remainder of the test is truncated in the original snippet)

  }
 
  @Test
  public void testPartitioningReused() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
     
      input
        .groupBy(0).sum(1)
        .groupBy(0, 1).sum(2)
        .print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      SinkPlanNode sink = op.getDataSinks().iterator().next();
     
      SingleInputPlanNode agg2Reducer = (SingleInputPlanNode) sink.getInput().getSource();
      // ... (remainder of the test is truncated in the original snippet)
  }
}
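
Why the two orders differ (a reading of the plans these tests compile, not stated in the truncated snippets): a hash partitioning on (0, 1) does not co-locate records that agree only on field 0, so in the first test the second aggregation needs its own shuffle and the partitioning cannot be pushed down; a partitioning on field 0 alone, however, already co-locates all records that agree on (0, 1), so in the second test the optimizer can reuse it.

From another compiler test, this one covering delta (workset) iterations: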
 
 
  @Test
  public void testRejectPlanIfSolutionSetKeysAndJoinKeysDontMatch() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Long, Long, Long>> solutionSetInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Solution Set");
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Long, Long, Long>> worksetInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Workset");
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Long, Long, Long>> invariantInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Invariant Input");
     
      DeltaIteration<Tuple3<Long, Long, Long>, Tuple3<Long, Long, Long>> iter = solutionSetInput.iterateDelta(worksetInput, 100, 1, 2);
     
     
      DataSet<Tuple3<Long, Long, Long>> result =
          // ... (remainder of the test is truncated in the original snippet)
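
For context on the truncated test, a minimal sketch of the delta-iteration pattern it exercises (hypothetical code, not the missing test body, usual imports assumed): the solution set is declared with key fields in iterateDelta(), and a join against iter.getSolutionSet() must use exactly those fields as join keys, which is the condition the test expects the compiler to enforce.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

DataSet<Tuple2<Long, Long>> solutionSetInput = env.fromElements(new Tuple2<>(1L, 2L));
DataSet<Tuple2<Long, Long>> worksetInput = env.fromElements(new Tuple2<>(1L, 2L));

// declare field 0 as the solution set key, with at most 100 supersteps
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter =
    solutionSetInput.iterateDelta(worksetInput, 100, 0);

// the join against the solution set uses the declared key field 0
DataSet<Tuple2<Long, Long>> delta = iter.getWorkset()
    .join(iter.getSolutionSet())
    .where(0).equalTo(0)
    .with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
      @Override
      public Tuple2<Long, Long> join(Tuple2<Long, Long> fromWorkset, Tuple2<Long, Long> fromSolution) {
        return fromWorkset;
      }
    });

// the delta updates the solution set and also becomes the next workset
iter.closeWith(delta, delta).print();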
