Package eu.stratosphere.api.java

Examples of eu.stratosphere.api.java.ExecutionEnvironment
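
Each snippet on this page starts from an ExecutionEnvironment and assembles a data flow on DataSet objects; the compiler tests then turn that flow into a Plan via createProgramPlan(), while the operator tests only exercise the DataSet API. For orientation, the following is a minimal, self-contained sketch of that pattern. The class name, the sample data, and the doubling MapFunction are illustrative only, and the imports assume the Stratosphere 0.4-era package layout (eu.stratosphere.api.java.*).

import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.api.java.functions.MapFunction;

public class ExecutionEnvironmentExample {

  public static void main(String[] args) throws Exception {
    // obtain an environment; local or cluster is decided by the execution context
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(4);

    // assemble a small data flow: elements -> map -> print sink
    DataSet<Long> numbers = env.fromElements(1L, 2L, 3L);

    numbers.map(new MapFunction<Long, Long>() {
      @Override
      public Long map(Long value) {
        return value * 2L;
      }
    }).print();

    // trigger execution of the assembled plan
    env.execute("ExecutionEnvironment example");
  }
}

getExecutionEnvironment() returns a local environment when the program is started directly (for example from an IDE) and the matching cluster environment when it is submitted as a packaged job, so the same program text works in both settings.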


 
  @Test
  public void testGroupedReduceWithFieldPositionKeyCombinable() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
        .name("source").setParallelism(6);
     
      ReduceGroupOperator<Tuple2<String, Double>, Tuple2<String, Double>> reduced = data
          .groupBy(1)
          .reduceGroup(new GroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>() {
            @Override
            public void reduce(Iterator<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {}
          }).name("reducer");
     
      reduced.setCombinable(true);
      reduced.print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
      // ... (plan node lookups and assertions omitted in this excerpt)
    } catch(Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }


  @Test
  public void translateAggregate() {
    try {
      final int DOP = 8;
      ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(DOP);
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Double, StringValue, Long>> initialData =
          env.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), new Long(77)));
     
      initialData.groupBy(0).aggregate(Aggregations.MIN, 1).and(Aggregations.SUM, 2).print();
     
      Plan p = env.createProgramPlan();
     
      GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
     
      GroupReduceOperatorBase<?, ?, ?> reducer = (GroupReduceOperatorBase<?, ?, ?>) sink.getInput();
      // ... (assertions on the translated reducer omitted in this excerpt)
    } catch(Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }

 
  @Test
  public void testGroupedReduceWithSelectorFunctionKeyNoncombinable() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
        .name("source").setParallelism(6);
     
      data
        .groupBy(new KeySelector<Tuple2<String, Double>, String>() {
          @Override
          public String getKey(Tuple2<String, Double> value) { return value.f0; }
        })
        .reduceGroup(new GroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>() {
          @Override
          public void reduce(Iterator<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {}
        }).name("reducer")
        .print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
      // ... (plan node lookups and assertions omitted in this excerpt)
    } catch(Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }

 
  @Test
  public void testGroupedReduceWithSelectorFunctionKeyCombinable() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
        .name("source").setParallelism(6);
     
      ReduceGroupOperator<Tuple2<String, Double>, Tuple2<String, Double>> reduced = data
        .groupBy(new KeySelector<Tuple2<String, Double>, String>() {
          @Override
          public String getKey(Tuple2<String, Double> value) { return value.f0; }
        })
        .reduceGroup(new GroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>() {
          @Override
          public void reduce(Iterator<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {}
        }).name("reducer");
     
      reduced.setCombinable(true);
      reduced.print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
      // ... (plan node lookups and assertions omitted in this excerpt)
    } catch(Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }

  private final List<Long> emptyLongData = new ArrayList<Long>();
 
  @Test
  public void testFieldsAggregate() {
   
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should work
    try {
      tupleDs.aggregate(Aggregations.SUM, 1);
    } catch(Exception e) {
      Assert.fail();
    }
   
    // should not work: index out of bounds
    try {
      tupleDs.aggregate(Aggregations.SUM, 10);
      Assert.fail();
    } catch(IllegalArgumentException iae) {
      // we're good here
    } catch(Exception e) {
      Assert.fail();
    }
   
    // should not work: not applied to tuple dataset
    DataSet<Long> longDs = env.fromCollection(emptyLongData, BasicTypeInfo.LONG_TYPE_INFO);
    try {
      longDs.aggregate(Aggregations.MIN, 1);
      Assert.fail();
    } catch(InvalidProgramException uoe) {
      // we're good here
    } catch(Exception e) {
      Assert.fail();
    }
  }

 
  @Test
  public void testAggregationTypes() {
    try {
      final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo);
     
      // should work: multiple aggregates
      tupleDs.aggregate(Aggregations.SUM, 0).and(Aggregations.MIN, 4);

      // should work: nested aggregates
      // ... (nested aggregate calls and the rest of the test omitted in this excerpt)
    } catch(Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }


  @Test
  public void testCrossProjection1() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should work
    try {
      ds1.cross(ds2)
        .projectFirst(0)
        // ... (rest of the projection chain omitted in this excerpt)
        ;
    } catch(Exception e) {
      Assert.fail();
    }
  }


  @Test
  public void testCrossProjection2() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should work
    try {
      ds1.cross(ds2)
        .projectFirst(0,3)
        // ... (rest of the projection chain omitted in this excerpt)
        ;
    } catch(Exception e) {
      Assert.fail();
    }
  }


  @Test
  public void testCrossProjection3() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should work
    try {
      ds1.cross(ds2)
        .projectFirst(0)
        // ... (rest of the projection chain omitted in this excerpt)
        ;
    } catch(Exception e) {
      Assert.fail();
    }
  }


  @Test
  public void testCrossProjection4() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should work
    try {
      ds1.cross(ds2)
        .projectFirst(0,2)
        // ... (rest of the projection chain omitted in this excerpt)
        ;
    } catch(Exception e) {
      Assert.fail();
    }
  }
