Package: eu.stratosphere.api.java

Code examples using eu.stratosphere.api.java.ExecutionEnvironment


   * A mapper that preserves fields 0, 1, 2 of a tuple data set.
   */
  @Test
  public void translateUnaryFunctionAnnotationTuples() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Long, String, Integer>> input = env.fromElements(new Tuple3<Long, String, Integer>(3l, "test", 42));
      input.map(new IndividualConstantMapper<Long, String, Integer>()).print();
     
      Plan plan = env.createProgramPlan();
     
      GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
      MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();
     
      SingleInputSemanticProperties semantics = mapper.getSemanticProperties();
View Full Code Here


   * A join that preserves tuple fields from both sides.
   */
  @Test
  public void translateBinaryFunctionAnnotationTuples() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple2<Long, String>> input1 = env.fromElements(new Tuple2<Long, String>(3l, "test"));
      @SuppressWarnings("unchecked")
      DataSet<Tuple2<Long, Double>> input2 = env.fromElements(new Tuple2<Long, Double>(3l, 3.1415));
     
      input1.join(input2).where(0).equalTo(0).with(new ForwardingTupleJoin<Long, String, Long, Double>())
        .print();
     
      Plan plan = env.createProgramPlan();
     
      GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
      JoinOperatorBase<?, ?, ?, ?> join = (JoinOperatorBase<?, ?, ?, ?>) sink.getInput();
     
      DualInputSemanticProperties semantics = join.getSemanticProperties();
View Full Code Here

public class GroupReduceCompilationTest extends CompilerTestBase implements java.io.Serializable {

  @Test
  public void testAllGroupReduceNoCombiner() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Double> data = env.fromElements(0.2, 0.3, 0.4, 0.5).name("source");
     
      data.reduceGroup(new GroupReduceFunction<Double, Double>() {
        public void reduce(Iterator<Double> values, Collector<Double> out) {}
      }).name("reducer")
      .print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
     
View Full Code Here

  @Test
  public void translateNonGroupedReduce() {
    try {
      final int DOP = 8;
      ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(DOP);
     
      DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
     
      initialData.reduce(new ReduceFunction<Tuple3<Double,StringValue,LongValue>>() {
        public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
          return value1;
        }
      }).print();
     
      Plan p = env.createProgramPlan();
     
      GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
     
      ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
     
View Full Code Here

 
  @Test
  public void translateGroupedReduceNoMapper() {
    try {
      final int DOP = 8;
      ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(DOP);
     
      DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
     
      initialData
        .groupBy(2)
        .reduce(new ReduceFunction<Tuple3<Double,StringValue,LongValue>>() {
          public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
            return value1;
          }
        })
        .print();
     
      Plan p = env.createProgramPlan();
     
      GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
     
      ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
     
View Full Code Here

public class SpargelCompilerTest extends CompilerTestBase {

//  @Test
  public void testSpargelCompiler() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
      // compose test program
      {
        DataSet<Long> vertexIds = env.generateSequence(1, 2);
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
       
        DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());
        DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100));
       
        result.print();
      }
     
      Plan p = env.createProgramPlan("Spargel Connected Components");
      OptimizedPlan op = compileNoStats(p);
     
      // check the sink
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
View Full Code Here

 
  @Test
  public void translateGroupedReduceWithkeyExtractor() {
    try {
      final int DOP = 8;
      ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(DOP);
     
      DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
     
      initialData
        .groupBy(new KeySelector<Tuple3<Double,StringValue,LongValue>, StringValue>() {
          public StringValue getKey(Tuple3<Double, StringValue, LongValue> value) {
            return value.f1;
          }
        })
        .reduce(new ReduceFunction<Tuple3<Double,StringValue,LongValue>>() {
          public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
            return value1;
          }
        }).setParallelism(4)
        .print();
     
      Plan p = env.createProgramPlan();
     
      GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
     
     
      MapOperatorBase<?, ?, ?> keyProjector = (MapOperatorBase<?, ?, ?>) sink.getInput();
View Full Code Here

  }
 
  @Test
  public void testAllReduceWithCombiner() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Long> data = env.generateSequence(1, 8000000).name("source");
     
      ReduceGroupOperator<Long, Long> reduced = data.reduceGroup(new GroupReduceFunction<Long, Long>() {
        public void reduce(Iterator<Long> values, Collector<Long> out) {}
      }).name("reducer");
     
      reduced.setCombinable(true);
      reduced.print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
View Full Code Here

  public void testSpargelCompilerWithBroadcastVariable() {
    try {
      final String BC_VAR_NAME = "borat variable";
     
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
      // compose test program
      {
        DataSet<Long> bcVar = env.fromElements(1L);
       
        DataSet<Long> vertexIds = env.generateSequence(1, 2);
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
       
        DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());
       
        VertexCentricIteration<Long, Long, Long, ?> vcIter = VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100);
        vcIter.addBroadcastSetForMessagingFunction(BC_VAR_NAME, bcVar);
        vcIter.addBroadcastSetForUpdateFunction(BC_VAR_NAME, bcVar);
       
        DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(vcIter);
       
        result.print();
      }
     
      Plan p = env.createProgramPlan("Spargel Connected Components");
      OptimizedPlan op = compileNoStats(p);
     
      // check the sink
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
View Full Code Here

 
 
  @Test
  public void testGroupedReduceWithFieldPositionKeyNonCombinable() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
        .name("source").setParallelism(6);
     
      data
        .groupBy(1)
        .reduceGroup(new GroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>() {
        public void reduce(Iterator<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {}
      }).name("reducer")
      .print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
View Full Code Here

TOP

Related Classes of eu.stratosphere.api.java.ExecutionEnvironment

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.