Package eu.stratosphere.api.java

Examples of eu.stratosphere.api.java.ExecutionEnvironment
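
ExecutionEnvironment is the entry point of a Stratosphere Java API program: it creates DataSets from sources such as fromElements, generateSequence, readCsvFile, and createInput, assembles the program plan, and triggers execution. A minimal sketch, using only calls that also appear in the examples below (imports from eu.stratosphere.api.java are assumed):

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Long> data = env.generateSequence(1, 10);
    data.print();
    env.execute("Minimal ExecutionEnvironment Example");

The first example is the body of the main method of a basic triangle enumeration program; parseParameters, getEdgeDataSet, EdgeByIdProjector, TriadBuilder, and TriadFilter are helpers defined elsewhere in that program.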


    if(!parseParameters(args)) {
      return;
    }
   
    // set up execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
 
    // read input data
    DataSet<Edge> edges = getEdgeDataSet(env);
   
    // project edges by vertex id
    DataSet<Edge> edgesById = edges
        .map(new EdgeByIdProjector());
   
    DataSet<Triad> triangles = edgesById
        // build triads
        .groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new TriadBuilder())
        // filter triads
        .join(edgesById).where(Triad.V2, Triad.V3).equalTo(Edge.V1, Edge.V2).with(new TriadFilter());

    // emit result
    if(fileOutput) {
      triangles.writeAsCsv(outputPath, "\n", ",");
    } else {
      triangles.print();
    }

    // execute program
    env.execute("Basic Triangle Enumeration Example");

  }
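EdgeByIdProjector is defined elsewhere in the example; as a rough illustration of such a projection step, a hypothetical MapFunction over Tuple2-encoded edges could order each edge's vertex ids so the smaller one comes first (a sketch, not the example's actual class):

    // Hypothetical sketch: gives every undirected edge a canonical form by
    // ordering its two vertex ids ascending.
    public static final class EdgeNormalizer extends MapFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> {
      @Override
      public Tuple2<Integer, Integer> map(Tuple2<Integer, Integer> edge) {
        if (edge.f0 > edge.f1) {
          return new Tuple2<Integer, Integer>(edge.f1, edge.f0);
        }
        return edge;
      }
    }

The next snippets come from ReduceCompilationTest, which builds small programs and checks the plans the optimizer produces for reduce operations.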


public class ReduceCompilationTest extends CompilerTestBase implements java.io.Serializable {

  @Test
  public void testAllReduceNoCombiner() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Double> data = env.fromElements(0.2, 0.3, 0.4, 0.5).name("source");
     
      data.reduce(new ReduceFunction<Double>() {
       
        @Override
        public Double reduce(Double value1, Double value2) {
          return value1 + value2;
        }
      }).name("reducer")
      .print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
     
      // ... (plan node assertions elided)

  }
 
  @Test
  public void testAllReduceWithCombiner() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Long> data = env.generateSequence(1, 8000000).name("source");
     
      data.reduce(new ReduceFunction<Long>() {
       
        @Override
        public Long reduce(Long value1, Long value2) {
          return value1 + value2;
        }
      }).name("reducer")
      .print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
      // ... (node checks elided)

  }
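
The two tests above differ only in their source: fromElements creates a non-parallel source, while generateSequence creates a parallel one, which is where a combiner in front of the final reduce pays off. The anonymous sum function can equally live in a reusable named class (a sketch, subclassing ReduceFunction as the tests do):

    // A reusable sum reducer, equivalent to the anonymous functions above.
    public static final class SumReducer extends ReduceFunction<Long> {
      @Override
      public Long reduce(Long value1, Long value2) {
        return value1 + value2;
      }
    }

    // usage: env.generateSequence(1, 100).reduce(new SumReducer()).print();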
 
  @Test
  public void testGroupedReduceWithFieldPositionKey() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
        .name("source").setParallelism(6);
     
      data
        .groupBy(1)
        .reduce(new ReduceFunction<Tuple2<String,Double>>() {
        @Override
        public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
          // return value is irrelevant: the compilation tests never execute the function
          return null;
        }
      }).name("reducer")
      .print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
      // ... (node checks elided)

  }
 
  @Test
  public void testGroupedReduceWithSelectorFunctionKey() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
        .name("source").setParallelism(6);
     
      data
        .groupBy(new KeySelector<Tuple2<String,Double>, String>() {
          public String getKey(Tuple2<String, Double> value) { return value.f0; }
        })
        .reduce(new ReduceFunction<Tuple2<String,Double>>() {
        @Override
        public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
          // return value is irrelevant: the compilation tests never execute the function
          return null;
        }
      }).name("reducer")
      .print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
      // ... (node checks elided)

  }
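
The two grouped-reduce tests declare their key differently: groupBy(1) uses a tuple field position, which only works on tuple types, while the KeySelector variant computes a key from each element and works on arbitrary types. The selector can also be a named class (a sketch mirroring the anonymous one above):

    // Extracts the String field of the tuple as the grouping key.
    public static final class NameKeySelector extends KeySelector<Tuple2<String, Double>, String> {
      @Override
      public String getKey(Tuple2<String, Double> value) {
        return value.f0;
      }
    }

The following main method reads Avro records with AvroInputFormat, groups them by name, and discards the result.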
 
  public static void main(String[] args) throws Exception {
    String inputPath = args[0];
   
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<MyUser> input = env.createInput(new AvroInputFormat<MyUser>(new Path(inputPath), MyUser.class));
 
    DataSet<Tuple2<String, MyUser>> result = input.map(new NameExtractor()).groupBy(0).reduce(new NameGrouper());
   
    result.output(new DiscardingOutputFormat<Tuple2<String,MyUser>>());
    env.execute();
  }
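NameExtractor and NameGrouper are defined elsewhere; a hypothetical sketch of the extraction step, assuming the Avro-generated MyUser class exposes a getName() accessor (an assumption, not confirmed by the snippet):

    // Hypothetical: keys each user record by its name field.
    public static class NameExtractor extends MapFunction<MyUser, Tuple2<String, MyUser>> {
      @Override
      public Tuple2<String, MyUser> map(MyUser user) {
        return new Tuple2<String, MyUser>(user.getName().toString(), user);
      }
    }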

public class JDBCExample {

  public static void main(String[] args) throws Exception {
    prepareTestDb();

    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5> source = environment.createInput(JDBCInputFormat.buildJDBCInputFormat()
            .setDrivername("org.apache.derby.jdbc.EmbeddedDriver")
            .setDBUrl("jdbc:derby:memory:ebookshop")
            .setQuery("select * from books")
            .finish(),
            new TupleTypeInfo(Tuple5.class, INT_TYPE_INFO, STRING_TYPE_INFO, STRING_TYPE_INFO, DOUBLE_TYPE_INFO, INT_TYPE_INFO)
        );

    source.output(JDBCOutputFormat.buildJDBCOutputFormat()
        .setDrivername("org.apache.derby.jdbc.EmbeddedDriver")
        .setDBUrl("jdbc:derby:memory:ebookshop")
        .setQuery("insert into newbooks (id,title,author,price,qty) values (?,?,?,?,?)")
        .finish());
    environment.execute();
  }
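Any transformation can sit between the JDBC source and sink. A hypothetical example that keeps only rows whose qty column (tuple position 4) is positive; the FilterFunction and the untyped getField access are assumptions of this sketch:

    // Hypothetical: drop out-of-stock books before writing them back.
    DataSet<Tuple5> inStock = source.filter(new FilterFunction<Tuple5>() {
      @Override
      public boolean filter(Tuple5 record) {
        return ((Integer) record.getField(4)) > 0;
      }
    });

The next snippet, from an iteration translation test, constructs a delta iteration and validates the resulting plan.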

      final int NUM_ITERATIONS = 13;
     
      final int DEFAULT_DOP = 133;
      final int ITERATION_DOP = 77;
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      // ------------ construct the test program ------------------
      {
        env.setDegreeOfParallelism(DEFAULT_DOP);
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple3<Double, Long, String>> initialSolutionSet = env.fromElements(new Tuple3<Double, Long, String>(3.44, 5L, "abc"));
 
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Double, String>> initialWorkSet = env.fromElements(new Tuple2<Double, String>(1.23, "abc"));
       
        DeltaIteration<Tuple3<Double, Long, String>, Tuple2<Double, String>> iteration = initialSolutionSet.iterateDelta(initialWorkSet, NUM_ITERATIONS, ITERATION_KEYS);
        iteration.name(ITERATION_NAME).parallelism(ITERATION_DOP);
       
        iteration.registerAggregator(AGGREGATOR_NAME, LongSumAggregator.class);
       
        // test that multiple workset consumers are supported
        DataSet<Tuple2<Double, String>> worksetSelfJoin =
          iteration.getWorkset()
            .map(new IdentityMapper<Tuple2<Double,String>>())
            .join(iteration.getWorkset()).where(1).equalTo(1).projectFirst(0, 1).types(Double.class, String.class);
       
        DataSet<Tuple3<Double, Long, String>> joined = worksetSelfJoin.join(iteration.getSolutionSet()).where(1).equalTo(2).with(new SolutionWorksetJoin());

        DataSet<Tuple3<Double, Long, String>> result = iteration.closeWith(
            joined,
            joined.map(new NextWorksetMapper()).name(BEFORE_NEXT_WORKSET_MAP));
       
        result.print();
        result.writeAsText("/dev/null");
      }
     
     
      Plan p = env.createProgramPlan(JOB_NAME);
     
      // ------------- validate the plan ----------------
      assertEquals(JOB_NAME, p.getJobName());
      assertEquals(DEFAULT_DOP, p.getDefaultParallelism());
     
      // ... (further plan validation elided)

  }
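
ITERATION_KEYS, ITERATION_NAME, AGGREGATOR_NAME, BEFORE_NEXT_WORKSET_MAP, and JOB_NAME are constants defined elsewhere in the test class; closeWith receives the solution set delta and the next workset. SolutionWorksetJoin is likewise elided; a hypothetical stub with matching types:

    // Hypothetical stub: joins a workset element with its solution set partner
    // and emits a (possibly updated) solution set record.
    public static class SolutionWorksetJoin
        extends JoinFunction<Tuple2<Double, String>, Tuple3<Double, Long, String>, Tuple3<Double, Long, String>> {
      @Override
      public Tuple3<Double, Long, String> join(Tuple2<Double, String> workset, Tuple3<Double, Long, String> solution) {
        return solution;
      }
    }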
 
  @Test
  public void testRejectWhenSolutionSetKeysDontMatch() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Double, Long, String>> initialSolutionSet = env.fromElements(new Tuple3<Double, Long, String>(3.44, 5L, "abc"));

      @SuppressWarnings("unchecked")
      DataSet<Tuple2<Double, String>> initialWorkSet = env.fromElements(new Tuple2<Double, String>(1.23, "abc"));
     
      DeltaIteration<Tuple3<Double, Long, String>, Tuple2<Double, String>> iteration = initialSolutionSet.iterateDelta(initialWorkSet, 10, 1);
     
      try {
        // the solution set is joined on key position 2, which does not match the
        // iteration's declared solution set key (position 1) and must be rejected
        iteration.getWorkset().join(iteration.getSolutionSet()).where(1).equalTo(2);
        // ... (expected failure handling elided)

  /**
   * A mapper that preserves all fields over a tuple data set.
   */
  @Test
  public void translateUnaryFunctionAnnotationTuplesWildCard() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Long, String, Integer>> input = env.fromElements(new Tuple3<Long, String, Integer>(3L, "test", 42));
      input.map(new WildcardConstantMapper<Tuple3<Long,String,Integer>>()).print();
     
      Plan plan = env.createProgramPlan();
     
      GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
      MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();
     
      SingleInputSemanticProperties semantics = mapper.getSemanticProperties();
      // ... (assertions on the semantic properties elided)
