Package eu.stratosphere.api.java

Examples of eu.stratosphere.api.java.ExecutionEnvironment
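
ExecutionEnvironment is the entry point of a Stratosphere Java API program: it creates DataSets from sources such as fromElements, generateSequence, readCsvFile, and createInput, assembles the program plan, and triggers execution. A minimal sketch, using only calls that also appear in the examples below (imports from eu.stratosphere.api.java are assumed):

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Long> data = env.generateSequence(1, 10);
    data.print();
    env.execute("Minimal ExecutionEnvironment Example");

The first example is the body of the main method of a basic triangle enumeration program; parseParameters, getEdgeDataSet, EdgeByIdProjector, TriadBuilder, and TriadFilter are helpers defined elsewhere in that program.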


    if(!parseParameters(args)) {
      return;
    }
   
    // set up execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
 
    // read input data
    DataSet<Edge> edges = getEdgeDataSet(env);
   
    // project edges by vertex id
    DataSet<Edge> edgesById = edges
        .map(new EdgeByIdProjector());
   
    DataSet<Triad> triangles = edgesById
        // build triads
        .groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new TriadBuilder())
        // filter triads
        .join(edgesById).where(Triad.V2, Triad.V3).equalTo(Edge.V1, Edge.V2).with(new TriadFilter());

    // emit result
    if(fileOutput) {
      triangles.writeAsCsv(outputPath, "\n", ",");
    } else {
      triangles.print();
    }

    // execute program
    env.execute("Basic Triangle Enumeration Example");

  }
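EdgeByIdProjector is defined elsewhere in the example; as a rough illustration of such a projection step, a hypothetical MapFunction over Tuple2-encoded edges could order each edge's vertex ids so the smaller one comes first (a sketch, not the example's actual class):

    // Hypothetical sketch: gives every undirected edge a canonical form by
    // ordering its two vertex ids ascending.
    public static final class EdgeNormalizer extends MapFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> {
      @Override
      public Tuple2<Integer, Integer> map(Tuple2<Integer, Integer> edge) {
        if (edge.f0 > edge.f1) {
          return new Tuple2<Integer, Integer>(edge.f1, edge.f0);
        }
        return edge;
      }
    }

The next snippets come from ReduceCompilationTest, which builds small programs and checks the plans the optimizer produces for reduce operations.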


public class ReduceCompilationTest extends CompilerTestBase implements java.io.Serializable {

  @Test
  public void testAllReduceNoCombiner() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Double> data = env.fromElements(0.2, 0.3, 0.4, 0.5).name("source");
     
      data.reduce(new ReduceFunction<Double>() {
       
        @Override
        public Double reduce(Double value1, Double value2) {
          return value1 + value2;
        }
      }).name("reducer")
      .print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
     
      // ... (plan node assertions elided)

  }
 
  @Test
  public void testAllReduceWithCombiner() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Long> data = env.generateSequence(1, 8000000).name("source");
     
      data.reduce(new ReduceFunction<Long>() {
       
        @Override
        public Long reduce(Long value1, Long value2) {
          return value1 + value2;
        }
      }).name("reducer")
      .print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
      // ... (node checks elided)

  }
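
The two tests above differ only in their source: fromElements creates a non-parallel source, while generateSequence creates a parallel one, which is where a combiner in front of the final reduce pays off. The anonymous sum function can equally live in a reusable named class (a sketch, subclassing ReduceFunction as the tests do):

    // A reusable sum reducer, equivalent to the anonymous functions above.
    public static final class SumReducer extends ReduceFunction<Long> {
      @Override
      public Long reduce(Long value1, Long value2) {
        return value1 + value2;
      }
    }

    // usage: env.generateSequence(1, 100).reduce(new SumReducer()).print();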
 
  @Test
  public void testGroupedReduceWithFieldPositionKey() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
        .name("source").setParallelism(6);
     
      data
        .groupBy(1)
        .reduce(new ReduceFunction<Tuple2<String,Double>>() {
        @Override
        public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
          // return value is irrelevant: the compilation tests never execute the function
          return null;
        }
      }).name("reducer")
      .print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
      // ... (node checks elided)

  }
 
  @Test
  public void testGroupedReduceWithSelectorFunctionKey() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
        .name("source").setParallelism(6);
     
      data
        .groupBy(new KeySelector<Tuple2<String,Double>, String>() {
          public String getKey(Tuple2<String, Double> value) { return value.f0; }
        })
        .reduce(new ReduceFunction<Tuple2<String,Double>>() {
        @Override
        public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
          // return value is irrelevant: the compilation tests never execute the function
          return null;
        }
      }).name("reducer")
      .print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
      // ... (node checks elided)

  }
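
The two grouped-reduce tests declare their key differently: groupBy(1) uses a tuple field position, which only works on tuple types, while the KeySelector variant computes a key from each element and works on arbitrary types. The selector can also be a named class (a sketch mirroring the anonymous one above):

    // Extracts the String field of the tuple as the grouping key.
    public static final class NameKeySelector extends KeySelector<Tuple2<String, Double>, String> {
      @Override
      public String getKey(Tuple2<String, Double> value) {
        return value.f0;
      }
    }

The following main method reads Avro records with AvroInputFormat, groups them by name, and discards the result.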
 
  public static void main(String[] args) throws Exception {
    String inputPath = args[0];
   
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<MyUser> input = env.createInput(new AvroInputFormat<MyUser>(new Path(inputPath), MyUser.class));
 
    DataSet<Tuple2<String, MyUser>> result = input.map(new NameExtractor()).groupBy(0).reduce(new NameGrouper());
   
    result.output(new DiscardingOutputFormat<Tuple2<String,MyUser>>());
    env.execute();
  }
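NameExtractor and NameGrouper are defined elsewhere; a hypothetical sketch of the extraction step, assuming the Avro-generated MyUser class exposes a getName() accessor (an assumption, not confirmed by the snippet):

    // Hypothetical: keys each user record by its name field.
    public static class NameExtractor extends MapFunction<MyUser, Tuple2<String, MyUser>> {
      @Override
      public Tuple2<String, MyUser> map(MyUser user) {
        return new Tuple2<String, MyUser>(user.getName().toString(), user);
      }
    }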

public class JDBCExample {

  public static void main(String[] args) throws Exception {
    prepareTestDb();

    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5> source = environment.createInput(JDBCInputFormat.buildJDBCInputFormat()
            .setDrivername("org.apache.derby.jdbc.EmbeddedDriver")
            .setDBUrl("jdbc:derby:memory:ebookshop")
            .setQuery("select * from books")
            .finish(),
            new TupleTypeInfo(Tuple5.class, INT_TYPE_INFO, STRING_TYPE_INFO, STRING_TYPE_INFO, DOUBLE_TYPE_INFO, INT_TYPE_INFO)
        );

    source.output(JDBCOutputFormat.buildJDBCOutputFormat()
        .setDrivername("org.apache.derby.jdbc.EmbeddedDriver")
        .setDBUrl("jdbc:derby:memory:ebookshop")
        .setQuery("insert into newbooks (id,title,author,price,qty) values (?,?,?,?,?)")
        .finish());
    environment.execute();
  }
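Any transformation can sit between the JDBC source and sink. A hypothetical example that keeps only rows whose qty column (tuple position 4) is positive; the FilterFunction and the untyped getField access are assumptions of this sketch:

    // Hypothetical: drop out-of-stock books before writing them back.
    DataSet<Tuple5> inStock = source.filter(new FilterFunction<Tuple5>() {
      @Override
      public boolean filter(Tuple5 record) {
        return ((Integer) record.getField(4)) > 0;
      }
    });

The next snippet, from an iteration translation test, constructs a delta iteration and validates the resulting plan.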

      final int NUM_ITERATIONS = 13;
     
      final int DEFAULT_DOP = 133;
      final int ITERATION_DOP = 77;
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      // ------------ construct the test program ------------------
      {
        env.setDegreeOfParallelism(DEFAULT_DOP);
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple3<Double, Long, String>> initialSolutionSet = env.fromElements(new Tuple3<Double, Long, String>(3.44, 5L, "abc"));
 
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Double, String>> initialWorkSet = env.fromElements(new Tuple2<Double, String>(1.23, "abc"));
       
        DeltaIteration<Tuple3<Double, Long, String>, Tuple2<Double, String>> iteration = initialSolutionSet.iterateDelta(initialWorkSet, NUM_ITERATIONS, ITERATION_KEYS);
        iteration.name(ITERATION_NAME).parallelism(ITERATION_DOP);
       
        iteration.registerAggregator(AGGREGATOR_NAME, LongSumAggregator.class);
       
        // test that multiple workset consumers are supported
        DataSet<Tuple2<Double, String>> worksetSelfJoin =
          iteration.getWorkset()
            .map(new IdentityMapper<Tuple2<Double,String>>())
            .join(iteration.getWorkset()).where(1).equalTo(1).projectFirst(0, 1).types(Double.class, String.class);
       
        DataSet<Tuple3<Double, Long, String>> joined = worksetSelfJoin.join(iteration.getSolutionSet()).where(1).equalTo(2).with(new SolutionWorksetJoin());

        DataSet<Tuple3<Double, Long, String>> result = iteration.closeWith(
            joined,
            joined.map(new NextWorksetMapper()).name(BEFORE_NEXT_WORKSET_MAP));
       
        result.print();
        result.writeAsText("/dev/null");
      }
     
     
      Plan p = env.createProgramPlan(JOB_NAME);
     
      // ------------- validate the plan ----------------
      assertEquals(JOB_NAME, p.getJobName());
      assertEquals(DEFAULT_DOP, p.getDefaultParallelism());
     
      // ... (further plan validation elided)

  }
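
ITERATION_KEYS, ITERATION_NAME, AGGREGATOR_NAME, BEFORE_NEXT_WORKSET_MAP, and JOB_NAME are constants defined elsewhere in the test class; closeWith receives the solution set delta and the next workset. SolutionWorksetJoin is likewise elided; a hypothetical stub with matching types:

    // Hypothetical stub: joins a workset element with its solution set partner
    // and emits a (possibly updated) solution set record.
    public static class SolutionWorksetJoin
        extends JoinFunction<Tuple2<Double, String>, Tuple3<Double, Long, String>, Tuple3<Double, Long, String>> {
      @Override
      public Tuple3<Double, Long, String> join(Tuple2<Double, String> workset, Tuple3<Double, Long, String> solution) {
        return solution;
      }
    }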
 
  @Test
  public void testRejectWhenSolutionSetKeysDontMatch() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Double, Long, String>> initialSolutionSet = env.fromElements(new Tuple3<Double, Long, String>(3.44, 5L, "abc"));

      @SuppressWarnings("unchecked")
      DataSet<Tuple2<Double, String>> initialWorkSet = env.fromElements(new Tuple2<Double, String>(1.23, "abc"));
     
      DeltaIteration<Tuple3<Double, Long, String>, Tuple2<Double, String>> iteration = initialSolutionSet.iterateDelta(initialWorkSet, 10, 1);
     
      try {
        // the solution set is joined on key position 2, which does not match the
        // iteration's declared solution set key (position 1) and must be rejected
        iteration.getWorkset().join(iteration.getSolutionSet()).where(1).equalTo(2);
        // ... (expected failure handling elided)

  /**
   * A mapper that preserves all fields over a tuple data set.
   */
  @Test
  public void translateUnaryFunctionAnnotationTuplesWildCard() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Long, String, Integer>> input = env.fromElements(new Tuple3<Long, String, Integer>(3L, "test", 42));
      input.map(new WildcardConstantMapper<Tuple3<Long,String,Integer>>()).print();
     
      Plan plan = env.createProgramPlan();
     
      GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
      MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();
     
      SingleInputSemanticProperties semantics = mapper.getSemanticProperties();
      // ... (assertions on the semantic properties elided)
