Package: eu.stratosphere.api.java

Code examples using eu.stratosphere.api.java.ExecutionEnvironment


   * A mapper that preserves fields 0, 1, 2 of a tuple data set.
   */
  @Test
  public void translateUnaryFunctionAnnotationTuples() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Long, String, Integer>> input = env.fromElements(new Tuple3<Long, String, Integer>(3l, "test", 42));
      input.map(new IndividualConstantMapper<Long, String, Integer>()).print();
     
      Plan plan = env.createProgramPlan();
     
      GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
      MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();
     
      SingleInputSemanticProperties semantics = mapper.getSemanticProperties();
View Full Code Here


   * A join that preserves tuple fields from both sides.
   */
  @Test
  public void translateBinaryFunctionAnnotationTuples() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple2<Long, String>> input1 = env.fromElements(new Tuple2<Long, String>(3l, "test"));
      @SuppressWarnings("unchecked")
      DataSet<Tuple2<Long, Double>> input2 = env.fromElements(new Tuple2<Long, Double>(3l, 3.1415));
     
      input1.join(input2).where(0).equalTo(0).with(new ForwardingTupleJoin<Long, String, Long, Double>())
        .print();
     
      Plan plan = env.createProgramPlan();
     
      GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
      JoinOperatorBase<?, ?, ?, ?> join = (JoinOperatorBase<?, ?, ?, ?>) sink.getInput();
     
      DualInputSemanticProperties semantics = join.getSemanticProperties();
View Full Code Here

public class GroupReduceCompilationTest extends CompilerTestBase implements java.io.Serializable {

  @Test
  public void testAllGroupReduceNoCombiner() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Double> data = env.fromElements(0.2, 0.3, 0.4, 0.5).name("source");
     
      data.reduceGroup(new GroupReduceFunction<Double, Double>() {
        public void reduce(Iterator<Double> values, Collector<Double> out) {}
      }).name("reducer")
      .print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
     
View Full Code Here

  @Test
  public void translateNonGroupedReduce() {
    try {
      final int DOP = 8;
      ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(DOP);
     
      DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
     
      initialData.reduce(new ReduceFunction<Tuple3<Double,StringValue,LongValue>>() {
        public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
          return value1;
        }
      }).print();
     
      Plan p = env.createProgramPlan();
     
      GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
     
      ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
     
View Full Code Here

 
  @Test
  public void translateGroupedReduceNoMapper() {
    try {
      final int DOP = 8;
      ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(DOP);
     
      DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
     
      initialData
        .groupBy(2)
        .reduce(new ReduceFunction<Tuple3<Double,StringValue,LongValue>>() {
          public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
            return value1;
          }
        })
        .print();
     
      Plan p = env.createProgramPlan();
     
      GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
     
      ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
     
View Full Code Here

public class SpargelCompilerTest extends CompilerTestBase {

//  @Test
  public void testSpargelCompiler() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
      // compose test program
      {
        DataSet<Long> vertexIds = env.generateSequence(1, 2);
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
       
        DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());
        DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100));
       
        result.print();
      }
     
      Plan p = env.createProgramPlan("Spargel Connected Components");
      OptimizedPlan op = compileNoStats(p);
     
      // check the sink
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
View Full Code Here

 
  @Test
  public void translateGroupedReduceWithkeyExtractor() {
    try {
      final int DOP = 8;
      ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(DOP);
     
      DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
     
      initialData
        .groupBy(new KeySelector<Tuple3<Double,StringValue,LongValue>, StringValue>() {
          public StringValue getKey(Tuple3<Double, StringValue, LongValue> value) {
            return value.f1;
          }
        })
        .reduce(new ReduceFunction<Tuple3<Double,StringValue,LongValue>>() {
          public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
            return value1;
          }
        }).setParallelism(4)
        .print();
     
      Plan p = env.createProgramPlan();
     
      GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
     
     
      MapOperatorBase<?, ?, ?> keyProjector = (MapOperatorBase<?, ?, ?>) sink.getInput();
View Full Code Here

  }
 
  @Test
  public void testAllReduceWithCombiner() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Long> data = env.generateSequence(1, 8000000).name("source");
     
      ReduceGroupOperator<Long, Long> reduced = data.reduceGroup(new GroupReduceFunction<Long, Long>() {
        public void reduce(Iterator<Long> values, Collector<Long> out) {}
      }).name("reducer");
     
      reduced.setCombinable(true);
      reduced.print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
View Full Code Here

  public void testSpargelCompilerWithBroadcastVariable() {
    try {
      final String BC_VAR_NAME = "borat variable";
     
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
      // compose test program
      {
        DataSet<Long> bcVar = env.fromElements(1L);
       
        DataSet<Long> vertexIds = env.generateSequence(1, 2);
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
       
        DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());
       
        VertexCentricIteration<Long, Long, Long, ?> vcIter = VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100);
        vcIter.addBroadcastSetForMessagingFunction(BC_VAR_NAME, bcVar);
        vcIter.addBroadcastSetForUpdateFunction(BC_VAR_NAME, bcVar);
       
        DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(vcIter);
       
        result.print();
      }
     
      Plan p = env.createProgramPlan("Spargel Connected Components");
      OptimizedPlan op = compileNoStats(p);
     
      // check the sink
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
View Full Code Here

 
 
  @Test
  public void testGroupedReduceWithFieldPositionKeyNonCombinable() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
        .name("source").setParallelism(6);
     
      data
        .groupBy(1)
        .reduceGroup(new GroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>() {
        public void reduce(Iterator<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {}
      }).name("reducer")
      .print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
View Full Code Here

TOP

Related Classes of eu.stratosphere.api.java.ExecutionEnvironment

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.