Package eu.stratosphere.api.java

Examples of eu.stratosphere.api.java.ExecutionEnvironment
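
Each snippet on this page starts from an ExecutionEnvironment and assembles a data flow on DataSet objects; the compiler tests then turn that flow into a Plan via createProgramPlan(), while the operator tests only exercise the DataSet API. For orientation, the following is a minimal, self-contained sketch of that pattern. The class name, the sample data, and the doubling MapFunction are illustrative only, and the imports assume the Stratosphere 0.4-era package layout (eu.stratosphere.api.java.*).

import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.api.java.functions.MapFunction;

public class ExecutionEnvironmentExample {

  public static void main(String[] args) throws Exception {
    // obtain an environment; local or cluster is decided by the execution context
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(4);

    // assemble a small data flow: elements -> map -> print sink
    DataSet<Long> numbers = env.fromElements(1L, 2L, 3L);

    numbers.map(new MapFunction<Long, Long>() {
      @Override
      public Long map(Long value) {
        return value * 2L;
      }
    }).print();

    // trigger execution of the assembled plan
    env.execute("ExecutionEnvironment example");
  }
}

getExecutionEnvironment() returns a local environment when the program is started directly (for example from an IDE) and the matching cluster environment when it is submitted as a packaged job, so the same program text works in both settings.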


 
  @Test
  public void testGroupedReduceWithFieldPositionKeyCombinable() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
        .name("source").setParallelism(6);
     
      ReduceGroupOperator<Tuple2<String, Double>, Tuple2<String, Double>> reduced = data
          .groupBy(1)
          .reduceGroup(new GroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>() {
            @Override
            public void reduce(Iterator<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {}
          }).name("reducer");
     
      reduced.setCombinable(true);
      reduced.print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
      // ... (plan node lookups and assertions omitted in this excerpt)
    } catch(Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }


  @Test
  public void translateAggregate() {
    try {
      final int DOP = 8;
      ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(DOP);
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple3<Double, StringValue, Long>> initialData =
          env.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), new Long(77)));
     
      initialData.groupBy(0).aggregate(Aggregations.MIN, 1).and(Aggregations.SUM, 2).print();
     
      Plan p = env.createProgramPlan();
     
      GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
     
      GroupReduceOperatorBase<?, ?, ?> reducer = (GroupReduceOperatorBase<?, ?, ?>) sink.getInput();
      // ... (assertions on the translated reducer omitted in this excerpt)
    } catch(Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }

 
  @Test
  public void testGroupedReduceWithSelectorFunctionKeyNoncombinable() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
        .name("source").setParallelism(6);
     
      data
        .groupBy(new KeySelector<Tuple2<String, Double>, String>() {
          @Override
          public String getKey(Tuple2<String, Double> value) { return value.f0; }
        })
        .reduceGroup(new GroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>() {
          @Override
          public void reduce(Iterator<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {}
        }).name("reducer")
        .print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
      // ... (plan node lookups and assertions omitted in this excerpt)
    } catch(Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }

 
  @Test
  public void testGroupedReduceWithSelectorFunctionKeyCombinable() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
        .name("source").setParallelism(6);
     
      ReduceGroupOperator<Tuple2<String, Double>, Tuple2<String, Double>> reduced = data
        .groupBy(new KeySelector<Tuple2<String, Double>, String>() {
          @Override
          public String getKey(Tuple2<String, Double> value) { return value.f0; }
        })
        .reduceGroup(new GroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>() {
          @Override
          public void reduce(Iterator<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {}
        }).name("reducer");
     
      reduced.setCombinable(true);
      reduced.print().name("sink");
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
     
      // get the original nodes
      // ... (plan node lookups and assertions omitted in this excerpt)
    } catch(Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }

  private final List<Long> emptyLongData = new ArrayList<Long>();
 
  @Test
  public void testFieldsAggregate() {
   
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should work
    try {
      tupleDs.aggregate(Aggregations.SUM, 1);
    } catch(Exception e) {
      Assert.fail();
    }
   
    // should not work: index out of bounds
    try {
      tupleDs.aggregate(Aggregations.SUM, 10);
      Assert.fail();
    } catch(IllegalArgumentException iae) {
      // we're good here
    } catch(Exception e) {
      Assert.fail();
    }
   
    // should not work: not applied to tuple dataset
    DataSet<Long> longDs = env.fromCollection(emptyLongData, BasicTypeInfo.LONG_TYPE_INFO);
    try {
      longDs.aggregate(Aggregations.MIN, 1);
      Assert.fail();
    } catch(InvalidProgramException uoe) {
      // we're good here
    } catch(Exception e) {
      Assert.fail();
    }
  }

 
  @Test
  public void testAggregationTypes() {
    try {
      final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo);
     
      // should work: multiple aggregates
      tupleDs.aggregate(Aggregations.SUM, 0).and(Aggregations.MIN, 4);

      // should work: nested aggregates
      // ... (nested aggregate calls and the rest of the test omitted in this excerpt)
    } catch(Exception e) {
      e.printStackTrace();
      Assert.fail(e.getMessage());
    }
  }


  @Test
  public void testCrossProjection1() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should work
    try {
      ds1.cross(ds2)
        .projectFirst(0)
        // ... (rest of the projection chain omitted in this excerpt)
        ;
    } catch(Exception e) {
      Assert.fail();
    }
  }


  @Test
  public void testCrossProjection2() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should work
    try {
      ds1.cross(ds2)
        .projectFirst(0,3)
        // ... (rest of the projection chain omitted in this excerpt)
        ;
    } catch(Exception e) {
      Assert.fail();
    }
  }


  @Test
  public void testCrossProjection3() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should work
    try {
      ds1.cross(ds2)
        .projectFirst(0)
        // ... (rest of the projection chain omitted in this excerpt)
        ;
    } catch(Exception e) {
      Assert.fail();
    }
  }


  @Test
  public void testCrossProjection4() {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

    // should work
    try {
      ds1.cross(ds2)
        .projectFirst(0,2)
        // ... (rest of the projection chain omitted in this excerpt)
        ;
    } catch(Exception e) {
      Assert.fail();
    }
  }
