switch(progId) {
case 1: {
/*
 * Test standard counting with a combiner:
 * bin the keys and count the records per bin.
 */
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
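// bin the keys of the test data into buckets of six and emit (bucket, 1) per record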
DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, IntWritable>>() {
private static final long serialVersionUID = 1L;
Tuple2<IntWritable,IntWritable> outT = new Tuple2<IntWritable,IntWritable>();
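// reuse a single output tuple to avoid allocating a new Tuple2 per record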
@Override
public Tuple2<IntWritable, IntWritable> map(Tuple2<IntWritable, Text> v)
throws Exception {
outT.f0 = new IntWritable(v.f0.get() / 6);
outT.f1 = new IntWritable(1);
return outT;
}
});
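// group by bucket and sum the counts; SumReducer serves as both Hadoop reducer and combiner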
DataSet<Tuple2<IntWritable, IntWritable>> counts = ds.
groupBy(0).
reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
new SumReducer(), new SumReducer()));
counts.writeAsText(resultPath);
env.execute();
// return expected result
return "(0,5)\n"+
"(1,6)\n" +
"(2,6)\n" +
"(3,4)\n";
}
case 2: {
/*
 * Test an ungrouped Hadoop reducer:
 * all records end up in a single group and are summed.
 */
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
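// map every record to the constant key 0 and carry the original key as the value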
DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, IntWritable>>() {
private static final long serialVersionUID = 1L;
Tuple2<IntWritable,IntWritable> outT = new Tuple2<IntWritable,IntWritable>();
@Override
public Tuple2<IntWritable, IntWritable> map(Tuple2<IntWritable, Text> v)
throws Exception {
outT.f0 = new IntWritable(0);
outT.f1 = v.f0;
return outT;
}
});
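// no groupBy: the Hadoop reducer processes all records as a single group and sums the values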
DataSet<Tuple2<IntWritable, IntWritable>> sum = ds.
reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
new SumReducer(), new SumReducer()));
sum.writeAsText(resultPath);
env.execute();
// return expected result
return "(0,231)\n";
}
case 3: {
/* Test a combiner that differs from the reducer */
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
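// keep the original key and emit a count of 1 per record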
DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, IntWritable>>() {
private static final long serialVersionUID = 1L;
Tuple2<IntWritable,IntWritable> outT = new Tuple2<IntWritable,IntWritable>();
@Override
public Tuple2<IntWritable, IntWritable> map(Tuple2<IntWritable, Text> v)
throws Exception {
outT.f0 = v.f0;
outT.f1 = new IntWritable(1);
return outT;
}
});
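// SumReducer produces the final sums; KeyChangingReducer is plugged in as the combiner and changes the keys, as the expected result shows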
DataSet<Tuple2<IntWritable, IntWritable>> counts = ds.
groupBy(0).
reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
new SumReducer(), new KeyChangingReducer()));
counts.writeAsText(resultPath);
env.execute();
// return expected result
return "(0,5)\n"+
"(1,6)\n" +
"(2,5)\n" +
"(3,5)\n";
}
case 4: {
/*
 * Test passing a custom configuration to the Hadoop reducer via JobConf
 */
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
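// set a custom parameter that the Hadoop reducer reads from the JobConf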
JobConf conf = new JobConf();
conf.set("my.cntPrefix", "Hello");
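// map the keys to the range 0..4 (modulo 5) so the results group into five buckets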
DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, Text>>() {
private static final long serialVersionUID = 1L;
@Override
public Tuple2<IntWritable, Text> map(Tuple2<IntWritable, Text> v)
throws Exception {
v.f0 = new IntWritable(v.f0.get() % 5);
return v;
}
});
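// hand the JobConf to the wrapper so the Hadoop reducer is configured with it and can read "my.cntPrefix"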
DataSet<Tuple2<IntWritable, IntWritable>> hellos = ds.
groupBy(0).
reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
new ConfigurableCntReducer(), conf));
hellos.writeAsText(resultPath);
env.execute();
// return expected result
return "(0,0)\n"+
"(1,0)\n" +
"(2,1)\n" +