public void testAsGroupByValue() throws Exception {
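  // wipe any input/output left over from a previous run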
  FileSystem fs = FileSystem.get(new Configuration());
  fs.delete(new Path("/tmp/input"), true);
  fs.delete(new Path("/tmp/output"), true);
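
  // set up the input tap and open it for writing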
  Tap t = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/input");
  TupleEntryCollector tec = t.openForWrite(new HadoopFlowProcess(new JobConf()));
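
  // the records we expect to come back out of the flow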
  HashSet<Tuple> expectedTuples = new HashSet<Tuple>() {{
    add(new Tuple(Example.Person.newBuilder().setName("bryan").setId(1).build()));
    add(new Tuple(Example.Person.newBuilder().setName("lucas").setId(2).build()));
  }};
  for (Tuple tuple : expectedTuples) {
    tec.add(tuple);
  }
  tec.close();
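
  // insert a constant "key" field so every record lands in the same group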
  Pipe inPipe = new Pipe("input");
  Pipe injectedPipe = new Each(inPipe, Fields.NONE, new Insert(new Fields("key"), 7), new Fields("key", "value"));
  Pipe groupByPipe = new GroupBy(injectedPipe, new Fields("key"));
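
  // append ProtobufSerialization to io.serializations so Hadoop can
  // (de)serialize the Person messages between the map and reduce sides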
  Hfs sink = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/output");
  Map<Object, Object> properties = new HashMap<Object, Object>() {{
    put("io.serializations",
        new JobConf().get("io.serializations") + "," + ProtobufSerialization.class.getName());
  }};
  new HadoopFlowConnector(properties).connect(t, sink, groupByPipe).complete();
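
  // read the flow's output back and collect the tuples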
  TupleEntryIterator tei = sink.openForRead(new HadoopFlowProcess(new JobConf()));
  Set<Tuple> tuples = new HashSet<Tuple>();
  while (tei.hasNext()) {
    tuples.add(tei.next().getTupleCopy());
  }
  assertEquals(expectedTuples, tuples);
}