// Job wiring: declare the intermediate schema, grouping and ordering, then the
// input, the output and the reduce/combine steps on `mr`.

// The schema returned by getSchema() is registered as the intermediate schema.
mr.addIntermediateSchema(getSchema());
// Grouping is done on the single field "my_avro".
mr.setGroupByFields("my_avro");
// Here the custom comparator is created from the Avro schema and the field
// names "topic" and "word"; it is then used to order the "my_avro" field in
// ascending order.
// NOTE(review): presumably it compares the Avro records by "topic" and then
// "word", so grouping on "my_avro" effectively groups by (topic, word) --
// confirm against MyAvroComparator.
MyAvroComparator customComp = new MyAvroComparator(getAvroSchema(), "topic", "word");
mr.setOrderBy(new OrderBy().add("my_avro", Order.ASC, customComp));
// Input: the path in args[0], read with TextInputFormat (wrapped in
// HadoopInputFormat) and processed by a TokenizeMapper.
mr.addInput(new Path(args[0]), new HadoopInputFormat(TextInputFormat.class), new TokenizeMapper());
// Output: tuples are written to the path in args[1] using the same schema as
// the intermediate schema (both come from getSchema()).
mr.setTupleOutput(new Path(args[1]), getSchema());
// The same CountReducer class serves as both reducer and combiner; each call
// receives its own new instance.
mr.setTupleReducer(new CountReducer());
mr.setTupleCombiner(new CountReducer());