// Configure the builder for the topical word count job: the builder is created from the
// Hadoop configuration `conf` and labelled "Pangool Topical Word Count".
TupleMRBuilder mr = new TupleMRBuilder(conf, "Pangool Topical Word Count");
// Register the schema returned by getSchema() as the intermediate schema.
// It is registered before the group-by fields below are declared; keep this order
// (presumably the builder checks group-by fields against known schemas -- TODO confirm).
mr.addIntermediateSchema(getSchema());
// We will count each (topic, word) pair, so both fields together form the group-by key.
// Note that the order in which we defined the fields of the Schema is not relevant here.
mr.setGroupByFields("topic", "word");
// Input: the path given as the first program argument, read through Hadoop's TextInputFormat
// (wrapped in HadoopInputFormat) and processed by TokenizeMapper.
mr.addInput(new Path(args[0]), new HadoopInputFormat(TextInputFormat.class), new TokenizeMapper());
// Output: the path given as the second program argument.
// We'll use a tuple output with the same schema as the intermediate schema.
mr.setTupleOutput(new Path(args[1]), getSchema());
// CountReducer is installed as the reducer and a second, separate instance as the combiner.
mr.setTupleReducer(new CountReducer());
// NOTE(review): the combiner is given a CountReducer rather than a dedicated combiner class.
// Confirm that CountReducer is a type setTupleCombiner accepts and that its output is valid
// input for the reduce step.
mr.setTupleCombiner(new CountReducer());