// Group by (topic, word) — the order in which the Schema's fields were declared is not relevant here
cg.setGroupByFields("topic", "word");
// Instantiate a mapper pre-loaded with the stop-word set:
// the DistributedCache is not needed because mappers, reducers, etc. are themselves instantiable objects
StopWordMapper mapper = new StopWordMapper(stopWords);
// Wire the input path (args[0]) through a plain text input format into the stop-word-filtering mapper
cg.addInput(new Path(args[0]), new HadoopInputFormat(TextInputFormat.class), mapper);
// Output as Tuples with the same schema as the intermediate schema, written to args[1]
cg.setTupleOutput(new Path(args[1]), TopicalWordCount.getSchema());
cg.setTupleReducer(new CountReducer());
// Reuse CountReducer as a combiner — safe because counting is associative and commutative
cg.setTupleCombiner(new CountReducer());