System.err.println();
GenericOptionsParser.printGenericCommandUsage(System.err);
return 1;
}
// Create an object to coordinate pipeline creation and execution.
Pipeline pipeline = new MRPipeline(AverageBytesByIP.class, getConf());
// Reference a given text file as a collection of Strings.
PCollection<String> lines = pipeline.readTextFile(args[0]);
// Aggregator used for summing up response size and count
Aggregator<Pair<Long, Long>> agg = pairAggregator(SUM_LONGS(), SUM_LONGS());
// Table of (ip, sum(response size), count)
PTable<String, Pair<Long, Long>> remoteAddrResponseSize = lines
.parallelDo(extractResponseSize,
Writables.tableOf(Writables.strings(), Writables.pairs(Writables.longs(), Writables.longs()))).groupByKey()
.combineValues(agg);
// Calculate average response size by ip address
PTable<String, Double> avgs = remoteAddrResponseSize.parallelDo(calulateAverage,
Writables.tableOf(Writables.strings(), Writables.doubles()));
// write the result to a text file
pipeline.writeTextFile(avgs, args[1]);
// Execute the pipeline as a MapReduce.
PipelineResult result = pipeline.done();
return result.succeeded() ? 0 : 1;
}