System.err.println();
GenericOptionsParser.printGenericCommandUsage(System.err);
return 1;
}
// Create an object to coordinate pipeline creation and execution.
Pipeline pipeline = new MRPipeline(SecondarySortExample.class, getConf());
// Reference a given text file as a collection of Strings.
PCollection<String> lines = pipeline.readTextFile(args[0]);
// Define a function that parses each line in a PCollection of Strings into
// a pair of pairs, the first of which will be grouped by (first member) and
// then sorted by (second member). The second pair is payload which can be
// passed in an Iterable object.
PTable<String, Pair<Long, String>> pairs = lines.parallelDo("extract_records",
new DoFn<String, Pair<String, Pair<Long, String>>>() {
@Override
public void process(String line, Emitter<Pair<String, Pair<Long, String>>> emitter) {
int i = 0;
String key = "";
long timestamp = 0;
String value = "";
for (String element : INPUT_SPLITTER.split(line)) {
switch (++i) {
case 1:
key = element;
break;
case 2:
try {
timestamp = Long.parseLong(element);
} catch (NumberFormatException e) {
System.out.println("Timestamp not in long format '" + line + "'");
this.increment(COUNTERS.CORRUPT_TIMESTAMP);
}
break;
case 3:
value = element;
break;
default:
System.err.println("i = " + i + " should never happen!");
break;
}
}
if (i == 3) {
Long sortby = new Long(timestamp);
emitter.emit(Pair.of(key, Pair.of(sortby, value)));
} else {
this.increment(COUNTERS.CORRUPT_LINE);
}
}}, Avros.tableOf(Avros.strings(), Avros.pairs(Avros.longs(), Avros.strings())));
// The output of the above input will be (with one reducer):
// one : [[-10,garbage],[-5,10],[1,1],[2,-3]]
// three : [[0,-1]]
// two : [[1,7,9],[2,6],[4,5]]
SecondarySort.sortAndApply(pairs,
new DoFn<Pair<String, Iterable<Pair<Long, String>>>, String>() {
final StringBuilder sb = new StringBuilder();
/**
 * Formats one group as a single text line of the form
 * "key : [pair,pair,...]" and emits it.
 *
 * @param input   the group key paired with the values belonging to that key
 * @param emitter receives the formatted line
 */
@Override
public void process(Pair<String, Iterable<Pair<Long, String>>> input, Emitter<String> emitter) {
  // The builder is reused between calls, so discard the previous group's text.
  sb.setLength(0);
  sb.append(input.first()).append(" : [");
  // Empty before the first element, a comma before every later one.
  String separator = "";
  for (Pair<Long, String> entry : input.second()) {
    sb.append(separator).append(entry);
    separator = ",";
  }
  sb.append("]");
  emitter.emit(sb.toString());
}
}, Writables.strings()).write(To.textFile(args[1]));
// Execute the pipeline as a MapReduce.
return pipeline.done().succeeded() ? 0 : 1;
}