import static org.junit.Assert.assertEquals;
public class SparkSecondarySortIT extends CrunchTestSupport implements Serializable {
/**
 * Runs {@code SecondarySort.sortAndApply} over {@code secondary_sort_input.txt} on a local
 * {@code SparkPipeline} and checks the materialized result.
 *
 * <p>Each input line is split on commas into a string key followed by two integers (the integer
 * fields are trimmed before parsing). Every grouped record is then rendered as the key followed
 * by its pairs, all joined with commas. The expected output lists the keys as
 * {@code one, three, two}, with each key's pairs in ascending order of their first integer.
 *
 * @throws Exception if the pipeline fails while reading or processing the input
 */
@Test
public void testSecondarySort() throws Exception {
  Pipeline p = new SparkPipeline("local", "secondarysort");
  try {
    String inputFile = tempDir.copyResourceFileName("secondary_sort_input.txt");

    // Parse "key,first,second" lines into a table of key -> (first, second).
    PTable<String, Pair<Integer, Integer>> in = p.read(From.textFile(inputFile))
        .parallelDo(new MapFn<String, Pair<String, Pair<Integer, Integer>>>() {
          @Override
          public Pair<String, Pair<Integer, Integer>> map(String input) {
            String[] pieces = input.split(",");
            return Pair.of(pieces[0],
                Pair.of(Integer.valueOf(pieces[1].trim()), Integer.valueOf(pieces[2].trim())));
          }
        }, tableOf(strings(), pairs(ints(), ints())));

    // Render each (key, pairs) group as a single comma-joined line.
    Iterable<String> lines = SecondarySort.sortAndApply(in,
        new MapFn<Pair<String, Iterable<Pair<Integer, Integer>>>, String>() {
          @Override
          public String map(Pair<String, Iterable<Pair<Integer, Integer>>> input) {
            Joiner j = Joiner.on(',');
            return j.join(input.first(), j.join(input.second()));
          }
        }, strings()).materialize();

    assertEquals(
        ImmutableList.of("one,[-5,10],[1,1],[2,-3]", "three,[0,-1]", "two,[1,7],[2,6],[4,5]"),
        ImmutableList.copyOf(lines));
  } finally {
    // Always finish the pipeline, even when the assertion or the job itself fails, so a
    // failing run does not skip done(). NOTE(review): presumably this is what releases the
    // local Spark context for later tests in the same JVM -- confirm against SparkPipeline.
    p.done();
  }
}