*/
public static <T, N extends Number> PCollection<Pair<Integer, T>> groupedWeightedReservoirSample(
    PTable<Integer, Pair<T, N>> input,
    int[] sampleSizes,
    Long seed) {
  final PTypeFamily family = input.getTypeFamily();

  // The element type is the first sub-type of the input's value type (the Pair<T, N>).
  // Generic erasure means this cast is not verified at runtime.
  final PType<T> elementType =
      (PType<T>) input.getPTableType().getValueType().getSubTypes().get(0);

  // Intermediate layout: group id -> (Double, element).
  // NOTE(review): the Double is presumably a sampling score produced by
  // ReservoirSampleFn -- confirm against that class.
  final PTableType<Integer, Pair<Double, T>> scoredType =
      family.tableOf(family.ints(), family.pairs(family.doubles(), elementType));

  // Stage 1: run the per-record sampling function over the raw input.
  final PTable<Integer, Pair<Double, T>> scored = input.parallelDo(
      "Initial reservoir sampling",
      new ReservoirSampleFn<T, N>(sampleSizes, seed, elementType),
      scoredType);

  // Stage 2: group by the integer key and merge each group's candidates.
  // NOTE(review): the literal 1 is presumably the partition count for the
  // shuffle -- confirm against groupByKey's contract.
  final PTable<Integer, Pair<Double, T>> combined = scored
      .groupByKey(1)
      .combineValues(new WRSCombineFn<T>(sampleSizes, elementType));

  // Stage 3: discard the Double and emit (group id, element) pairs.
  return combined.parallelDo(
      "Extract sampled values",
      new MapFn<Pair<Integer, Pair<Double, T>>, Pair<Integer, T>>() {
        @Override
        public Pair<Integer, T> map(Pair<Integer, Pair<Double, T>> entry) {
          final Pair<Double, T> scoredElement = entry.second();
          return Pair.of(entry.first(), scoredElement.second());
        }
      },
      family.pairs(family.ints(), elementType));
}