throws Exception {
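  // total order sampling assumes this MapWork has exactly one alias,
  // i.e. a single top operator reading a single logical input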
  assert mWork.getAliasToWork().keySet().size() == 1;

  String alias = mWork.getAliases().get(0);
  Operator<?> topOp = mWork.getAliasToWork().get(alias);
  PartitionDesc partDesc = mWork.getAliasToPartnInfo().get(alias);

  ArrayList<String> paths = mWork.getPaths();
  ArrayList<PartitionDesc> parts = mWork.getPartitionDescs();
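  // stage the TotalOrderPartitioner's partition-key file in a temporary
  // location derived from the first input path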
  Path onePath = new Path(paths.get(0));
  String tmpPath = context.getCtx().getExternalTmpFileURI(onePath.toUri());

  Path partitionFile = new Path(tmpPath, ".partitions");
  ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
  PartitionKeySampler sampler = new PartitionKeySampler();
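  // two ways to obtain the sample: reuse ".sampling*" files written by a
  // previous MR job, or fetch and sample input rows at job start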
  if (mWork.getSamplingType() == MapWork.SAMPLING_ON_PREV_MR) {
    console.printInfo("Use sampling data created in previous MR");
    // merge the sampling data from the previous MR job and make partition keys for the total sort
    for (String path : paths) {
      Path inputPath = new Path(path);
      FileSystem fs = inputPath.getFileSystem(job);
      for (FileStatus status : fs.globStatus(new Path(inputPath, ".sampling*"))) {
        sampler.addSampleFile(status.getPath(), job);
      }
    }
  } else if (mWork.getSamplingType() == MapWork.SAMPLING_ON_START) {
    console.printInfo("Creating sampling data..");
    assert topOp instanceof TableScanOperator;
    TableScanOperator ts = (TableScanOperator) topOp;
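    // build a FetchWork over the input so sample rows can be fetched directly:
    // unpartitioned tables pass the single path, partitioned tables the
    // per-partition descriptors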
    FetchWork fetchWork;
    if (!partDesc.isPartitioned()) {
      assert paths.size() == 1;
      fetchWork = new FetchWork(paths.get(0), partDesc.getTableDesc());
    } else {
      fetchWork = new FetchWork(paths, parts, partDesc.getTableDesc());
    }
    fetchWork.setSource(ts);
    // random sampling
    FetchOperator fetcher = PartitionKeySampler.createSampler(fetchWork, conf, job, ts);