UserGroupInformation.setConfiguration(conf);
String user = UserGroupInformation.getCurrentUser().getShortUserName();
TezConfiguration tezConf = new TezConfiguration(conf);
TezClient tezClient = new TezClient(tezConf);
ApplicationId appId = tezClient.createApplication();
OrderedWordCount instance = new OrderedWordCount();
FileSystem fs = FileSystem.get(conf);
String stagingDirStr = Path.SEPARATOR + "user" + Path.SEPARATOR
+ user + Path.SEPARATOR+ ".staging" + Path.SEPARATOR
+ Path.SEPARATOR + appId.toString();
Path stagingDir = new Path(stagingDirStr);
tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
stagingDir = fs.makeQualified(stagingDir);
TokenCache.obtainTokensForNamenodes(instance.credentials, new Path[] {stagingDir}, conf);
TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
tezConf.set(TezConfiguration.TEZ_AM_JAVA_OPTS,
MRHelpers.getMRAMJavaOpts(conf));
// No need to add the jar containing this class, as it is assumed to be part
// of the tez jars.
// TEZ-674: Obtain tokens based on the Input / Output paths. For now, the staging dir is
// assumed to be on the same filesystem as the one used for Input/Output.
TezSession tezSession = null;
AMConfiguration amConfig = new AMConfiguration(null,
null, tezConf, instance.credentials);
if (useTezSession) {
LOG.info("Creating Tez Session");
TezSessionConfiguration sessionConfig =
new TezSessionConfiguration(amConfig, tezConf);
tezSession = new TezSession("OrderedWordCountSession", appId,
sessionConfig);
tezSession.start();
LOG.info("Created Tez Session");
}
DAGStatus dagStatus = null;
DAGClient dagClient = null;
String[] vNames = { "initialmap", "intermediate_reducer",
"finalreduce" };
Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
try {
for (int dagIndex = 1; dagIndex <= inputPaths.size(); ++dagIndex) {
if (dagIndex != 1
&& interJobSleepTimeout > 0) {
try {
LOG.info("Sleeping between jobs, sleepInterval="
+ (interJobSleepTimeout/1000));
Thread.sleep(interJobSleepTimeout);
} catch (InterruptedException e) {
LOG.info("Main thread interrupted. Breaking out of job loop");
break;
}
}
String inputPath = inputPaths.get(dagIndex-1);
String outputPath = outputPaths.get(dagIndex-1);
if (fs.exists(new Path(outputPath))) {
throw new FileAlreadyExistsException("Output directory "
+ outputPath + " already exists");
}
LOG.info("Running OrderedWordCount DAG"
+ ", dagIndex=" + dagIndex
+ ", inputPath=" + inputPath
+ ", outputPath=" + outputPath);
Map<String, LocalResource> localResources =
new TreeMap<String, LocalResource>();
DAG dag = instance.createDAG(fs, conf, localResources,
stagingDir, dagIndex, inputPath, outputPath,
generateSplitsInClient);
boolean doPreWarm = dagIndex == 1 && useTezSession
&& conf.getBoolean("PRE_WARM_SESSION", true);
int preWarmNumContainers = 0;
if (doPreWarm) {
preWarmNumContainers = conf.getInt("PRE_WARM_NUM_CONTAINERS", 0);
if (preWarmNumContainers <= 0) {
doPreWarm = false;
}
}
if (doPreWarm) {
LOG.info("Pre-warming Session");
VertexLocationHint vertexLocationHint =
new VertexLocationHint(null);
ProcessorDescriptor sleepProcDescriptor =
new ProcessorDescriptor(SleepProcessor.class.getName());
SleepProcessor.SleepProcessorConfig sleepProcessorConfig =
new SleepProcessor.SleepProcessorConfig(4000);
sleepProcDescriptor.setUserPayload(
sleepProcessorConfig.toUserPayload());
PreWarmContext context = new PreWarmContext(sleepProcDescriptor,
dag.getVertex("initialmap").getTaskResource(), preWarmNumContainers,
vertexLocationHint);
Map<String, LocalResource> contextLocalRsrcs =
new TreeMap<String, LocalResource>();
contextLocalRsrcs.putAll(
dag.getVertex("initialmap").getTaskLocalResources());
Map<String, String> contextEnv = new TreeMap<String, String>();
contextEnv.putAll(dag.getVertex("initialmap").getTaskEnvironment());
String contextJavaOpts =
dag.getVertex("initialmap").getJavaOpts();
context
.setLocalResources(contextLocalRsrcs)
.setJavaOpts(contextJavaOpts)
.setEnvironment(contextEnv);
tezSession.preWarm(context);
}
if (useTezSession) {
LOG.info("Waiting for TezSession to get into ready state");
waitForTezSessionReady(tezSession);
LOG.info("Submitting DAG to Tez Session, dagIndex=" + dagIndex);
dagClient = tezSession.submitDAG(dag);
LOG.info("Submitted DAG to Tez Session, dagIndex=" + dagIndex);
} else {
LOG.info("Submitting DAG as a new Tez Application");
dagClient = tezClient.submitDAGApplication(dag, amConfig);
}
while (true) {
dagStatus = dagClient.getDAGStatus(statusGetOpts);
if (dagStatus.getState() == DAGStatus.State.RUNNING ||