// Build the job jar that will be submitted with the MapReduce job.
// Hadoop classes are excluded from the bundle, except for the hbase/hive sub-packages.
ApplicationBundler appBundler = new ApplicationBundler(ImmutableList.of("org.apache.hadoop"),
                                                       ImmutableList.of("org.apache.hadoop.hbase",
                                                                        "org.apache.hadoop.hive"));
Id.Program programId = context.getProgram().getId();
// Jar name encodes type.account.application.program.runId so runs do not collide.
Location jobJar =
  locationFactory.create(String.format("%s.%s.%s.%s.%s.jar",
                                       ProgramType.MAPREDUCE.name().toLowerCase(),
                                       programId.getAccountId(), programId.getApplicationId(),
                                       programId.getId(), context.getRunId().getId()));
LOG.debug("Creating Job jar: {}", jobJar.toURI());

// Seed the bundle with the framework classes every wrapped MapReduce job needs.
Set<Class<?>> classes = Sets.newHashSet();
classes.add(MapReduce.class);
classes.add(DataSetOutputFormat.class);
classes.add(DataSetInputFormat.class);
classes.add(TextStreamInputFormat.class);
classes.add(MapperWrapper.class);
classes.add(ReducerWrapper.class);

Job jobConf = context.getHadoopJob();
try {
  // Best-effort: include the job's configured InputFormat class if it resolves.
  Class<? extends InputFormat<?, ?>> inputFormatClass = jobConf.getInputFormatClass();
  LOG.info("InputFormat class: {} {}", inputFormatClass, inputFormatClass.getClassLoader());
  classes.add(inputFormatClass);
} catch (Throwable t) {
  // Deliberately broad catch: the job may rely on the Hadoop-provided default format.
  LOG.info("InputFormat class not found: {}", t.getMessage(), t);
  // Ignore
}
try {
  // Best-effort: include the job's configured OutputFormat class if it resolves.
  Class<? extends OutputFormat<?, ?>> outputFormatClass = jobConf.getOutputFormatClass();
  LOG.info("OutputFormat class: {} {}", outputFormatClass, outputFormatClass.getClassLoader());
  classes.add(outputFormatClass);
} catch (Throwable t) {
  LOG.info("OutputFormat class not found: {}", t.getMessage(), t);
  // Ignore
}
try {
  // HBase support is optional; skip its classes if the util cannot be provisioned.
  Class<?> hbaseTableUtilClass = new HBaseTableUtilFactory().get().getClass();
  classes.add(hbaseTableUtilClass);
} catch (ProvisionException e) {
  LOG.warn("Not including HBaseTableUtil classes in submitted Job Jar since they are not available");
}

// Bundle under the job's classloader so dependency tracing sees the job's classes.
// Restore the caller's context classloader in a finally block: the original code
// leaked the swapped classloader onto the thread if createBundle threw.
ClassLoader oldClassLoader = Thread.currentThread().getContextClassLoader();
Thread.currentThread().setContextClassLoader(jobConf.getConfiguration().getClassLoader());
try {
  appBundler.createBundle(jobJar, classes);
} finally {
  Thread.currentThread().setContextClassLoader(oldClassLoader);
}

LOG.info("Built MapReduce Job Jar at {}", jobJar.toURI());
return jobJar;
}