String jobId = jobSubmitClient.getNewJobId();
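// Stage everything for this submission in a per-job directory under the JobTracker's system directory.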
Path submitJobDir = new Path(job.getSystemDir(), jobId);
FileSystem fs = getFs();
LOG.debug("default FileSystem: " + fs.getUri());
fs.delete(submitJobDir, true); // recursive delete; the one-arg delete(Path) is deprecated
FileSystem.mkdirs(fs, submitJobDir, new FsPermission(JOB_DIR_PERMISSION));
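// The three artifacts staged for the job: the serialized configuration (job.xml),
// the user's code (job.jar), and the serialized input splits (job.split).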
Path submitJobFile = new Path(submitJobDir, "job.xml");
Path submitJarFile = new Path(submitJobDir, "job.jar");
Path submitSplitFile = new Path(submitJobDir, "job.split");
// Record the modification timestamps of the cached archives and files in the job
// conf, so the framework can verify the cache contents are unchanged when tasks
// localize them.
URI[] tarchives = DistributedCache.getCacheArchives(job);
if (tarchives != null && tarchives.length > 0) { // guard against an empty array before indexing [0]
StringBuilder archiveTimestamps =
new StringBuilder(String.valueOf(DistributedCache.getTimestamp(job, tarchives[0])));
for (int i = 1; i < tarchives.length; i++) {
archiveTimestamps.append(",");
archiveTimestamps.append(String.valueOf(DistributedCache.getTimestamp(job, tarchives[i])));
}
DistributedCache.setArchiveTimestamps(job, archiveTimestamps.toString());
}
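// Do the same for plain (non-archive) cache files.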
URI[] tfiles = DistributedCache.getCacheFiles(job);
if (tfiles != null && tfiles.length > 0) { // same empty-array guard as above
StringBuilder fileTimestamps =
new StringBuilder(String.valueOf(DistributedCache.getTimestamp(job, tfiles[0])));
for (int i = 1; i < tfiles.length; i++) {
fileTimestamps.append(",");
fileTimestamps.append(String.valueOf(DistributedCache.getTimestamp(job, tfiles[i])));
}
DistributedCache.setFileTimestamps(job, fileTimestamps.toString());
}
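// Ship the user's jar to the JobTracker's filesystem. The jar is written with a
// higher replication factor (mapred.submit.replication, default 10) so that many
// task nodes can fetch it without overloading a single replica.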
String originalJarPath = job.getJar();
short replication = (short)job.getInt("mapred.submit.replication", 10);
if (originalJarPath != null) { // copy jar to JobTracker's fs
// use jar name if job is not named.
if ("".equals(job.getJobName())){
job.setJobName(new Path(originalJarPath).getName());
}
job.setJar(submitJarFile.toString());
fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);
fs.setReplication(submitJarFile, replication);
fs.setPermission(submitJarFile, new FsPermission(JOB_FILE_PERMISSION));
} else {
LOG.warn("No job jar file set. User classes may not be found. "+
"See JobConf(Class) or JobConf#setJar(String).");
}
// Set the user's name and working directory
job.setUser(ugi.getUserName());
if (job.getWorkingDirectory() == null) {
job.setWorkingDirectory(fs.getWorkingDirectory());
}
// Check the input specification
job.getInputFormat().validateInput(job);
// Check the output specification
job.getOutputFormat().checkOutputSpecs(fs, job);
// Create the splits for the job
LOG.debug("Creating splits at " + fs.makeQualified(submitSplitFile));
InputSplit[] splits =
job.getInputFormat().getSplits(job, job.getNumMapTasks());
// Sort the splits by size, largest first, so the longest-running
// map tasks are scheduled earliest (note the reversed comparison below).
Arrays.sort(splits, new Comparator<InputSplit>() {
public int compare(InputSplit a, InputSplit b) {
try {
long left = a.getLength();
long right = b.getLength();
if (left == right) {
return 0;
} else if (left < right) {
return 1;
} else {
return -1;
}
} catch (IOException ie) {
throw new RuntimeException("Problem getting input split size",
ie);
}
}
});
// write the splits to a file for the job tracker
FSDataOutputStream out = FileSystem.create(fs,
submitSplitFile, new FsPermission(JOB_FILE_PERMISSION));
try {
writeSplitsFile(splits, out);
} finally {
out.close();
}
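// Tell the JobTracker where to find the splits, and pin the real number of map
// tasks to the number of splits, overriding any user-supplied hint.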
job.set("mapred.job.split.file", submitSplitFile.toString());
job.setNumMapTasks(splits.length);
// Write job file to JobTracker's fs
out = FileSystem.create(fs, submitJobFile,
new FsPermission(JOB_FILE_PERMISSION));
try {
job.write(out);
} finally {
out.close();
}