// Resolve the FileSystem that owns the target path. Every later step needs it,
// so a failure here is logged and rethrown as an unchecked exception.
FileSystem fs = null;
try {
  fs = path.getFileSystem(conf);
} catch (IOException e) {
  LOG.error("Could not retrieve FileSystem object to check for existing path", e);
  throw new CrunchRuntimeException(e);
}

// Probe the current state of the target:
//   exists           - whether the path is already present
//   successful       - whether the path returned by getSuccessIndicator() is present
//                      (presumably the _SUCCESS marker mentioned in the log message below)
//   lastModForTarget - value reported by SourceTargetHelper.getLastModifiedAt; stays -1
//                      when the path does not exist
boolean exists = false;
boolean successful = false;
long lastModForTarget = -1;
try {
  exists = fs.exists(path);
  if (exists) {
    successful = fs.exists(getSuccessIndicator());
    lastModForTarget = SourceTargetHelper.getLastModifiedAt(fs, path);
  }
} catch (IOException e) {
  LOG.error("Exception checking existence of path: " + path, e);
  throw new CrunchRuntimeException(e);
}

if (exists) {
  switch (strategy) {
  case DEFAULT:
    // An existing path is treated as a hard error in this mode.
    LOG.error("Path " + path + " already exists!");
    throw new CrunchRuntimeException("Path already exists: " + path);
  case OVERWRITE:
    LOG.info("Removing data at existing path: " + path);
    try {
      // delete() reports failure through its return value as well as by throwing;
      // surface both so a stale directory does not go unnoticed. Removal remains
      // best-effort: a failure is logged and processing continues.
      if (!fs.delete(path, true)) {
        LOG.error("Unable to remove data at path: " + path);
      }
    } catch (IOException e) {
      LOG.error("Exception thrown removing data at path: " + path, e);
    }
    break;
  case APPEND:
    // Existing contents are left untouched.
    LOG.info("Adding output files to existing path: " + path);
    break;
  case CHECKPOINT:
    // The existing output is kept only when the success indicator is present AND
    // the target's last-modified value is greater than lastModForSource.
    if (successful && lastModForTarget > lastModForSource) {
      LOG.info("Re-starting pipeline from checkpoint path: " + path);
      break;
    } else {
      if (!successful) {
        LOG.info("_SUCCESS file not found, Removing data at existing checkpoint path: " + path);
      } else {
        LOG.info("Source data has recent updates. Removing data at existing checkpoint path: " + path);
      }
      try {
        // Same best-effort removal as OVERWRITE, with the result checked.
        if (!fs.delete(path, true)) {
          LOG.error("Unable to remove data at checkpoint path: " + path);
        }
      } catch (IOException e) {
        LOG.error("Exception thrown removing data at checkpoint path: " + path, e);
      }
      // The checkpoint was discarded, so report it as not existing.
      return false;
    }
  default:
    throw new CrunchRuntimeException("Unknown WriteMode: " + strategy);
  }
} else {
  LOG.info("Will write output files to new path: " + path);
}
// Reports whether the path existed when probed above. OVERWRITE and APPEND reach
// this point with exists == true; only the discarded-checkpoint branch returns false early.
return exists;