Map<String, String> partSpec = simpleDesc.getPartSpec();
Partition p = db.getPartition(tbl, partSpec, false);
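// Validate before touching the filesystem: ARCHIVE only applies to managed
// tables, and the target partition must exist and not already be archived.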
if (tbl.getTableType() != TableType.MANAGED_TABLE) {
throw new HiveException("ARCHIVE can only be performed on managed tables");
}
if (p == null) {
throw new HiveException("Specified partition does not exist");
}
if (isArchived(p)) {
// If a failure occurred right after the metadata was updated during a
// previous archiving operation, it's possible that the original,
// unarchived files were never deleted.
Path originalDir = new Path(getOriginalLocation(p));
Path leftOverIntermediateOriginal = new Path(originalDir.getParent(),
originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX);
if (pathExists(leftOverIntermediateOriginal)) {
console.printInfo("Deleting " + leftOverIntermediateOriginal +
" left over from a previous archiving operation");
deleteDir(leftOverIntermediateOriginal);
}
throw new HiveException("Specified partition is already archived");
}
Path originalDir = p.getPartitionPath();
Path intermediateArchivedDir = new Path(originalDir.getParent(),
originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
Path intermediateOriginalDir = new Path(originalDir.getParent(),
originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX);
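// For a partition stored at .../tbl/hr=12, these paths would look like
// .../tbl/hr=12-intermediate-archived and .../tbl/hr=12-intermediate-original
// (the exact suffixes come from the INTERMEDIATE_*_DIR_SUFFIX constants
// defined elsewhere in this class).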
String archiveName = "data.har";
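// data.har is the fixed name of the archive; once archiving completes, the
// partition's data lives at <partition location>/data.har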
FileSystem fs = null;
try {
fs = originalDir.getFileSystem(conf);
} catch (IOException e) {
throw new HiveException(e);
}
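// Every move below is a rename on this same filesystem, so each step is a
// single atomic operation (on HDFS), which is what makes the recovery
// scheme described next safe to re-run.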
// The following steps seem roundabout, but they are meant to aid in
// recovery if a failure occurs and to keep a consistent state in the FS
// Steps:
// 1. Create the archive in a temporary folder
// 2. Move the archive dir to an intermediate dir that is at the same
// level as the original partition dir. Call the new dir
// intermediate-archive.
// 3. Rename the original partition dir to an intermediate dir. Call the
// renamed dir intermediate-original
// 4. Rename intermediate-archive to the original partition dir
// 5. Change the metadata
// 6. Delete the original partition files in intermediate-original
// The original partition files are deleted after the metadata change
// because the presence of those files is used to indicate whether
// the original partition directory contains archived or unarchived files.
// Create an archived version of the partition in a directory ending in
// INTERMEDIATE_ARCHIVED_DIR_SUFFIX that's at the same level as the partition,
// if it does not already exist. If it does exist, we assume the dir is good
// to use, since the move operation that created it is atomic.
if (!pathExists(intermediateArchivedDir) &&
!pathExists(intermediateOriginalDir)) {
// First create the archive in a tmp dir so that if the job fails, the
// bad files don't pollute the filesystem
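// getExternalTmpFileURI is expected to return a scratch location on the
// same filesystem as originalDir, keeping the later rename a same-FS move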
Path tmpDir = new Path(driverContext.getCtx().getExternalTmpFileURI(originalDir.toUri()), "partlevel");
console.printInfo("Creating " + archiveName + " for " + originalDir.toString());
console.printInfo("in " + tmpDir);
console.printInfo("Please wait... (this may take a while)");
// Create the Hadoop archive
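// The shim papers over Hadoop version differences; createHadoopArchive
// drives the Hadoop archive tool (a MapReduce job) and returns its exit code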
HadoopShims shim = ShimLoader.getHadoopShims();
int ret = 0;
try {
ret = shim.createHadoopArchive(conf, originalDir, tmpDir, archiveName);
} catch (Exception e) {
throw new HiveException(e);
}
if (ret != 0) {
throw new HiveException("Error while creating HAR");
}
// Move from the tmp dir to an intermediate directory, in the same level as
// the partition directory. e.g. .../hr=12-intermediate-archived
try {
console.printInfo("Moving " + tmpDir + " to " + intermediateArchivedDir);
if (pathExists(intermediateArchivedDir)) {
throw new HiveException("The intermediate archive directory already exists.");
}
// FileSystem.rename() signals failure through its return value rather
// than an exception, so check it explicitly
if (!fs.rename(tmpDir, intermediateArchivedDir)) {
throw new HiveException("Error while moving tmp directory");
}
} catch (IOException e) {
throw new HiveException("Error while moving tmp directory", e);
}
} else if (pathExists(intermediateArchivedDir)) {
console.printInfo("Intermediate archive directory " + intermediateArchivedDir +
" already exists. Assuming it contains an archived version of the partition");
}
// If we get to here, we know that we've archived the partition files, but
// they may be in the original partition location, or in the intermediate
// original dir.
// Move the original parent directory to the intermediate original directory
// if the move hasn't been made already
if (!pathExists(intermediateOriginalDir)) {
console.printInfo("Moving " + originalDir + " to " +
intermediateOriginalDir);
moveDir(fs, originalDir, intermediateOriginalDir);
} else {
console.printInfo(intermediateOriginalDir + " already exists. " +
"Assuming it contains the original files in the partition");
}
// If there's a failure from here until the metadata is updated, the
// partition will contain no data, or reads of it will fail (if the archive
// files have already been moved to the original partition directory).
// Re-running the archive command will allow recovery.
// Move the intermediate archived directory to the original parent directory
if (!pathExists(originalDir)) {
console.printInfo("Moving " + intermediateArchivedDir + " to " +
originalDir);
moveDir(fs, intermediateArchivedDir, originalDir);
} else {
console.printInfo(originalDir + " already exists. " +
"Assuming it contains the archived version of the partition");
}
// Record this change in the metastore
try {
boolean parentSettable =
conf.getBoolVar(HiveConf.ConfVars.HIVEHARPARENTDIRSETTABLE);
// dirInArchive is the directory within the archive that has all the files
// for this partition. With older versions of Hadoop, archiving
// a directory would produce the same directory structure
// in the archive. So if you created myArchive.har of /tmp/myDir, the
// files in /tmp/myDir would be located under myArchive.har/tmp/myDir/*
// In this case, dirInArchive should be tmp/myDir
// With newer versions of Hadoop, the parent directory could be specified.
// Assuming the parent directory was set to /tmp/myDir when creating the
// archive, the files can be found under myArchive.har/*
// In this case, dirInArchive should be empty
String dirInArchive = "";
if (!parentSettable) {
dirInArchive = originalDir.toUri().getPath();
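// Strip the leading '/' so the path is relative within the archive,
// e.g. "/tmp/myDir" becomes "tmp/myDir"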
if (dirInArchive.length() > 1 && dirInArchive.charAt(0) == '/') {
dirInArchive = dirInArchive.substring(1);
}
}
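// setArchived (defined elsewhere in this class) updates the partition
// metadata to point at the archive; alterPartition persists the change
// in the metastore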
setArchived(p, originalDir, dirInArchive, archiveName);
db.alterPartition(tblName, p);
} catch (Exception e) {
throw new HiveException("Unable to change the partition info for HAR", e);
}
// If a failure occurs here, the directory containing the original files
// will not be deleted. The user can run ARCHIVE again to clean this up;
// the leftover-intermediate check at the top of this method handles it.
deleteDir(intermediateOriginalDir);