String tblName = simpleDesc.getTableName();
Table tbl = db.getTable(dbName, tblName);
if (tbl.getTableType() != TableType.MANAGED_TABLE) {
throw new HiveException("ARCHIVE can only be performed on managed tables");
}
Map<String, String> partSpec = simpleDesc.getPartSpec();
PartSpecInfo partSpecInfo = PartSpecInfo.create(tbl, partSpec);
List<Partition> partitions = db.getPartitions(tbl, partSpec);
Path originalDir = null;
// When we have a partial partition specification we must assume the
// partitions lie in the standard place - if they were in custom locations,
// putting them into one archive would involve a massive amount of copying.
// In the full partition specification case we allow custom locations
// to keep backward compatibility.
if (partitions.isEmpty()) {
throw new HiveException("No partition matches the specification");
} else if(partSpecInfo.values.size() != tbl.getPartCols().size()) {
// for partial specifications we need partitions to follow the scheme
for(Partition p: partitions){
if(partitionInCustomLocation(tbl, p)) {
String message = String.format("ARCHIVE cannot run for partition " +
"groups with custom locations like %s", p.getLocation());
throw new HiveException(message);
}
}
originalDir = partSpecInfo.createPath(tbl);
} else {
Partition p = partitions.get(0);
// the partition may already be marked as archived if we are in recovery
if(ArchiveUtils.isArchived(p)) {
originalDir = new Path(getOriginalLocation(p));
} else {
originalDir = p.getPartitionPath();
}
}
Path intermediateArchivedDir = new Path(originalDir.getParent(),
originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
Path intermediateOriginalDir = new Path(originalDir.getParent(),
originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX);
console.printInfo("intermediate.archived is " + intermediateArchivedDir.toString());
console.printInfo("intermediate.original is " + intermediateOriginalDir.toString());
String archiveName = "data.har";
FileSystem fs = null;
try {
fs = originalDir.getFileSystem(conf);
} catch (IOException e) {
throw new HiveException(e);
}
URI archiveUri = (new Path(originalDir, archiveName)).toUri();
URI originalUri = ArchiveUtils.addSlash(originalDir.toUri());
ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(
conf, archiveUri, originalUri);
// Check whether any partition matching the specification is already marked
// as archived in the metadata. If its archiving level differs from the one
// we would set, it conflicts with an existing archive; if the level is the
// same, the partition(s) are already archived. Either way we throw an error.
for(Partition p: partitions) {
if(ArchiveUtils.isArchived(p)) {
if(ArchiveUtils.getArchivingLevel(p) != partSpecInfo.values.size()) {
String name = ArchiveUtils.getPartialName(p, ArchiveUtils.getArchivingLevel(p));
String m = String.format("Conflict with existing archive %s", name);
throw new HiveException(m);
} else {
throw new HiveException("Partition(s) already archived");
}
}
}
boolean recovery = false;
if (pathExists(intermediateArchivedDir)
|| pathExists(intermediateOriginalDir)) {
recovery = true;
console.printInfo("Starting recovery after failed ARCHIVE");
}
// The following steps seem roundabout, but they are meant to aid in
// recovery if a failure occurs and to keep a consistent state in the FS
// Steps:
// 1. Create the archive in a temporary folder
// 2. Move the archive dir to an intermediate dir that is in the same
// dir as the original partition dir. Call the new dir
// intermediate-archive.
// 3. Rename the original partition dir to an intermediate dir. Call the
// renamed dir intermediate-original
// 4. Rename intermediate-archive to the original partition dir
// 5. Change the metadata
// 6. Delete the original partition files in intermediate-original
// The original partition files are deleted after the metadata change
// because the presence of those files is used to indicate whether
// the original partition directory contains archived or unarchived files.
// Create an archived version of the partition in a directory ending in
// INTERMEDIATE_ARCHIVED_DIR_SUFFIX that's at the same level as the partition,
// if it does not already exist. If it does exist, we assume the dir is good
// to use, as the move operation that created it is atomic.
HadoopShims shim = ShimLoader.getHadoopShims();
if (!pathExists(intermediateArchivedDir) &&
!pathExists(intermediateOriginalDir)) {
// First create the archive in a tmp dir so that if the job fails, the
// bad files don't pollute the filesystem
Path tmpPath = new Path(driverContext.getCtx()
.getExternalTmpFileURI(originalDir.toUri()), "partlevel");
console.printInfo("Creating " + archiveName +
" for " + originalDir.toString());
console.printInfo("in " + tmpPath);
console.printInfo("Please wait... (this may take a while)");
// Create the Hadoop archive
int ret=0;
try {
int maxJobNameLen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
String jobname = String.format("Archiving %s@%s",
tbl.getTableName(), partSpecInfo.getName());
jobname = Utilities.abbreviate(jobname, maxJobNameLen - 6);
conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname);
ret = shim.createHadoopArchive(conf, originalDir, tmpPath, archiveName);
} catch (Exception e) {
throw new HiveException(e);
}
if (ret != 0) {
throw new HiveException("Error while creating HAR");
}
// Move from the tmp dir to an intermediate directory, in the same level as
// the partition directory. e.g. .../hr=12-intermediate-archived
try {
console.printInfo("Moving " + tmpPath + " to " + intermediateArchivedDir);
if (pathExists(intermediateArchivedDir)) {
throw new HiveException("The intermediate archive directory already exists.");
}
fs.rename(tmpPath, intermediateArchivedDir);
} catch (IOException e) {
throw new HiveException("Error while moving tmp directory");
}
} else {
if (pathExists(intermediateArchivedDir)) {
console.printInfo("Intermediate archive directory " + intermediateArchivedDir +
" already exists. Assuming it contains an archived version of the partition");
}
}
// If we get to here, we know that we've archived the partition files, but
// they may be in the original partition location, or in the intermediate
// original dir.
// Move the original parent directory to the intermediate original directory
// if the move hasn't been made already
if (!pathExists(intermediateOriginalDir)) {
console.printInfo("Moving " + originalDir + " to " +
intermediateOriginalDir);
moveDir(fs, originalDir, intermediateOriginalDir);
} else {
console.printInfo(intermediateOriginalDir + " already exists. " +
"Assuming it contains the original files in the partition");
}
// If there's a failure from here to when the metadata is updated,
// there will be no data in the partition, or an error while trying to read
// the partition (if the archive files have been moved to the original
// partition directory.) But re-running the archive command will allow
// recovery
// Move the intermediate archived directory to the original parent directory
if (!pathExists(originalDir)) {
console.printInfo("Moving " + intermediateArchivedDir + " to " +
originalDir);
moveDir(fs, intermediateArchivedDir, originalDir);
} else {
console.printInfo(originalDir + " already exists. " +
"Assuming it contains the archived version of the partition");
}
// Record this change in the metastore
try {
for(Partition p: partitions) {
URI originalPartitionUri = ArchiveUtils.addSlash(p.getPartitionPath().toUri());
URI test = p.getPartitionPath().toUri();
URI harPartitionDir = harHelper.getHarUri(originalPartitionUri, shim);
Path harPath = new Path(harPartitionDir.getScheme(),
harPartitionDir.getAuthority(),
harPartitionDir.getPath()); // make in Path to ensure no slash at the end
setArchived(p, harPath, partSpecInfo.values.size());
db.alterPartition(tblName, p);
}
} catch (Exception e) {
throw new HiveException("Unable to change the partition info for HAR", e);
}
// If a failure occurs here, the directory containing the original files
// will not be deleted. The user will run ARCHIVE again to clear this up
if(pathExists(intermediateOriginalDir)) {