if (tbl.getTableType() != TableType.MANAGED_TABLE) {
throw new HiveException("ARCHIVE can only be performed on managed tables");
}
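// Resolve the (possibly partial) partition specification supplied with the
// ARCHIVE command and collect every partition it matches.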
Map<String, String> partSpec = simpleDesc.getPartSpec();
PartSpecInfo partSpecInfo = PartSpecInfo.create(tbl, partSpec);
List<Partition> partitions = db.getPartitions(tbl, partSpec);
Path originalDir = null;
// When only a partial partition specification is given we must assume the
// partitions lie in the standard location - if they were in custom locations,
// putting them into one archive would involve a massive amount of copying.
// For a full partition specification we allow custom locations
// to keep backward compatibility.
if (partitions.isEmpty()) {
throw new HiveException("No partition matches the specification");
} else if (partSpecInfo.values.size() != tbl.getPartCols().size()) {
// for partial specifications the partitions must follow the standard location scheme
for (Partition p : partitions) {
if (partitionInCustomLocation(tbl, p)) {
String message = String.format("ARCHIVE cannot run for partition " +
"groups with custom locations like %s", p.getLocation());
throw new HiveException(message);
}
}
originalDir = partSpecInfo.createPath(tbl);
} else {
Partition p = partitions.get(0);
// the partition may already be archived if we are recovering from a failed run
if (ArchiveUtils.isArchived(p)) {
originalDir = new Path(getOriginalLocation(p));
} else {
originalDir = p.getPartitionPath();
}
}
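// The intermediate directories are siblings of the original partition
// directory; they drive the multi-step archive and recovery procedure
// described below.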
Path intermediateArchivedDir = new Path(originalDir.getParent(),
originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
Path intermediateOriginalDir = new Path(originalDir.getParent(),
originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX);
console.printInfo("intermediate.archived is " + intermediateArchivedDir.toString());
console.printInfo("intermediate.original is " + intermediateOriginalDir.toString());
String archiveName = "data.har";
FileSystem fs = null;
try {
fs = originalDir.getFileSystem(conf);
} catch (IOException e) {
throw new HiveException(e);
}
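// Helper for building har:// URIs that point inside the data.har archive
// at the original partition location.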
URI archiveUri = (new Path(originalDir, archiveName)).toUri();
URI originalUri = ArchiveUtils.addSlash(originalDir.toUri());
ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(
conf, archiveUri, originalUri);
// check whether any partition matching the specification is already marked
// as archived in the metadata; if one is archived at a different level than
// the one we would set, it conflicts with this archive; if it is archived at
// the same level, there is nothing to do - in both cases we throw an error
for (Partition p : partitions) {
if (ArchiveUtils.isArchived(p)) {
if (ArchiveUtils.getArchivingLevel(p) != partSpecInfo.values.size()) {
String name = ArchiveUtils.getPartialName(p, ArchiveUtils.getArchivingLevel(p));
String m = String.format("Conflict with existing archive %s", name);
throw new HiveException(m);
} else {
throw new HiveException("Partition(s) already archived");
}
}
}
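// If either intermediate directory already exists, a previous ARCHIVE run
// failed partway through; the steps below resume from that state.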
boolean recovery = false;
if (pathExists(intermediateArchivedDir)
|| pathExists(intermediateOriginalDir)) {
recovery = true;
console.printInfo("Starting recovery after failed ARCHIVE");
}
// The following steps seem roundabout, but they are meant to aid in
// recovery if a failure occurs and to keep the filesystem in a consistent state.
// Steps:
// 1. Create the archive in a temporary folder
// 2. Move the archive dir to an intermediate dir that sits in the same
// parent dir as the original partition dir. Call the new dir
// intermediate-archive.
// 3. Rename the original partition dir to an intermediate dir. Call the
// renamed dir intermediate-original
// 4. Rename intermediate-archive to the original partition dir
// 5. Change the metadata
// 6. Delete the original partition files in intermediate-original
// The original partition files are deleted after the metadata change
// because the presence of those files is used to indicate whether
// the original partition directory contains archived or unarchived files.
// Create an archived version of the partition in a directory ending in
// INTERMEDIATE_ARCHIVED_DIR_SUFFIX at the same level as the partition,
// if it does not already exist. If it does exist, we assume the dir is good
// to use, as the move operation that created it is atomic.
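// The Hadoop shim layer is used so that archive creation works across the
// supported Hadoop versions.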
HadoopShims shim = ShimLoader.getHadoopShims();
if (!pathExists(intermediateArchivedDir) &&
!pathExists(intermediateOriginalDir)) {
// First create the archive in a tmp dir so that if the job fails, the
// bad files don't pollute the filesystem
Path tmpPath = new Path(driverContext.getCtx()
.getExternalTmpFileURI(originalDir.toUri()), "partlevel");
console.printInfo("Creating " + archiveName +
" for " + originalDir.toString());
console.printInfo("in " + tmpPath);
console.printInfo("Please wait... (this may take a while)");
// Create the Hadoop archive
int ret = 0;
try {
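// Give the archiving MapReduce job a readable name, truncated to fit within
// the configured maximum job name length.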
int maxJobNameLen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
String jobname = String.format("Archiving %s@%s",
tbl.getTableName(), partSpecInfo.getName());
jobname = Utilities.abbreviate(jobname, maxJobNameLen - 6);
conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname);
ret = shim.createHadoopArchive(conf, originalDir, tmpPath, archiveName);
} catch (Exception e) {
throw new HiveException(e);