i++;
}
}
// Next, do the same for loads into tables and partitions
LoadTableDesc tbd = work.getLoadTableWork();
if (tbd != null) {
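// Build the console message describing the destination table and, if present, the partition spec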
StringBuilder mesg = new StringBuilder("Loading data to table ")
.append(tbd.getTable().getTableName());
if (tbd.getPartitionSpec().size() > 0) {
mesg.append(" partition (");
Map<String, String> partSpec = tbd.getPartitionSpec();
for (Map.Entry<String, String> partEntry : partSpec.entrySet()) {
mesg.append(partEntry.getKey()).append('=').append(partEntry.getValue()).append(", ");
}
// trim the trailing ", " and close the partition spec
mesg.setLength(mesg.length() - 2);
mesg.append(')');
}
String mesg_detail = " from " + tbd.getSourcePath();
console.printInfo(mesg.toString(), mesg_detail);
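// Look up the destination table object from the metastore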
Table table = db.getTable(tbd.getTable().getTableName());
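// If requested, sample a file from the source directory and verify it matches the table's input format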
if (work.getCheckFileFormat()) {
// Get all files from the src directory
FileStatus[] dirs;
ArrayList<FileStatus> files;
FileSystem srcFs; // source filesystem
try {
srcFs = tbd.getSourcePath().getFileSystem(conf);
dirs = srcFs.globStatus(tbd.getSourcePath());
files = new ArrayList<FileStatus>();
for (int i = 0; (dirs != null && i < dirs.length); i++) {
files.addAll(Arrays.asList(srcFs.listStatus(dirs[i].getPath())));
// We only check one file, so exit the loop when we have at least
// one.
if (files.size() > 0) {
break;
}
}
} catch (IOException e) {
throw new HiveException(
"addFiles: filesystem error in check phase", e);
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVECHECKFILEFORMAT)) {
// Check if the file format of the file matches that of the table.
boolean flag = HiveFileFormatUtils.checkInputFormat(
srcFs, conf, tbd.getTable().getInputFileFormatClass(), files);
if (!flag) {
throw new HiveException(
"Wrong file format. Please check the file's format.");
}
}
}
// Create a data container to track lineage for the loaded data
DataContainer dc = null;
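// No partition spec: the data is loaded directly into the (non-partitioned) table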
if (tbd.getPartitionSpec().size() == 0) {
dc = new DataContainer(table.getTTable());
db.loadTable(tbd.getSourcePath(), tbd.getTable()
.getTableName(), tbd.getReplace(), tbd.getHoldDDLTime(), work.isSrcLocal(),
isSkewedStoredAsDirs(tbd),
work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID);
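// Record the table as a write output (INSERT vs. INSERT_OVERWRITE) so post-execution hooks see this load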
if (work.getOutputs() != null) {
work.getOutputs().add(new WriteEntity(table,
(tbd.getReplace() ? WriteEntity.WriteType.INSERT_OVERWRITE :
WriteEntity.WriteType.INSERT)));
}
} else {
LOG.info("Partition is: " + tbd.getPartitionSpec().toString());
// Check if the bucketing and/or sorting columns were inferred
List<BucketCol> bucketCols = null;
List<SortCol> sortCols = null;
int numBuckets = -1;
Task task = this;
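// The source directory of this move, used as the key when looking up inferred bucket/sort metadata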
String path = tbd.getSourcePath().toUri().toString();
// Find the first ancestor of this MoveTask which is some form of map reduce task
// (Either standard, local, or a merge)
while (task.getParentTasks() != null && task.getParentTasks().size() == 1) {
task = (Task)task.getParentTasks().get(0);
// If it was a merge task or a local map reduce task, nothing can be inferred
if (task instanceof MergeFileTask || task instanceof MapredLocalTask) {
break;
}
// If it's a standard map reduce task, check what, if anything, it inferred about
// the directory this move task is moving
if (task instanceof MapRedTask) {
// Use a name distinct from the MoveWork field 'work' to avoid shadowing it
MapredWork mrWork = (MapredWork) task.getWork();
MapWork mapWork = mrWork.getMapWork();
bucketCols = mapWork.getBucketedColsByDirectory().get(path);
sortCols = mapWork.getSortedColsByDirectory().get(path);
if (mrWork.getReduceWork() != null) {
numBuckets = mrWork.getReduceWork().getNumReduceTasks();
}
if (bucketCols != null || sortCols != null) {
// This must be a final map reduce task (the task containing the file sink
// operator that writes the final output)
assert mrWork.isFinalMapRed();
}
break;
}
// If it's a MoveTask, use the path the files were moved from: that is the path any
// preceding map reduce task inferred information about, and moving the files does
// not invalidate those inferences.
// This can happen when a conditional merge is added before the final MoveTask but the
// condition for merging is not met; see GenMRFileSink1.
if (task instanceof MoveTask) {
if (((MoveTask)task).getWork().getLoadFileWork() != null) {
path = ((MoveTask)task).getWork().getLoadFileWork().getSourcePath().toUri().toString();
}
}
}
// deal with dynamic partitions
DynamicPartitionCtx dpCtx = tbd.getDPCtx();
if (dpCtx != null && dpCtx.getNumDPCols() > 0) { // dynamic partitions
List<LinkedHashMap<String, String>> dps = Utilities.getFullDPSpecs(conf, dpCtx);
// publish DP columns to its subscribers
if (dps != null && dps.size() > 0) {
pushFeed(FeedType.DYNAMIC_PARTITIONS, dps);
}
long startTime = System.currentTimeMillis();
// load the list of DP partitions and return the list of partition specs
// TODO: In a follow-up to HIVE-1361, we should refactor loadDynamicPartitions
// to use Utilities.getFullDPSpecs() to get the list of full partSpecs.
// After that, check that the number of DPs created does not exceed the limit, then
// iterate over the specs and call loadPartition() here.
// The reason we don't do this inside HIVE-1361 is that the latter is large, and we
// want to isolate any potential issues it may introduce.
Map<Map<String, String>, Partition> dp =
db.loadDynamicPartitions(
tbd.getSourcePath(),
tbd.getTable().getTableName(),
tbd.getPartitionSpec(),
tbd.getReplace(),
dpCtx.getNumDPCols(),
tbd.getHoldDDLTime(),
isSkewedStoredAsDirs(tbd),
work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID);
console.printInfo("\t Time taken for load dynamic partitions : " +
(System.currentTimeMillis() - startTime));
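// Optionally fail when the query produced no dynamic partitions at all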
if (dp.size() == 0 && conf.getBoolVar(HiveConf.ConfVars.HIVE_ERROR_ON_EMPTY_PARTITION)) {
throw new HiveException("This query creates no partitions." +
" To turn off this error, set hive.error.on.empty.partition=false.");
}
startTime = System.currentTimeMillis();
// for each partition spec, get the partition
// and put it to WriteEntity for post-exec hook
for(Map.Entry<Map<String, String>, Partition> entry : dp.entrySet()) {
Partition partn = entry.getValue();
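// If bucketing/sorting columns were inferred above, record them in the new partition's metadata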
if (bucketCols != null || sortCols != null) {
updatePartitionBucketSortColumns(table, partn, bucketCols, numBuckets, sortCols);
}
WriteEntity entity = new WriteEntity(partn,
(tbd.getReplace() ? WriteEntity.WriteType.INSERT_OVERWRITE :
WriteEntity.WriteType.INSERT));
if (work.getOutputs() != null) {
work.getOutputs().add(entity);
}
// Need to update the queryPlan's outputs as well so that post-exec hooks get executed.
// This is only needed for dynamic partitioning, since for static partitions the
// WriteEntity is constructed at compile time and the queryPlan already contains it.
// For DP, WriteEntity creation is deferred to this stage, so we need to update the
// queryPlan here.
if (queryPlan.getOutputs() == null) {
queryPlan.setOutputs(new HashSet<WriteEntity>());
}
queryPlan.getOutputs().add(entity);
// update columnar lineage for each partition
dc = new DataContainer(table.getTTable(), partn.getTPartition());
// Don't set lineage on update or delete, as we don't have all the columns
if (SessionState.get() != null &&
work.getLoadTableWork().getWriteType() != AcidUtils.Operation.DELETE &&
work.getLoadTableWork().getWriteType() != AcidUtils.Operation.UPDATE) {
SessionState.get().getLineageState().setLineage(tbd.getSourcePath(), dc,
table.getCols());
}
console.printInfo("\tLoading partition " + entry.getKey());
}
console.printInfo("\t Time taken for adding to write entity : " +
(System.currentTimeMillis() - startTime));
dc = null; // reset data container to prevent it being added again.
} else { // static partitions
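// Static partition: validate the partition values, then load into the single target partition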
List<String> partVals = MetaStoreUtils.getPvals(table.getPartCols(),
tbd.getPartitionSpec());
db.validatePartitionNameCharacters(partVals);
db.loadPartition(tbd.getSourcePath(), tbd.getTable().getTableName(),
tbd.getPartitionSpec(), tbd.getReplace(), tbd.getHoldDDLTime(),
tbd.getInheritTableSpecs(), isSkewedStoredAsDirs(tbd), work.isSrcLocal(),
work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID);
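// Fetch the partition that was just loaded so its metadata and lineage can be updated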
Partition partn = db.getPartition(table, tbd.getPartitionSpec(),
false);
if (bucketCols != null || sortCols != null) {
updatePartitionBucketSortColumns(table, partn, bucketCols,
numBuckets, sortCols);
}
dc = new DataContainer(table.getTTable(), partn.getTPartition());
// add this partition to post-execution hook
if (work.getOutputs() != null) {
work.getOutputs().add(new WriteEntity(partn,
(tbd.getReplace() ? WriteEntity.WriteType.INSERT_OVERWRITE
: WriteEntity.WriteType.INSERT)));
}
}
}
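// Record lineage if a data container was produced above (whole table or a static
// partition; dynamic partitions were already handled per-partition)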
if (SessionState.get() != null && dc != null) {
// If we are doing an update or a delete, the number of columns in the table will not
// match the number of columns in the file sink. For update there will be one too many
// (because of the ROW__ID), and in the case of a delete there will be just the
// ROW__ID, which we don't need to worry about from a lineage perspective.
List<FieldSchema> tableCols = null;
switch (work.getLoadTableWork().getWriteType()) {
case DELETE:
case UPDATE:
// Pass an empty list as no columns will be written to the file.
// TODO I should be able to make this work for update
tableCols = new ArrayList<FieldSchema>();
break;
default:
tableCols = table.getCols();
break;
}
SessionState.get().getLineageState().setLineage(tbd.getSourcePath(), dc, tableCols);
}
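// Release any locks associated with this load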
releaseLocks(tbd);
}
return 0;