/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.HivePartitioner;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;
import org.apache.hadoop.hive.ql.stats.StatsSetupConst;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.Serializer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SubStructObjectInspector;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;
/**
* File Sink operator implementation.
**/
public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
Serializable {
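// Maps a dynamic partition directory (or "" in the non-DP case) to its FSPaths bookkeeping.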
protected transient HashMap<String, FSPaths> valToPaths;
protected transient int numDynParts;
protected transient List<String> dpColNames;
protected transient DynamicPartitionCtx dpCtx;
protected transient boolean isCompressed;
protected transient Path parent;
protected transient HiveOutputFormat<?, ?> hiveOutputFormat;
protected transient Path specPath;
protected transient int dpStartCol; // start column # for DP columns
protected transient List<String> dpVals; // values corresponding to DP columns
protected transient List<Object> dpWritables;
protected transient RecordWriter[] rowOutWriters; // row specific RecordWriters
protected transient int maxPartitions;
private static final transient String[] FATAL_ERR_MSG = {
null, // counter value 0 means no error
"Number of dynamic partitions exceeded hive.exec.max.dynamic.partitions.pernode."
};
/**
 * The writer interface used by FileSinkOperator to emit serialized rows and to close
 * the underlying output file (optionally aborting).
 */
public static interface RecordWriter {
void write(Writable w) throws IOException;
void close(boolean abort) throws IOException;
}
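/**
 * Bookkeeping for one output destination (one partition directory in the dynamic
 * partitioning case): the temporary and final paths of each bucket file, the
 * corresponding RecordWriters, and the statistics gathered for the rows written there.
 */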
public class FSPaths implements Cloneable {
Path tmpPath;
Path taskOutputTempPath;
Path[] outPaths;
Path[] finalPaths;
RecordWriter[] outWriters;
Stat stat;
public FSPaths() {
}
public FSPaths(Path specPath) {
tmpPath = Utilities.toTempPath(specPath);
taskOutputTempPath = Utilities.toTaskTempPath(specPath);
outPaths = new Path[numFiles];
finalPaths = new Path[numFiles];
outWriters = new RecordWriter[numFiles];
stat = new Stat();
}
/**
 * Append a subdirectory to the tmp path.
 *
 * @param dp
 * subdirectory name
 */
public void appendTmpPath(String dp) {
tmpPath = new Path(tmpPath, dp);
}
/**
 * Return the task output path for the given task ID, rooted at taskOutputTempPath.
 */
public Path getTaskOutPath(String taskId) {
return getOutPath(taskId, this.taskOutputTempPath);
}
/**
 * Return the output path for the given task ID, rooted at tmpPath.
 */
public Path getOutPath(String taskId) {
return getOutPath(taskId, this.tmpPath);
}
/**
 * Return the output path for the given task ID under the given temporary directory.
 */
public Path getOutPath(String taskId, Path tmp) {
return new Path(tmp, Utilities.toTempPath(taskId));
}
/**
 * Return the final path for the given task ID, rooted at tmpPath.
 */
public Path getFinalPath(String taskId) {
return getFinalPath(taskId, this.tmpPath, null);
}
/**
 * Return the final path for the given task ID under the given tmpPath, appending the
 * file extension if one is provided.
 */
public Path getFinalPath(String taskId, Path tmpPath, String extension) {
if (extension != null) {
return new Path(tmpPath, taskId + extension);
} else {
return new Path(tmpPath, taskId);
}
}
public void setOutWriters(RecordWriter[] out) {
outWriters = out;
}
public RecordWriter[] getOutWriters() {
return outWriters;
}
public void closeWriters(boolean abort) throws HiveException {
for (int idx = 0; idx < outWriters.length; idx++) {
if (outWriters[idx] != null) {
try {
outWriters[idx].close(abort);
updateProgress();
} catch (IOException e) {
throw new HiveException(e);
}
}
}
}
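/**
 * Rename each task output file to its final location, creating the parent directories
 * first in the dynamic partitioning case.
 */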
private void commit(FileSystem fs) throws HiveException {
for (int idx = 0; idx < outPaths.length; ++idx) {
try {
if (bDynParts && !fs.exists(finalPaths[idx].getParent())) {
fs.mkdirs(finalPaths[idx].getParent());
}
if (!fs.rename(outPaths[idx], finalPaths[idx])) {
throw new HiveException("Unable to rename output from: " +
outPaths[idx] + " to: " + finalPaths[idx]);
}
updateProgress();
} catch (IOException e) {
throw new HiveException("Unable to rename output from: " +
outPaths[idx] + " to: " + finalPaths[idx], e);
}
}
}
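/**
 * Close all writers on abort and, if requested, delete the partially written output files.
 */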
public void abortWriters(FileSystem fs, boolean abort, boolean delete) throws HiveException {
for (int idx = 0; idx < outWriters.length; idx++) {
if (outWriters[idx] != null) {
try {
outWriters[idx].close(abort);
if (delete) {
fs.delete(outPaths[idx], true);
}
updateProgress();
} catch (IOException e) {
throw new HiveException(e);
}
}
}
}
} // class FSPaths
private static final long serialVersionUID = 1L;
protected transient FileSystem fs;
protected transient Serializer serializer;
protected transient BytesWritable commonKey = new BytesWritable();
protected transient TableIdEnum tabIdEnum = null;
private transient LongWritable row_count;
private transient boolean isNativeTable = true;
/**
 * The evaluators for the multi-file sprayer. If the table under consideration has 1000
 * buckets, it is not a good idea to start that many reducers: if the maximum number of
 * reducers is 100, each reducer can write 10 files, so we still effectively get 1000 files.
 */
private transient ExprNodeEvaluator[] partitionEval;
private transient int totalFiles;
private transient int numFiles;
private transient boolean multiFileSpray;
private transient final Map<Integer, Integer> bucketMap = new HashMap<Integer, Integer>();
private transient ObjectInspector[] partitionObjectInspectors;
private transient HivePartitioner<HiveKey, Object> prtner;
private transient final HiveKey key = new HiveKey();
private transient Configuration hconf;
private transient FSPaths fsp;
private transient boolean bDynParts;
private transient SubStructObjectInspector subSetOI;
private transient int timeOut; // JT timeout in msec.
private transient long lastProgressReport = System.currentTimeMillis();
/**
 * Counter names used to report per-destination-table row counts, one constant per
 * destination table id.
 */
public static enum TableIdEnum {
TABLE_ID_1_ROWCOUNT,
TABLE_ID_2_ROWCOUNT,
TABLE_ID_3_ROWCOUNT,
TABLE_ID_4_ROWCOUNT,
TABLE_ID_5_ROWCOUNT,
TABLE_ID_6_ROWCOUNT,
TABLE_ID_7_ROWCOUNT,
TABLE_ID_8_ROWCOUNT,
TABLE_ID_9_ROWCOUNT,
TABLE_ID_10_ROWCOUNT,
TABLE_ID_11_ROWCOUNT,
TABLE_ID_12_ROWCOUNT,
TABLE_ID_13_ROWCOUNT,
TABLE_ID_14_ROWCOUNT,
TABLE_ID_15_ROWCOUNT;
}
protected transient boolean autoDelete = false;
protected transient JobConf jc;
Class<? extends Writable> outputClass;
String taskId;
private boolean filesCreated = false;
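/**
 * Initialize the serializer, output format and file system handles, set up multi-file
 * spray and dynamic partitioning if configured, and register the single FSPaths entry
 * used in the non-DP case.
 */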
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
try {
this.hconf = hconf;
filesCreated = false;
isNativeTable = !conf.getTableInfo().isNonNative();
multiFileSpray = conf.isMultiFileSpray();
totalFiles = conf.getTotalFiles();
numFiles = conf.getNumFiles();
dpCtx = conf.getDynPartCtx();
valToPaths = new HashMap<String, FSPaths>();
taskId = Utilities.getTaskId(hconf);
specPath = new Path(conf.getDirName());
fs = specPath.getFileSystem(hconf);
hiveOutputFormat = conf.getTableInfo().getOutputFileFormatClass().newInstance();
isCompressed = conf.getCompressed();
parent = Utilities.toTempPath(conf.getDirName());
serializer = (Serializer) conf.getTableInfo().getDeserializerClass().newInstance();
serializer.initialize(null, conf.getTableInfo().getProperties());
outputClass = serializer.getSerializedClass();
// The timeout is half of mapred.healthChecker.script.timeout so that even if one
// iteration takes more than half of the script timeout (but less than the full
// timeout), we can still report progress in time.
timeOut = hconf.getInt("mapred.healthChecker.script.timeout", 600000) / 2;
if (hconf instanceof JobConf) {
jc = (JobConf) hconf;
} else {
// test code path
jc = new JobConf(hconf, ExecDriver.class);
}
if (multiFileSpray) {
partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
int i = 0;
for (ExprNodeDesc e : conf.getPartitionCols()) {
partitionEval[i++] = ExprNodeEvaluatorFactory.get(e);
}
partitionObjectInspectors = initEvaluators(partitionEval, outputObjInspector);
prtner = (HivePartitioner<HiveKey, Object>) ReflectionUtils.newInstance(
jc.getPartitionerClass(), null);
}
int id = conf.getDestTableId();
if ((id != 0) && (id <= TableIdEnum.values().length)) {
String enumName = "TABLE_ID_" + String.valueOf(id) + "_ROWCOUNT";
tabIdEnum = TableIdEnum.valueOf(enumName);
row_count = new LongWritable();
statsMap.put(tabIdEnum, row_count);
}
if (dpCtx != null) {
dpSetup();
}
if (!bDynParts) {
fsp = new FSPaths(specPath);
// All the files must eventually be created, because empty files are needed for empty
// buckets; createBucketFiles(fsp) is deferred and invoked lazily from processOp()/closeOp().
valToPaths.put("", fsp); // special entry for non-DP case
}
initializeChildren(hconf);
} catch (HiveException e) {
throw e;
} catch (Exception e) {
e.printStackTrace();
throw new HiveException(e);
}
}
/**
* Set up for dynamic partitioning including a new ObjectInspector for the output row.
*/
private void dpSetup() {
this.bDynParts = false;
this.numDynParts = dpCtx.getNumDPCols();
this.dpColNames = dpCtx.getDPColNames();
this.maxPartitions = dpCtx.getMaxPartitionsPerNode();
assert numDynParts == dpColNames.size() : "number of dynamic partitions should be the same as the size of DP mapping";
if (dpColNames != null && dpColNames.size() > 0) {
this.bDynParts = true;
assert inputObjInspectors.length == 1 : "FileSinkOperator should have 1 parent, but it has "
+ inputObjInspectors.length;
StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[0];
// exclude the dynamic partition columns (the trailing columns of the input row) from the OI
List<? extends StructField> fieldOI = soi.getAllStructFieldRefs();
ArrayList<ObjectInspector> newFieldsOI = new ArrayList<ObjectInspector>();
ArrayList<String> newFieldsName = new ArrayList<String>();
this.dpStartCol = 0;
for (StructField sf : fieldOI) {
String fn = sf.getFieldName();
if (!dpCtx.getInputToDPCols().containsKey(fn)) {
newFieldsOI.add(sf.getFieldObjectInspector());
newFieldsName.add(sf.getFieldName());
this.dpStartCol++;
}
}
assert newFieldsOI.size() > 0 : "new Fields ObjectInspector is empty";
this.subSetOI = new SubStructObjectInspector(soi, 0, this.dpStartCol);
this.dpVals = new ArrayList<String>(numDynParts);
this.dpWritables = new ArrayList<Object>(numDynParts);
}
}
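/**
 * Compute the temporary and final file names for every bucket file this task owns
 * (honoring multi-file spray bucket assignment) and open a RecordWriter for each.
 */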
private void createBucketFiles(FSPaths fsp) throws HiveException {
try {
int filesIdx = 0;
Set<Integer> seenBuckets = new HashSet<Integer>();
for (int idx = 0; idx < totalFiles; idx++) {
if (this.getExecContext() != null && this.getExecContext().getFileId() != -1) {
LOG.info("replace taskId from execContext ");
taskId = Utilities.replaceTaskIdFromFilename(taskId, this.getExecContext().getFileId());
LOG.info("new taskId: FS " + taskId);
assert !multiFileSpray;
assert totalFiles == 1;
}
if (multiFileSpray) {
key.setHashCode(idx);
// Does this hashcode belong to this reducer?
int numReducers = totalFiles / numFiles;
if (numReducers > 1) {
int currReducer = Integer.valueOf(Utilities.getTaskIdFromFilename(Utilities
.getTaskId(hconf)));
int reducerIdx = prtner.getPartition(key, null, numReducers);
if (currReducer != reducerIdx) {
continue;
}
}
int bucketNum = prtner.getBucket(key, null, totalFiles);
if (seenBuckets.contains(bucketNum)) {
continue;
}
seenBuckets.add(bucketNum);
bucketMap.put(bucketNum, filesIdx);
taskId = Utilities.replaceTaskIdFromFilename(Utilities.getTaskId(hconf), bucketNum);
}
if (isNativeTable) {
fsp.finalPaths[filesIdx] = fsp.getFinalPath(taskId);
LOG.info("Final Path: FS " + fsp.finalPaths[filesIdx]);
fsp.outPaths[filesIdx] = fsp.getTaskOutPath(taskId);
LOG.info("Writing to temp file: FS " + fsp.outPaths[filesIdx]);
} else {
fsp.finalPaths[filesIdx] = fsp.outPaths[filesIdx] = specPath;
}
try {
// The reason to keep these instead of using
// OutputFormat.getRecordWriter() is that
// getRecordWriter does not give us enough control over the file name that
// we create.
String extension = Utilities.getFileExtension(jc, isCompressed,
hiveOutputFormat);
if (!bDynParts) {
fsp.finalPaths[filesIdx] = fsp.getFinalPath(taskId, parent, extension);
} else {
fsp.finalPaths[filesIdx] = fsp.getFinalPath(taskId, fsp.tmpPath, extension);
}
} catch (Exception e) {
e.printStackTrace();
throw new HiveException(e);
}
LOG.info("New Final Path: FS " + fsp.finalPaths[filesIdx]);
if (isNativeTable) {
try {
// in recent hadoop versions, use deleteOnExit to clean tmp files.
autoDelete = ShimLoader.getHadoopShims().fileSystemDeleteOnExit(
fs, fsp.outPaths[filesIdx]);
} catch (IOException e) {
throw new HiveException(e);
}
}
Utilities.copyTableJobPropertiesToConf(conf.getTableInfo(), jc);
// Only create bucket files if there are no dynamic partitions; buckets of dynamic
// partitions will be created for each newly created partition.
fsp.outWriters[filesIdx] = HiveFileFormatUtils.getHiveRecordWriter(
jc, conf.getTableInfo(), outputClass, conf, fsp.outPaths[filesIdx]);
// increment the CREATED_FILES counter
if (reporter != null) {
reporter.incrCounter(ProgressCounter.CREATED_FILES, 1);
}
filesIdx++;
}
assert filesIdx == numFiles;
// in recent hadoop versions, use deleteOnExit to clean tmp files.
if (isNativeTable) {
autoDelete = ShimLoader.getHadoopShims().fileSystemDeleteOnExit(fs, fsp.outPaths[0]);
}
} catch (HiveException e) {
throw e;
} catch (Exception e) {
e.printStackTrace();
throw new HiveException(e);
}
filesCreated = true;
}
/**
 * Report status to the JobTracker so that it won't kill this task if closing takes too
 * long due to too many files to close while the NameNode is overloaded.
 *
 * @return true if a new progress update was reported, false otherwise.
 */
private boolean updateProgress() {
if (reporter != null &&
(System.currentTimeMillis() - lastProgressReport) > timeOut) {
reporter.progress();
lastProgressReport = System.currentTimeMillis();
return true;
} else {
return false;
}
}
Writable recordValue;
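/**
 * Serialize one input row and write it to the appropriate RecordWriter, resolving the
 * dynamic partition directory and the multi-file spray bucket when they are enabled.
 */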
@Override
public void processOp(Object row, int tag) throws HiveException {
if (!bDynParts && !filesCreated) {
createBucketFiles(fsp);
}
// Since FileSink is a terminal operator, forward() is not called, so maintain the
// number of output rows explicitly.
if (counterNameToEnum != null) {
++outputRows;
if (outputRows % 1000 == 0) {
incrCounter(numOutputRowsCntr, outputRows);
outputRows = 0;
}
}
try {
updateProgress();
// if DP is enabled, get the final output writers and prepare the real output row
assert inputObjInspectors[0].getCategory() == ObjectInspector.Category.STRUCT : "input object inspector is not struct";
FSPaths fpaths;
if (bDynParts) {
// copy the DP column values from the input row to dpVals
dpVals.clear();
dpWritables.clear();
ObjectInspectorUtils.partialCopyToStandardObject(dpWritables, row, dpStartCol, numDynParts,
(StructObjectInspector) inputObjInspectors[0], ObjectInspectorCopyOption.WRITABLE);
// get the set of RecordWriters based on the DP column values;
// a null or empty DP column value is mapped to the default partition name
for (Object o : dpWritables) {
if (o == null || o.toString().length() == 0) {
dpVals.add(dpCtx.getDefaultPartitionName());
} else {
dpVals.add(o.toString());
}
}
// use SubStructObjectInspector to serialize the non-partitioning columns in the input row
recordValue = serializer.serialize(row, subSetOI);
fpaths = getDynOutPaths(dpVals);
} else {
fpaths = fsp;
// use the SerDe to serialize the row, and write it out
recordValue = serializer.serialize(row, inputObjInspectors[0]);
}
rowOutWriters = fpaths.outWriters;
if (conf.isGatherStats()) {
if (HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVE_STATS_COLLECT_RAWDATASIZE)) {
SerDeStats stats = serializer.getSerDeStats();
if (stats != null) {
fpaths.stat.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize());
}
}
fpaths.stat.addToStat(StatsSetupConst.ROW_COUNT, 1);
}
if (row_count != null) {
row_count.set(row_count.get() + 1);
}
if (!multiFileSpray) {
rowOutWriters[0].write(recordValue);
} else {
int keyHashCode = 0;
for (int i = 0; i < partitionEval.length; i++) {
Object o = partitionEval[i].evaluate(row);
keyHashCode = keyHashCode * 31
+ ObjectInspectorUtils.hashCode(o, partitionObjectInspectors[i]);
}
key.setHashCode(keyHashCode);
int bucketNum = prtner.getBucket(key, null, totalFiles);
int idx = bucketMap.get(bucketNum);
rowOutWriters[idx].write(recordValue);
}
} catch (IOException e) {
throw new HiveException(e);
} catch (SerDeException e) {
throw new HiveException(e);
}
}
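/**
 * Look up (or lazily create) the FSPaths for the dynamic partition directory derived
 * from the given DP column values, raising a fatal error when the number of partitions
 * created by this task exceeds the configured maximum.
 */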
private FSPaths getDynOutPaths(List<String> row) throws HiveException {
FSPaths fp;
// get the path corresponding to the dynamic partition columns,
String dpDir = getDynPartDirectory(row, dpColNames, numDynParts);
if (dpDir != null) {
FSPaths fsp2 = valToPaths.get(dpDir);
if (fsp2 == null) {
// check # of dp
if (valToPaths.size() > maxPartitions) {
// throw fatal error
incrCounter(fatalErrorCntr, 1);
fatalError = true;
LOG.error("Fatal error was thrown due to exceeding number of dynamic partitions");
}
fsp2 = new FSPaths(specPath);
fsp2.tmpPath = new Path(fsp2.tmpPath, dpDir);
fsp2.taskOutputTempPath = new Path(fsp2.taskOutputTempPath, dpDir);
createBucketFiles(fsp2);
valToPaths.put(dpDir, fsp2);
}
fp = fsp2;
} else {
fp = fsp;
}
return fp;
}
// given the current input row, the mapping for input col info to dp columns, and # of dp cols,
// return the relative path corresponding to the row.
// e.g., ds=2008-04-08/hr=11
private String getDynPartDirectory(List<String> row, List<String> dpColNames, int numDynParts) {
assert row.size() == numDynParts && numDynParts == dpColNames.size() : "data length is different from num of DP columns";
return FileUtils.makePartName(dpColNames, row);
}
@Override
protected void fatalErrorMessage(StringBuilder errMsg, long counterCode) {
errMsg.append("Operator ").append(getOperatorId()).append(" (id=").append(id).append("): ");
errMsg.append(counterCode > FATAL_ERR_MSG.length - 1 ?
"fatal error." :
FATAL_ERR_MSG[(int) counterCode]);
// number of partitions exceeds limit, list all the partition names
if (counterCode > 0) {
errMsg.append(lsDir());
}
}
// sample the partitions that are generated so that users have a sense of what's causing the error
private String lsDir() {
String specPath = conf.getDirName();
// need to get a JobConf here because it's not passed through at client side
JobConf jobConf = new JobConf(ExecDriver.class);
Path tmpPath = Utilities.toTempPath(specPath);
StringBuilder sb = new StringBuilder("\n");
try {
DynamicPartitionCtx dpCtx = conf.getDynPartCtx();
int numDP = dpCtx.getNumDPCols();
FileSystem fs = tmpPath.getFileSystem(jobConf);
FileStatus[] status = Utilities.getFileStatusRecurse(tmpPath, numDP, fs);
sb.append("Sample of ")
.append(Math.min(status.length, 100))
.append(" partitions created under ")
.append(tmpPath.toString())
.append(":\n");
for (int i = 0; i < status.length; ++i) {
sb.append("\t.../");
sb.append(getPartitionSpec(status[i].getPath(), numDP))
.append("\n");
}
sb.append("...\n");
} catch (Exception e) {
// cannot get the subdirectories, just return the root directory
sb.append(tmpPath).append("...\n").append(e.getMessage());
e.printStackTrace();
}
return sb.toString();
}
private String getPartitionSpec(Path path, int level) {
Stack<String> st = new Stack<String>();
Path p = path;
for (int i = 0; i < level; ++i) {
st.push(p.getName());
p = p.getParent();
}
StringBuilder sb = new StringBuilder();
while (!st.empty()) {
sb.append(st.pop());
if (!st.empty()) {
sb.append(Path.SEPARATOR);
}
}
return sb.toString();
}
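/**
 * On a normal close, flush and commit all writers and optionally publish stats; on
 * abort, close the writers and delete any partially written files.
 */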
@Override
public void closeOp(boolean abort) throws HiveException {
if (!bDynParts && !filesCreated) {
createBucketFiles(fsp);
}
lastProgressReport = System.currentTimeMillis();
if (!abort) {
for (FSPaths fsp : valToPaths.values()) {
fsp.closeWriters(abort);
if (isNativeTable) {
fsp.commit(fs);
}
}
// Only publish stats if this operator's flag was set to gather stats
if (conf.isGatherStats()) {
publishStats();
}
} else {
// We come here if an Exception was thrown in map() or reduce().
// Hadoop always calls close() even if an Exception was thrown in map() or reduce().
for (FSPaths fsp : valToPaths.values()) {
fsp.abortWriters(fs, abort, !autoDelete && isNativeTable);
}
}
}
/**
* @return the name of the operator
*/
@Override
public String getName() {
return "FS";
}
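/**
 * At job close, move the task outputs under the temporary directory to their final
 * destination for native tables.
 */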
@Override
public void jobClose(Configuration hconf, boolean success, JobCloseFeedBack feedBack)
throws HiveException {
try {
if ((conf != null) && isNativeTable) {
String specPath = conf.getDirName();
DynamicPartitionCtx dpCtx = conf.getDynPartCtx();
Utilities.mvFileToFinalPath(specPath, hconf, success, LOG, dpCtx, conf);
}
} catch (IOException e) {
throw new HiveException(e);
}
super.jobClose(hconf, success, feedBack);
}
@Override
public OperatorType getType() {
return OperatorType.FILESINK;
}
@Override
public void augmentPlan() {
PlanUtils.configureOutputJobPropertiesForStorageHandler(
getConf().getTableInfo());
}
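/**
 * Validate the output specification, delegating to the HiveOutputFormat only for
 * non-native (storage handler backed) tables.
 */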
public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
if (hiveOutputFormat == null) {
try {
hiveOutputFormat = conf.getTableInfo().getOutputFileFormatClass().newInstance();
} catch (Exception ex) {
throw new IOException(ex);
}
}
Utilities.copyTableJobPropertiesToConf(conf.getTableInfo(), job);
if (conf.getTableInfo().isNonNative()) {
// check the output specs only if it is a storage handler (native tables' output
// formats do not set the job's output properties correctly)
try {
hiveOutputFormat.checkOutputSpecs(ignored, job);
} catch (NoSuchMethodError e) {
// For backward compatibility, ignore this for now, but leave a log message
LOG.warn("HiveOutputFormat should implement checkOutputSpecs() method");
}
}
}
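/**
 * Publish the per-partition statistics gathered during execution (e.g. row count, raw
 * data size) to the configured StatsPublisher, keyed by the stats aggregation prefix,
 * the partition spec and the task ID. Failures are logged and never fail the query.
 */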
private void publishStats() {
// Initialize a stats publisher
StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);
if (statsPublisher == null) {
// just return, stats gathering should not block the main query
LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
return;
}
if (!statsPublisher.connect(hconf)) {
// just return, stats gathering should not block the main query
LOG.error("StatsPublishing error: cannot connect to database");
return;
}
String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(hconf));
String spSpec = conf.getStaticSpec() != null ? conf.getStaticSpec() : "";
for (String fspKey : valToPaths.keySet()) {
FSPaths fspValue = valToPaths.get(fspKey);
String key;
// construct the key (fileID) to insert into the intermediate stats table
if (fspKey.isEmpty()) {
// for non-partitioned/static partitioned table, the key for temp storage is
// common key prefix + static partition spec + taskID
key = conf.getStatsAggPrefix() + spSpec + taskID;
} else {
// for partitioned table, the key is
// common key prefix + static partition spec + DynamicPartSpec + taskID
key = conf.getStatsAggPrefix() + spSpec + fspKey + Path.SEPARATOR + taskID;
}
Map<String, String> statsToPublish = new HashMap<String, String>();
for (String statType : fspValue.stat.getStoredStats()) {
statsToPublish.put(statType, Long.toString(fspValue.stat.getStat(statType)));
}
statsPublisher.publishStat(key, statsToPublish);
}
statsPublisher.closeConnection();
}
}