private void createMap4Merge(FileSinkOperator fsInput, GenMRProcContext ctx, String finalName) throws SemanticException {
//
// 1. create the operator tree
//
ParseContext parseCtx = ctx.getParseCtx();
FileSinkDesc fsInputDesc = fsInput.getConf();
// Create a TableScan operator
RowSchema inputRS = fsInput.getSchema();
Operator<? extends Serializable> tsMerge = OperatorFactory.get(TableScanDesc.class, inputRS);
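// tsMerge scans the intermediate output written by fsInput and is the root of the merge job's operator tree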
// Create a FileSink operator
TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
FileSinkDesc fsOutputDesc = new FileSinkDesc(finalName, ts,
parseCtx.getConf().getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(
fsOutputDesc, inputRS, tsMerge);
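// fsOutput writes the merged files to finalName, the final destination directory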
// If the input FileSinkOperator has dynamic partitioning enabled, the tsMerge input schema
// needs to include the partition columns, and the fsOutput needs a
// DynamicPartitionCtx to indicate that its output should be dynamically partitioned.
DynamicPartitionCtx dpCtx = fsInputDesc.getDynPartCtx();
if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
// adding DP ColumnInfo to the RowSchema signature
ArrayList<ColumnInfo> signature = inputRS.getSignature();
String tblAlias = fsInputDesc.getTableInfo().getTableName();
LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>();
StringBuilder partCols = new StringBuilder();
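// partCols accumulates a '/'-separated list of the dynamic partition column names for the table properties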
for (String dpCol : dpCtx.getDPColNames()) {
ColumnInfo colInfo = new ColumnInfo(dpCol,
    TypeInfoFactory.stringTypeInfo, // all dynamic partition columns are of type string
    tblAlias, true); // the partition column is treated as a virtual column
signature.add(colInfo);
colMap.put(dpCol, dpCol); // input and output have the same column name
partCols.append(dpCol).append('/');
}
partCols.setLength(partCols.length()-1); // remove the last '/'
inputRS.setSignature(signature);
// create another DynamicPartitionCtx, which has a different input-to-DP column mapping
DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
dpCtx2.setInputToDPCols(colMap);
fsOutputDesc.setDynPartCtx(dpCtx2);
// record the dynamic partition columns in the input table descriptor's properties
fsInputDesc.getTableInfo().getProperties().setProperty(
    org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS,
    partCols.toString()); // '/'-separated list of dynamic partition column names
} else {
// non-partitioned table
fsInputDesc.getTableInfo().getProperties().remove(
org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);
}
//
// 2. Construct a conditional task consisting of a move task and a map-reduce task
//
MapRedTask currTask = (MapRedTask) ctx.getCurrTask();
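// a MoveWork that moves fsInput's intermediate output directory to the final location (finalName)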
MoveWork dummyMv = new MoveWork(null, null, null,
new LoadFileDesc(fsInputDesc.getDirName(), finalName, true, null, null), false);
MapredWork cplan;
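// cplan is the plan for the merge job; use a block-level RCFile merge when the input is RCFile and block-level merge is enabled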
if (parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEMERGERCFILEBLOCKLEVEL) &&
    fsInputDesc.getTableInfo().getInputFileFormatClass().equals(RCFileInputFormat.class)) {
// Check if InputFormatClass is valid
String inputFormatClass =
    parseCtx.getConf().getVar(HiveConf.ConfVars.HIVEMERGEINPUTFORMATBLOCKLEVEL);
try {
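// loading the class verifies that the configured merge input format exists on the classpath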
Class<? extends InputFormat> c =
    (Class<? extends InputFormat>) Class.forName(inputFormatClass);
LOG.info("RCFile format - using block level merge");