" INSERT INTO TABLE " + tblName + " PARTITION(bkt=1) " +
"SELECT a, b where a >= 2", driver);
execSelectAndDumpData("select * from " + tblName, driver, "Dumping data for " +
tblName + " after load:");
CompactionTxnHandler txnHandler = new CompactionTxnHandler(conf);
CompactionInfo ci = new CompactionInfo("default", tblName, "bkt=0", CompactionType.MAJOR);
LOG.debug("List of stats columns before analyze Part1: " + txnHandler.findColumnsWithStats(ci));
Worker.StatsUpdater su = Worker.StatsUpdater.init(ci, colNames, conf,
System.getProperty("user.name"));
su.gatherStats();//compute stats before compaction
LOG.debug("List of stats columns after analyze Part1: " + txnHandler.findColumnsWithStats(ci));
CompactionInfo ciPart2 = new CompactionInfo("default", tblName, "bkt=1", CompactionType.MAJOR);
LOG.debug("List of stats columns before analyze Part2: " + txnHandler.findColumnsWithStats(ci));
su = Worker.StatsUpdater.init(ciPart2, colNames, conf, System.getProperty("user.name"));
su.gatherStats();//compute stats before compaction
LOG.debug("List of stats columns after analyze Part2: " + txnHandler.findColumnsWithStats(ci));
//now make sure we get the stats we expect for partition we are going to add data to later
Map<String, List<ColumnStatisticsObj>> stats = msClient.getPartitionColumnStatistics(ci.dbname,
ci.tableName, Arrays.asList(ci.partName), colNames);
List<ColumnStatisticsObj> colStats = stats.get(ci.partName);
Assert.assertNotNull("No stats found for partition " + ci.partName, colStats);
Assert.assertEquals("Expected column 'a' at index 0", "a", colStats.get(0).getColName());
Assert.assertEquals("Expected column 'b' at index 1", "b", colStats.get(1).getColName());
LongColumnStatsData colAStats = colStats.get(0).getStatsData().getLongStats();
Assert.assertEquals("lowValue a", 1, colAStats.getLowValue());
Assert.assertEquals("highValue a", 1, colAStats.getHighValue());
Assert.assertEquals("numNulls a", 0, colAStats.getNumNulls());
Assert.assertEquals("numNdv a", 1, colAStats.getNumDVs());
StringColumnStatsData colBStats = colStats.get(1).getStatsData().getStringStats();
Assert.assertEquals("maxColLen b", 3, colBStats.getMaxColLen());
Assert.assertEquals("avgColLen b", 3.0, colBStats.getAvgColLen());
Assert.assertEquals("numNulls b", 0, colBStats.getNumNulls());
Assert.assertEquals("nunDVs", 2, colBStats.getNumDVs());
//now save stats for partition we won't modify
stats = msClient.getPartitionColumnStatistics(ciPart2.dbname,
ciPart2.tableName, Arrays.asList(ciPart2.partName), colNames);
colStats = stats.get(ciPart2.partName);
LongColumnStatsData colAStatsPart2 = colStats.get(0).getStatsData().getLongStats();
StringColumnStatsData colBStatsPart2 = colStats.get(1).getStatsData().getStringStats();
HiveEndPoint endPt = new HiveEndPoint(null, ci.dbname, ci.tableName, Arrays.asList("0"));
DelimitedInputWriter writer = new DelimitedInputWriter(new String[] {"a","b"},",", endPt);
/* The next call will eventually end up in HiveEndPoint.createPartitionIfNotExists(), which
 * performs an operation on Driver
 * and starts its own CliSessionState and then closes it, which removes it from ThreadLocal;
 * thus the session
 * created in this class is gone after this; I fixed it in HiveEndPoint. */
StreamingConnection connection = endPt.newConnection(true);
TransactionBatch txnBatch = connection.fetchTransactionBatch(2, writer);
txnBatch.beginNextTransaction();
Assert.assertEquals(TransactionBatch.TxnState.OPEN, txnBatch.getCurrentTransactionState());
txnBatch.write("50,Kiev".getBytes());
txnBatch.write("51,St. Petersburg".getBytes());
txnBatch.write("44,Boston".getBytes());
txnBatch.commit();
txnBatch.beginNextTransaction();
txnBatch.write("52,Tel Aviv".getBytes());
txnBatch.write("53,Atlantis".getBytes());
txnBatch.write("53,Boston".getBytes());
txnBatch.commit();
txnBatch.close();
connection.close();
execSelectAndDumpData("select * from " + ci.getFullTableName(), driver, ci.getFullTableName());
//so now we have written some new data to bkt=0 and it shows up
CompactionRequest rqst = new CompactionRequest(ci.dbname, ci.tableName, CompactionType.MAJOR);
rqst.setPartitionname(ci.partName);
txnHandler.compact(rqst);
Worker t = new Worker();
t.setThreadId((int) t.getId());
t.setHiveConf(conf);
AtomicBoolean stop = new AtomicBoolean();
AtomicBoolean looped = new AtomicBoolean();
stop.set(true);
t.init(stop, looped);
t.run();
ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
List<ShowCompactResponseElement> compacts = rsp.getCompacts();
Assert.assertEquals(1, compacts.size());
Assert.assertEquals("ready for cleaning", compacts.get(0).getState());
stats = msClient.getPartitionColumnStatistics(ci.dbname, ci.tableName,