private static void logSnapshotCompleteToZK(
long txnId,
boolean snapshotSuccess,
Map<String, Map<Integer, Pair<Long, Long>>> exportSequenceNumbers) {
ZooKeeper zk = VoltDB.instance().getHostMessenger().getZK();
// Timeout after 10 minutes
final long endTime = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(10);
final String snapshotPath = VoltZK.completed_snapshots + "/" + txnId;
boolean success = false;
while (!success) {
if (System.currentTimeMillis() > endTime) {
VoltDB.crashLocalVoltDB("Timed out logging snapshot completion to ZK");
}
Stat stat = new Stat();
byte data[] = null;
try {
data = zk.getData(snapshotPath, false, stat);
} catch (NoNodeException e) {
// The MPI creates the snapshot completion node asynchronously.
// If the node doesn't exist yet, retry.
continue;
} catch (Exception e) {
VoltDB.crashLocalVoltDB("This ZK get should never fail", true, e);
}
if (data == null) {
VoltDB.crashLocalVoltDB("Data should not be null if the node exists", false, null);
}
try {
JSONObject jsonObj = new JSONObject(new String(data, "UTF-8"));
if (jsonObj.getLong("txnId") != txnId) {
VoltDB.crashLocalVoltDB("TxnId should match", false, null);
}
int remainingHosts = jsonObj.getInt("hostCount") - 1;
jsonObj.put("hostCount", remainingHosts);
jsonObj.put("didSucceed", snapshotSuccess);
if (!snapshotSuccess) {
jsonObj.put("isTruncation", false);
}
mergeExportSequenceNumbers(jsonObj, exportSequenceNumbers);
zk.setData(snapshotPath, jsonObj.toString(4).getBytes("UTF-8"), stat.getVersion());
} catch (KeeperException.BadVersionException e) {
continue;
} catch (Exception e) {
VoltDB.crashLocalVoltDB("This ZK call should never fail", true, e);
}
success = true;
}
/*
* If we are running without command logging there will be no consumer for
* the completed snapshot messages. Consume them here to bound space usage in ZK.
*/
try {
TreeSet<String> snapshots = new TreeSet<String>(zk.getChildren(VoltZK.completed_snapshots, false));
while (snapshots.size() > 30) {
try {
zk.delete(VoltZK.completed_snapshots + "/" + snapshots.first(), -1);
} catch (NoNodeException e) {}
catch (Exception e) {
VoltDB.crashLocalVoltDB(
"Deleting a snapshot completion record from ZK should only fail with NoNodeException", true, e);
}