NodeDiscoveryType discovery = (NodeDiscoveryType)options.get(DISCOVER);
// connect to src cluster.
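// both SRC and DST are expected as host:port:keyspace strings; the three parts feed connect() below.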
String[] srcParts = options.get(SRC).toString().split(":", -1);
final AstyanaxContext<Keyspace> srcContext = connect(srcParts[0], Integer.parseInt(srcParts[1]), srcParts[2], readThreads, discovery);
final Keyspace srcKeyspace = srcContext.getEntity();
// connect to dst cluster.
String[] dstParts = options.get(DST).toString().split(":", -1);
final AstyanaxContext<Keyspace> dstContext = connect(dstParts[0], Integer.parseInt(dstParts[1]), dstParts[2], writeThreads, discovery);
final Keyspace dstKeyspace = dstContext.getEntity();
final AtomicLong columnsTransferred = new AtomicLong(0);
final long startClockTime = nowInSeconds();
// establish column range.
final ByteBufferRange range = new RangeBuilder()
.setStart((Long) options.get(FROM))
.setEnd((Long) options.get(TO)).build();
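// FROM/TO are epoch-millisecond timestamps used as the Long column names, so each row is
// bounded to the requested date window (the same values are printed as Dates below).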
// create a threadpool that will write stuff into the destination.
final ThreadPoolExecutor destWriteExecutor = new ThreadPoolExecutor(writeThreads, writeThreads,
0L, TimeUnit.MILLISECONDS,
new LinkedBlockingQueue<Runnable>());
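// fixed pool of writeThreads workers over an unbounded queue: submits never block, so a slow
// destination shows up as queue growth rather than stalled reads.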
// this threadpool ensures single-threaded output statements.
final ThreadPoolExecutor postExecutor = new ThreadPoolExecutor(1, 1, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>());
// this threadpool performs verifications.
final ThreadPoolExecutor verifyExecutor = new ThreadPoolExecutor(1, 1, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>());
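// both pools above are deliberately single-threaded: log output stays ordered and
// verification reads go out one at a time.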
// keep track of the number of keys that have been copied.
final AtomicInteger processedKeys = new AtomicInteger(0);
// keep track of the number of keys that have been iterated over (includes skips).
final AtomicInteger iteratedKeys = new AtomicInteger(0);
final AtomicBoolean skipping = new AtomicBoolean(true);
// sentinel that indicates it is time to stop doing everything.
final AtomicBoolean stopAll = new AtomicBoolean(false);
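// last time progress was observed; meant to be bumped as rows arrive, and watched by the
// stagnation thread below.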
final AtomicLong heartbeat = new AtomicLong(System.currentTimeMillis());
final boolean verify = (Boolean)options.get(VERIFY);
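// RNG used below to sample roughly VERIFY_PERCENT of copied rows for verification.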
final Random random = new Random(System.nanoTime());
// indicate what's going to happen.
out.println(String.format("Will process roughly %d keys from %s to %s for dates %s to %s",
keyLimit,
options.get(SRC),
options.get(DST),
new Date((Long)options.get(FROM)),
new Date((Long)options.get(TO))));
if (skip > 0) {
out.println("Be patient while I skip " + skip + " keys");
}
try {
final ColumnFamily<Locator, Long> columnFamily = (ColumnFamily<Locator, Long>)options.get(COLUMN_FAMILY);
// when we skip, we'd like the batch size to be larger. that way, if the user specified a small batch size,
// we don't spend a lot of time iterating through nothing. this should reduce the number of round trips to
// one per thread.
int realizedBatchSize = batchSize;
if (skip > 0 && iteratedKeys.get() < skip) {
realizedBatchSize = skip / readThreads;
}
realizedBatchSize = Math.min(5000, realizedBatchSize); // not too big, though.
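// e.g. skip=100000 with 10 read threads gives 10000 per batch, which the cap trims to 5000.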
// we have no way of knowing when we've processed all rows (no callbacks or anything); this thread makes
// sure that work is still being done. when it sees a 30s period of nothing, it shuts things down.
new Thread("Stagnation") {
public void run() {
while (!destWriteExecutor.isShutdown()) {
if (System.currentTimeMillis() - heartbeat.get() > 30*1000) {
if (!skipping.get()) {
out.println("It looks like we're done");
destWriteExecutor.shutdown();
postExecutor.shutdown();
verifyExecutor.shutdown();
srcContext.shutdown();
dstContext.shutdown();
break;
}
}
try { sleep(1000L); } catch (InterruptedException ex) { /* keep watching */ }
}
}
}.start();
// get all the data.
srcKeyspace.prepareQuery(columnFamily)
.getAllRows()
.setRowLimit(realizedBatchSize)
.setRepeatLastToken(false)
.withColumnRange(range)
.setConcurrencyLevel(readThreads)
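// concurrency here splits the full token range across readThreads parallel scans;
// success() below is invoked once per batch of rows.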
.executeWithCallback(new RowCallback<Locator, Long>() {
@Override
public void success(Rows<Locator, Long> rows) {
if (skipping.get()) {
out.println(String.format("skipping... " + iteratedKeys.get()));
}
for (Locator locator : rows.getKeys()) {
// short-circuit quit if we need to.
if (stopAll.get()) break;
// do skipping if needed.
int overallKey = iteratedKeys.incrementAndGet();
if (overallKey <= skip) { // overallKey starts at 1, so <= skips exactly 'skip' keys.
continue;
}
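// past the skip window: every key from here on counts.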
skipping.set(false);
// shut things down when we reach the key limit.
if (processedKeys.get() >= keyLimit && !stopAll.get()) {
out.println("Reached key limit.");
stopAll.set(true);
destWriteExecutor.shutdownNow();
verifyExecutor.shutdownNow();
postExecutor.submit(new Runnable() {
public void run() {
srcContext.shutdown();
dstContext.shutdown();
}
});
postExecutor.shutdown();
break;
}
final Locator locatorCapture = locator;
final Row<Locator, Long> row = rows.getRow(locator);
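// locator and row are captured finals: the columns were already fetched by the range scan,
// so the write task below just turns them into a destination mutation.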
// send copy commands to the write thread pool.
destWriteExecutor.submit(new Runnable() {
public void run() {
// back out if we've processed our quota of rows.
if (processedKeys.get() >= keyLimit) {
return;
}
// copy the column.
MutationBatch batch = dstKeyspace.prepareMutationBatch();
ColumnListMutation<Long> mutation = batch.withRow(columnFamily, locatorCapture);
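// in Cassandra a TTL of 0 means the column never expires; this tool always reapplies a real TTL.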
assert ttl != 0;
long colCount = 0;
for (Column<Long> c : row.getColumns()) {
mutation.putColumn(c.getName(), c.getByteBufferValue(), ttl);
colCount += 1;
}
columnsTransferred.addAndGet(colCount);
// save it, submit a log message to be shown later.
try {
batch.execute();
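// spot check: re-read this row from both clusters over the same column range and compare.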
if (verify && random.nextFloat() < VERIFY_PERCENT) {
verifyExecutor.submit(new Runnable() {
public void run() {
try {
ColumnList<Long> srcData = srcKeyspace.prepareQuery(columnFamily).getKey(locatorCapture)
.withColumnRange(range)
.execute()
.getResult();
ColumnList<Long> dstData = dstKeyspace.prepareQuery(columnFamily).getKey(locatorCapture)
.withColumnRange(range)
.execute()
.getResult();
checkSameResults(srcData, dstData);