assert(catalog_stmt != null);
Procedure catalog_proc = catalog_stmt.getParent();
if (catalog_proc.equals(ts.getProcedure()) == false) {
LOG.warn(ts.debug() + "\n" + allFragmentBuilders + "\n---- INVALID ----\n" + frag);
String msg = String.format("%s - Unexpected %s", ts, catalog_frag.fullName());
throw new ServerFaultException(msg, ts.getTransactionId());
}
}
} // FOR
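// Sanity check: an all-local, single-partition batch should never be
// dispatched through this slow-path.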
if (has_remote == false) {
LOG.warn(ts.debug() + "\n" + allFragmentBuilders);
String msg = ts + " - Trying to execute all local single-partition queries using the slow-path!";
throw new ServerFaultException(msg, ts.getTransactionId());
}
}
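// Per-round bookkeeping: flags and counters that track where each WorkFragment
// in the current batch gets dispatched (local partition, same site, remote site),
// plus the latch that we block on for this round's outstanding dependencies.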
boolean first = true;
boolean serializedParams = false;
CountDownLatch latch = null;
boolean all_local = true;
boolean is_localSite;
boolean is_localPartition;
boolean is_localReadOnly = true;
int num_localPartition = 0;
int num_localSite = 0;
int num_remote = 0;
int num_skipped = 0;
int total = 0;
Collection<WorkFragment.Builder> fragmentBuilders = allFragmentBuilders;
// Make sure our txn is in our DependencyTracker
if (trace.val)
LOG.trace(String.format("%s - Added transaction to %s",
ts, this.depTracker.getClass().getSimpleName()));
this.depTracker.addTransaction(ts);
// Count the number of fragments that we're going to send to each partition and
// figure out whether the txn will always be read-only at this partition
tmp_fragmentsPerPartition.clearValues();
for (WorkFragment.Builder fragmentBuilder : allFragmentBuilders) {
int partition = fragmentBuilder.getPartitionId();
tmp_fragmentsPerPartition.put(partition);
if (this.partitionId == partition && fragmentBuilder.getReadOnly() == false) {
is_localReadOnly = false;
}
} // FOR
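// The undo token for this round depends on whether every fragment that
// executes at this partition is read-only.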
long undoToken = this.calculateNextUndoToken(ts, is_localReadOnly);
ts.initFirstRound(undoToken, batchSize);
final boolean predict_singlePartition = ts.isPredictSinglePartition();
// Calculate whether we are finished with partitions now
final Estimate lastEstimate = ts.getLastEstimate();
DonePartitionsNotification notify = null;
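// If early prepare is enabled, figure out which partitions this txn will be
// finished with after this batch and notify the sites that manage them.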
if (hstore_conf.site.exec_early_prepare && ts.isSysProc() == false && ts.allowEarlyPrepare()) {
notify = this.computeDonePartitions(ts, lastEstimate, tmp_fragmentsPerPartition, finalTask);
if (notify != null && notify.hasSitesToNotify())
this.notifyDonePartitions(ts, notify);
}
// Attach the ParameterSets to our transaction handle so that anybody on this HStoreSite
// can access them directly without needing to deserialize them from the WorkFragments
ts.attachParameterSets(batchParams);
// Now if we have some work sent out to other partitions, we need to wait until they come back
// In the first part, we wait until all of our blocked WorkFragments become unblocked
final BlockingDeque<Collection<WorkFragment.Builder>> queue = this.depTracker.getUnblockedWorkFragmentsQueue(ts);
// Keep running through this loop as long as:
// (1) We have no pending errors, and at least one of the following holds:
// (2) This is our first time in the loop (first == true)
// (3) We know that there are still messages being blocked
// (4) We know that there are still unblocked messages that we need to process
// (5) The latch for this round is still greater than zero
while (ts.hasPendingError() == false &&
(first == true || this.depTracker.stillHasWorkFragments(ts) || (latch != null && latch.getCount() > 0))) {
if (trace.val)
LOG.trace(String.format("%s - %s loop [first=%s, stillHasWorkFragments=%s, latch=%s]",
ts, ClassUtil.getCurrentMethodName(),
first, this.depTracker.stillHasWorkFragments(ts), queue.size(), latch));
// If this is the not first time through the loop, then poll the queue
// to get our list of fragments
if (first == false) {
all_local = true;
is_localSite = false;
is_localPartition = false;
num_localPartition = 0;
num_localSite = 0;
num_remote = 0;
num_skipped = 0;
total = 0;
if (trace.val)
LOG.trace(String.format("%s - Waiting for unblocked tasks on partition %d",
ts, this.partitionId));
fragmentBuilders = queue.poll(); // NON-BLOCKING
// If we didn't get back a list of fragments here, then we will spin through
// and invoke utilityWork() to try to do something useful until what we need shows up
if (needs_profiling) ts.profiler.startExecDtxnWork();
if (hstore_conf.site.exec_profiling) this.profiler.sp1_time.start();
try {
while (fragmentBuilders == null) {
// If there is more work that we could do, then we'll just poll the queue
// without waiting so that we can go back and execute it again if we have
// more time.
if (this.utilityWork()) {
fragmentBuilders = queue.poll();
}
// Otherwise we will wait a little so that we don't spin the CPU
else {
fragmentBuilders = queue.poll(WORK_QUEUE_POLL_TIME, TimeUnit.MILLISECONDS);
}
} // WHILE
} catch (InterruptedException ex) {
if (this.hstore_site.isShuttingDown() == false) {
LOG.error(String.format("%s - We were interrupted while waiting for blocked tasks", ts), ex);
}
return (null);
} finally {
if (needs_profiling) ts.profiler.stopExecDtxnWork();
if (hstore_conf.site.exec_profiling) this.profiler.sp1_time.stopIfStarted();
}
}
assert(fragmentBuilders != null);
// If the list of unblocked fragments is empty, then we
// know that we have dispatched all of the WorkFragments for the
// transaction's current SQLStmt batch. That means we can just wait
// until all the results return to us.
if (fragmentBuilders.isEmpty()) {
if (trace.val)
LOG.trace(String.format("%s - Got an empty list of WorkFragments at partition %d. " +
"Blocking until dependencies arrive",
ts, this.partitionId));
break;
}
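// Reset the scratch lists that we use to group the fragments by destination.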
this.tmp_localWorkFragmentBuilders.clear();
if (predict_singlePartition == false) {
this.tmp_remoteFragmentBuilders.clear();
this.tmp_localSiteFragmentBuilders.clear();
}
// -------------------------------
// FAST PATH: Assume everything is local
// -------------------------------
if (predict_singlePartition) {
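// Every fragment in a single-partition txn runs at this partition, so register
// each one with the DependencyTracker on the first pass and queue everything
// that is not blocked for local execution.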
for (WorkFragment.Builder fragmentBuilder : fragmentBuilders) {
if (first == false || this.depTracker.addWorkFragment(ts, fragmentBuilder, batchParams)) {
this.tmp_localWorkFragmentBuilders.add(fragmentBuilder);
total++;
num_localPartition++;
}
} // FOR
// We have to tell the transaction handle to start the round before we send off the
// WorkFragments for execution, since they might start executing locally!
if (first) {
ts.startRound(this.partitionId);
latch = this.depTracker.getDependencyLatch(ts);
}
// Execute all of our WorkFragments quickly at our local ExecutionEngine
for (WorkFragment.Builder fragmentBuilder : this.tmp_localWorkFragmentBuilders) {
if (debug.val)
LOG.debug(String.format("%s - Got unblocked %s to execute locally",
ts, fragmentBuilder.getClass().getSimpleName()));
assert(fragmentBuilder.getPartitionId() == this.partitionId) :
String.format("Trying to process %s for %s on partition %d but it should have been " +
"sent to partition %d [singlePartition=%s]\n%s",
fragmentBuilder.getClass().getSimpleName(), ts, this.partitionId,
fragmentBuilder.getPartitionId(), predict_singlePartition, fragmentBuilder);
WorkFragment fragment = fragmentBuilder.build();
this.processWorkFragment(ts, fragment, batchParams);
} // FOR
}
// -------------------------------
// SLOW PATH: Mixed local and remote messages
// -------------------------------
else {
// Look at each task and figure out whether it needs to be executed at a remote
// HStoreSite or whether we can execute it at one of our local PartitionExecutors.
for (WorkFragment.Builder fragmentBuilder : fragmentBuilders) {
int partition = fragmentBuilder.getPartitionId();
is_localSite = hstore_site.isLocalPartition(partition);
is_localPartition = (partition == this.partitionId);
all_local = all_local && is_localPartition;
// If this is the last WorkFragment that we're going to send to this partition for
// this batch, then we will want to check whether we know that this is the last
// time this txn will ever need to go to that partition. If so, then we'll mark
// the fragment so that the remote partition knows it can prepare early.
if (notify != null && notify.donePartitions.contains(partition) &&
tmp_fragmentsPerPartition.dec(partition) == 0) {
if (debug.val)
LOG.debug(String.format("%s - Setting last fragment flag in %s for partition %d",
ts, WorkFragment.class.getSimpleName(), partition));
fragmentBuilder.setLastFragment(true);
}
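// On the first pass we still have to register each fragment with the
// DependencyTracker, which tells us whether it can be dispatched right away.
// Fragments pulled from the unblocked queue on later passes were already
// registered, so we can dispatch them immediately.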
if (first == false || this.depTracker.addWorkFragment(ts, fragmentBuilder, batchParams)) {
total++;
// At this point we know that the WorkFragment has been registered
// in the LocalTransaction, so it's safe for us to check
// whether we already have a prefetched result that we need
// if (prefetch && is_localPartition == false) {
// boolean skip_queue = true;
// for (int i = 0, cnt = fragmentBuilder.getFragmentIdCount(); i < cnt; i++) {
// int fragId = fragmentBuilder.getFragmentId(i);
// int paramIdx = fragmentBuilder.getParamIndex(i);
//
// VoltTable vt = this.queryCache.getResult(ts.getTransactionId(),
// fragId,
// partition,
// parameters[paramIdx]);
// if (vt != null) {
// if (trace.val)
// LOG.trace(String.format("%s - Storing cached result from partition %d for fragment %d",
// ts, partition, fragId));
// this.depTracker.addResult(ts, partition, fragmentBuilder.getOutputDepId(i), vt);
// } else {
// skip_queue = false;
// }
// } // FOR
// // If we were able to get cached results for all of the fragmentIds in
// // this WorkFragment, then there is no need for us to send the message
// // So we'll just skip queuing it up! How nice!
// if (skip_queue) {
// if (debug.val)
// LOG.debug(String.format("%s - Using prefetch result for all fragments from partition %d",
// ts, partition));
// num_skipped++;
// continue;
// }
// }
// Otherwise add it to our list of WorkFragments that we want
// to queue up right now
if (is_localPartition) {
is_localReadOnly = (is_localReadOnly && fragmentBuilder.getReadOnly());
this.tmp_localWorkFragmentBuilders.add(fragmentBuilder);
num_localPartition++;
} else if (is_localSite) {
this.tmp_localSiteFragmentBuilders.add(fragmentBuilder);
num_localSite++;
} else {
this.tmp_remoteFragmentBuilders.add(fragmentBuilder);
num_remote++;
}
}
} // FOR
assert(total == (num_remote + num_localSite + num_localPartition + num_skipped)) :
String.format("Total:%d / Remote:%d / LocalSite:%d / LocalPartition:%d / Skipped:%d",
total, num_remote, num_localSite, num_localPartition, num_skipped);
// We have to tell the txn to start the round before we send off the
// WorkFragments for execution, since they might start executing locally!
if (first) {
ts.startRound(this.partitionId);
latch = this.depTracker.getDependencyLatch(ts);
}
// Now request the fragments that aren't local
// We want to push these out as soon as possible
if (num_remote > 0) {
// We only need to serialize the ParameterSets once
if (serializedParams == false) {
if (needs_profiling) ts.profiler.startSerialization();
tmp_serializedParams.clear();
for (int i = 0; i < batchParams.length; i++) {
if (batchParams[i] == null) {
tmp_serializedParams.add(ByteString.EMPTY);
} else {
this.fs.clear();
try {
batchParams[i].writeExternal(this.fs);
ByteString bs = ByteString.copyFrom(this.fs.getBBContainer().b);
tmp_serializedParams.add(bs);
} catch (Exception ex) {
String msg = "Failed to serialize ParameterSet " + i + " for " + ts;
throw new ServerFaultException(msg, ex, ts.getTransactionId());
}
}
} // FOR
if (needs_profiling) ts.profiler.stopSerialization();
}
//if (trace.val)
// LOG.trace(String.format("%s - Requesting %d %s to be executed on remote partitions " +
// "[doneNotifications=%s]",
// ts, num_remote, WorkFragment.class.getSimpleName(), notify!=null));
this.requestWork(ts, tmp_remoteFragmentBuilders, tmp_serializedParams, notify);
if (needs_profiling) ts.profiler.markRemoteQuery();
}
// Then dispatch the tasks that are needed at the same HStoreSite but
// at a different partition than this one
if (num_localSite > 0) {
if (trace.val)
LOG.trace(String.format("%s - Executing %d WorkFragments on local site's partitions",
ts, num_localSite));
for (WorkFragment.Builder builder : this.tmp_localSiteFragmentBuilders) {
PartitionExecutor other = hstore_site.getPartitionExecutor(builder.getPartitionId());
other.queueWork(ts, builder.build());
} // FOR
if (needs_profiling) ts.profiler.markRemoteQuery();
}
// Then execute all of the tasks that need to access this partition.
// We dispatched the remote-partition-local-site fragments first because they
// need to get queued up at the other PartitionExecutors.
if (num_localPartition > 0) {
if (trace.val)
LOG.trace(String.format("%s - Executing %d WorkFragments on local partition",
ts, num_localPartition));
for (WorkFragment.Builder fragmentBuilder : this.tmp_localWorkFragmentBuilders) {
this.processWorkFragment(ts, fragmentBuilder.build(), batchParams);
} // FOR
}
}
if (trace.val)
LOG.trace(String.format("%s - Dispatched %d WorkFragments " +
"[remoteSite=%d, localSite=%d, localPartition=%d]",
ts, total, num_remote, num_localSite, num_localPartition));
first = false;
} // WHILE
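// All outbound WorkFragments have been sent, so we can release the buffer
// that we used to serialize the ParameterSets.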
this.fs.getBBContainer().discard();
if (trace.val)
LOG.trace(String.format("%s - BREAK OUT [first=%s, stillHasWorkFragments=%s, latch=%s]",
ts, first, this.depTracker.stillHasWorkFragments(ts), latch));
// assert(ts.stillHasWorkFragments() == false) :
// String.format("Trying to block %s before all of its WorkFragments have been dispatched!\n%s\n%s",
// ts,
// StringUtil.join("** ", "\n", tempDebug),
// this.getVoltProcedure(ts.getProcedureName()).getLastBatchPlan());
// Now that we know all of our WorkFragments have been dispatched, we can then
// wait for all of the results to come back in.
if (latch == null) latch = this.depTracker.getDependencyLatch(ts);
assert(latch != null) :
String.format("Unexpected null dependency latch for %s", ts);
if (latch.getCount() > 0) {
if (debug.val) {
LOG.debug(String.format("%s - All blocked messages dispatched. Waiting for %d dependencies",
ts, latch.getCount()));
if (trace.val) LOG.trace(ts.toString());
}
boolean timeout = false;
long startTime = EstTime.currentTimeMillis();
if (needs_profiling) ts.profiler.startExecDtxnWork();
if (hstore_conf.site.exec_profiling) this.profiler.sp1_time.start();
try {
while (latch.getCount() > 0 && ts.hasPendingError() == false) {
if (this.utilityWork() == false) {
// await() returns true once the latch reaches zero, which means that
// all of the results have arrived and we can stop waiting.
if (latch.await(WORK_QUEUE_POLL_TIME, TimeUnit.MILLISECONDS)) break;
}
if ((EstTime.currentTimeMillis() - startTime) > hstore_conf.site.exec_response_timeout) {
timeout = true;
break;
}
} // WHILE
} catch (InterruptedException ex) {
if (this.hstore_site.isShuttingDown() == false) {
LOG.error(String.format("%s - We were interrupted while waiting for results", ts), ex);
}
timeout = true;
} catch (Throwable ex) {
String msg = String.format("Fatal error for %s while waiting for results", ts);
throw new ServerFaultException(msg, ex);
} finally {
if (needs_profiling) ts.profiler.stopExecDtxnWork();
if (hstore_conf.site.exec_profiling) this.profiler.sp1_time.stopIfStarted();
}
if (timeout && this.isShuttingDown() == false) {
LOG.warn(String.format("Still waiting for responses for %s after %d ms [latch=%d]\n%s",
ts, hstore_conf.site.exec_response_timeout, latch.getCount(), ts.debug()));
LOG.warn("Procedure Parameters:\n" + ts.getProcedureParameters());
hstore_conf.site.exec_profiling = true;
LOG.warn(hstore_site.statusSnapshot());
String msg = "The query responses for " + ts + " never arrived!";
throw new ServerFaultException(msg, ts.getTransactionId());
}
}
// Update done partitions
if (notify != null && notify.donePartitions.isEmpty() == false) {
if (debug.val)