package edu.brown.hstore.txns;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.BlockingDeque;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.log4j.Logger;
import org.voltdb.CatalogContext;
import org.voltdb.ParameterSet;
import org.voltdb.VoltTable;
import org.voltdb.VoltTableNonBlocking;
import org.voltdb.catalog.CatalogType;
import org.voltdb.catalog.PlanFragment;
import org.voltdb.exceptions.ServerFaultException;
import org.voltdb.utils.Pair;
import edu.brown.catalog.CatalogUtil;
import edu.brown.hstore.HStoreConstants;
import edu.brown.hstore.Hstoreservice.WorkFragment;
import edu.brown.hstore.PartitionExecutor;
import edu.brown.hstore.conf.HStoreConf;
import edu.brown.hstore.txns.AbstractTransaction.RoundState;
import edu.brown.interfaces.DebugContext;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.utils.StringUtil;
/**
* This class is responsible for managing the input and output dependencies of distributed
* transactions. It contains logic to creating blocking data structures that are released
* once the appropriate VoltTables arrive for queries executed on remote partitions.
* @author pavlo
*
*/
public class DependencyTracker {
private static final Logger LOG = Logger.getLogger(DependencyTracker.class);
private static final LoggerBoolean debug = new LoggerBoolean();
private static final LoggerBoolean trace = new LoggerBoolean();
static {
LoggerUtil.attachObserver(LOG, debug, trace);
}
/**
* Special set to indicate that there are no more WorkFragments to be executed
*/
private static final Set<WorkFragment.Builder> EMPTY_FRAGMENT_SET = Collections.emptySet();
/**
* Internal Dependency Information
*/
private class TransactionState {
// ----------------------------------------------------------------------------
// GLOBAL DATA MEMBERS
// ----------------------------------------------------------------------------
/**
* The id of the current transaction that holds this state handle
*/
private Long txn_id;
// ----------------------------------------------------------------------------
// ROUND DATA MEMBERS
// ----------------------------------------------------------------------------
/**
* This latch will block until all the Dependency results have returned
* Generated in startRound()
*/
private CountDownLatch dependency_latch;
/**
* Mapping from DependencyId to the corresponding DependencyInfo object
* Map<DependencyId, DependencyInfo>
*/
private final Map<Integer, DependencyInfo> dependencies = new HashMap<Integer, DependencyInfo>();
/**
* Final result output dependencies. Each position in the list represents a single Statement
*/
private final List<Integer> output_order = new ArrayList<Integer>();
/**
* Sometimes we will get results back while we are still queuing up the rest of the tasks and
* haven't started the next round. So we need a temporary space where we can put these guys until
* we start the round. Otherwise calculating the proper latch count is tricky
* Partition-DependencyId Key -> VoltTable
*/
private final Map<Pair<Integer, Integer>, VoltTable> queued_results = new LinkedHashMap<Pair<Integer,Integer>, VoltTable>();
/**
* Blocked FragmentTaskMessages
*/
private final List<WorkFragment.Builder> blocked_tasks = new ArrayList<WorkFragment.Builder>();
/**
* Unblocked FragmentTaskMessages
* The VoltProcedure thread will block on this queue waiting for tasks to execute inside of ExecutionSite
* This has to be a set so that we make sure that we only submit a single message that contains all of the tasks to the Dtxn.Coordinator
*/
private final BlockingDeque<Collection<WorkFragment.Builder>> unblocked_tasks = new LinkedBlockingDeque<Collection<WorkFragment.Builder>>();
/**
* Whether the current transaction still has outstanding WorkFragments that it
* needs to execute or get back dependencies from
*/
private boolean still_has_tasks = true;
/**
* The total # of dependencies this Transaction is waiting for in the current round
*/
private int dependency_ctr = 0;
/**
* The total # of dependencies received thus far in the current round
*/
private int received_ctr = 0;
// ----------------------------------------------------------------------------
// PREFETCH QUERY DATA
// ----------------------------------------------------------------------------
// private QueryTracker prefetch_tracker;
/**
* SQLStmt Counter -> FragmentId -> DependencyInfo
*/
private Map<Integer, Map<Integer, DependencyInfo>> prefetch_dependencies;
/**
* The total # of WorkFragments that the txn prefetched
*/
private int prefetch_ctr = 0;
// ----------------------------------------------------------------------------
// INITIALIZATION
// ----------------------------------------------------------------------------
private TransactionState(LocalTransaction ts) {
this.txn_id = ts.getTransactionId();
if (ts.hasPrefetchQueries()) {
// this.prefetch_tracker = new QueryTracker();
this.prefetch_dependencies = new HashMap<Integer, Map<Integer,DependencyInfo>>();
}
}
/**
*
* @param d_id Output Dependency Id
* @return
*/
protected DependencyInfo getDependencyInfo(int d_id) {
return (this.dependencies.get(d_id));
}
/**
* Clear the dependency information for a single SQLStmt batch round.
* We will clear out the prefetch information because we need that
* until the transaction is finished.
*/
public void clear() {
if (trace.val)
LOG.trace("Clearing out internal state for " + this);
this.dependencies.clear();
this.output_order.clear();
this.queued_results.clear();
this.blocked_tasks.clear();
this.unblocked_tasks.clear();
this.still_has_tasks = true;
this.dependency_ctr = 0;
this.received_ctr = 0;
}
@Override
public String toString() {
return String.format("%s{#%d}", this.getClass().getSimpleName(), this.txn_id);
}
public Map<String, Object> debugMap() {
Map<String, Object> m = new LinkedHashMap<String, Object>();
for (Field f : this.getClass().getDeclaredFields()) {
Object obj = null;
try {
obj = f.get(this);
} catch (IllegalAccessException ex) {
throw new RuntimeException(ex);
}
// Skip parent reference
if (obj instanceof DependencyTracker) continue;
if (obj != null && obj == this.dependencies) {
Map<Integer, Object> inner = new TreeMap<Integer, Object>();
for (Entry<Integer, DependencyInfo> e : this.dependencies.entrySet()) {
inner.put(e.getKey(), e.getValue().debug());
}
obj = inner;
}
else if (obj != null && obj == this.prefetch_dependencies) {
Map<Integer, Object> inner = new TreeMap<Integer, Object>();
for (Integer stmtCounter : this.prefetch_dependencies.keySet()) {
Map<Integer, Object> stmtDeps = new LinkedHashMap<Integer, Object>();
for (Entry<Integer, DependencyInfo> e : this.prefetch_dependencies.get(stmtCounter).entrySet()) {
stmtDeps.put(e.getKey(), e.getValue().debug());
} // FOR
inner.put(stmtCounter, stmtDeps);
} // FOR
obj = inner;
}
m.put(StringUtil.title(f.getName().replace("_", " ")), obj);
} // FOR
return (m);
}
} // CLASS
private final PartitionExecutor executor;
private final CatalogContext catalogContext;
private final Map<Long, TransactionState> txnStates = new ConcurrentHashMap<Long, TransactionState>();
// ----------------------------------------------------------------------------
// INITIALIZATION
// ----------------------------------------------------------------------------
public DependencyTracker(PartitionExecutor executor) {
this.executor = executor;
this.catalogContext = this.executor.getCatalogContext();
}
public void addTransaction(LocalTransaction ts) {
if (this.txnStates.containsKey(ts.getTransactionId())) {
return;
}
// FIXME
TransactionState state = new TransactionState(ts);
this.txnStates.put(ts.getTransactionId(), state);
if (trace.val)
LOG.trace(String.format("Added %s to %s", ts, this));
}
public void removeTransaction(LocalTransaction ts) {
// FIXME
TransactionState state = this.txnStates.remove(ts.getTransactionId());
if (trace.val && state != null) {
LOG.trace(String.format("Removed %s from %s", ts, this));
}
}
// ----------------------------------------------------------------------------
// EXECUTION ROUNDS
// ----------------------------------------------------------------------------
protected void initRound(LocalTransaction ts) {
final TransactionState state = this.getState(ts);
assert(state.queued_results.isEmpty()) :
String.format("Trying to initialize ROUND #%d for %s but there are %d queued results",
ts.getCurrentRound(ts.getBasePartition()),
ts, state.queued_results.size());
if (ts.getCurrentRound(ts.getBasePartition()) != 0) state.clear();
}
protected void startRound(LocalTransaction ts) {
if (trace.val)
LOG.trace(String.format("%s - Start round", ts));
final TransactionState state = this.getState(ts);
final int basePartition = ts.getBasePartition();
final int currentRound = ts.getCurrentRound(basePartition);
final int batch_size = ts.getCurrentBatchSize();
// Create our output counters
assert(state.output_order.isEmpty());
for (int stmtIndex = 0; stmtIndex < batch_size; stmtIndex++) {
if (trace.val)
LOG.trace(String.format("%s - Examining %d dependencies [stmtIndex=%d, currentRound=%d]",
ts, state.dependencies.size(), stmtIndex, currentRound));
for (DependencyInfo dinfo : state.dependencies.values()) {
if (trace.val)
LOG.trace(String.format("%s - Checking %s", ts, dinfo));
// Add this DependencyInfo our output list if it's being used in this round for this txn
// and if it is not an internal dependency
if (dinfo.inSameTxnRound(ts.getTransactionId(), currentRound) &&
dinfo.isInternal() == false && dinfo.getStatementIndex() == stmtIndex) {
state.output_order.add(dinfo.getDependencyId());
}
} // FOR
} // FOR
// XXX Disable assert - for SnapshotRestore test
/*
assert(batch_size == state.output_order.size()) :
String.format("%s - Expected %d output dependencies but we queued up %d " +
"[outputOrder=%s / numDependencies=%d]",
ts, batch_size, state.output_order.size(),
state.output_order, state.dependencies.size());
*/
// Release any queued responses/results
if (state.queued_results.isEmpty() == false) {
if (trace.val)
LOG.trace(String.format("%s - Releasing %d queued results",
ts, state.queued_results.size()));
for (Entry<Pair<Integer, Integer>, VoltTable> e : state.queued_results.entrySet()) {
this.addResult(ts, e.getKey(), e.getValue(), true);
} // FOR
state.queued_results.clear();
}
// Now create the latch
int count = state.dependency_ctr - state.received_ctr;
assert(count >= 0);
assert(state.dependency_latch == null) : "This should never happen!\n" + ts.debug();
state.dependency_latch = new CountDownLatch(count);
if (debug.val)
LOG.debug(String.format("%s - Created %s with dependency counter set to %d",
ts, state.dependency_latch.getClass().getSimpleName(), count));
}
protected void finishRound(LocalTransaction ts) {
final TransactionState state = this.getState(ts);
assert(state.dependency_ctr == state.received_ctr) :
String.format("Trying to finish ROUND #%d on partition %d for %s before it was started",
ts.getCurrentRound(ts.getBasePartition()),
ts.getBasePartition(), ts);
assert(state.queued_results.isEmpty()) :
String.format("Trying to finish ROUND #%d on partition %d for %s but there are %d queued results",
ts.getCurrentRound(ts.getBasePartition()),
ts.getBasePartition(), ts, state.queued_results.size());
// Reset our initialization flag so that we can be ready to run more stuff the next round
if (state.dependency_latch != null) {
assert(state.dependency_latch.getCount() == 0);
if (trace.val)
LOG.trace("Setting CountDownLatch to null for " + ts);
state.dependency_latch = null;
}
state.clear();
}
// ----------------------------------------------------------------------------
// INTERNAL METHODS
// ----------------------------------------------------------------------------
private TransactionState getState(LocalTransaction ts) {
TransactionState state = this.txnStates.get(ts.getTransactionId());
assert(state != null) :
String.format("Unexpected null %s handle for %s at %s",
TransactionState.class.getSimpleName(), ts, this);
return (state);
}
/**
*
* @param state
* @param currentRound
* @param stmtCounter
* @param paramsHash TODO
* @param fragmentId TODO
* @param dep_id
* @return
*/
private DependencyInfo getOrCreateDependencyInfo(LocalTransaction ts,
TransactionState state,
int currentRound,
int stmtCounter,
int stmtIndex,
int paramsHash,
int fragmentId,
Integer dep_id) {
DependencyInfo dinfo = state.dependencies.get(dep_id);
if (dinfo != null) {
if (trace.val)
LOG.trace(String.format("%s - Reusing DependencyInfo[hashCode=%d] for %s. " +
"Checking whether it needs to be reset " +
"[currentRound=%d, lastRound=%d, lastTxn=%s]",
ts, dinfo.hashCode(), TransactionUtil.debugStmtDep(stmtCounter, dep_id),
currentRound, dinfo.getRound(), dinfo.getTransactionId()));
if (dinfo.inSameTxnRound(state.txn_id, currentRound) == false) {
if (trace.val)
LOG.trace(String.format("%s - Clearing out DependencyInfo[%d].",
state.txn_id, dinfo.hashCode()));
dinfo.finish();
}
} else {
dinfo = new DependencyInfo(this.catalogContext);
state.dependencies.put(dep_id, dinfo);
if (trace.val)
LOG.trace(String.format("%s - Created new DependencyInfo for %s " +
"[stmtIndex=%d, fragmentId=%d, paramsHash=%d]",
ts, TransactionUtil.debugStmtDep(stmtCounter, dep_id),
stmtIndex, fragmentId, paramsHash));
}
if (dinfo.isInitialized() == false) {
if (debug.val)
LOG.debug(String.format("%s - Initializing DependencyInfo for %s " +
"[stmtIndex=%d, fragmentId=%d, paramsHash=%d]",
ts, TransactionUtil.debugStmtDep(stmtCounter, dep_id),
stmtIndex, fragmentId, paramsHash));
dinfo.init(state.txn_id, currentRound, stmtCounter, stmtIndex, paramsHash, dep_id.intValue());
}
return (dinfo);
}
/**
* Check to see whether there is already a prefetched query queued up for the
* given WorkFragment information.
* @param state
* @param round
* @param stmtCounter
* @param partitionId TODO
* @param paramsHash
* @param fragmentId
* @param dependencyId
* @return
*/
private DependencyInfo getPrefetchDependencyInfo(TransactionState state,
int round,
int stmtCounter,
int stmtIndex,
int partitionId,
int paramsHash,
int fragmentId,
int dependencyId) {
Map<Integer, DependencyInfo> stmt_deps = state.prefetch_dependencies.get(stmtCounter);
if (stmt_deps == null) {
if (trace.val)
LOG.trace(String.format("%s - Invalid prefetch query for %s." +
"No StmtCounter match.",
state, TransactionUtil.debugStmtDep(stmtCounter, dependencyId)));
return (null);
}
DependencyInfo dinfo = stmt_deps.get(fragmentId);
if (dinfo == null) {
if (trace.val)
LOG.trace(String.format("%s - Invalid prefetch query for %s. " +
"No FragmentID match. [%d]",
state, TransactionUtil.debugStmtDep(stmtCounter, dependencyId),
fragmentId));
return (null);
}
if (dinfo.getParameterSetHash() != paramsHash) {
if (trace.val)
LOG.trace(String.format("%s - Invalid prefetch query for %s. " +
"Parameter hash mismatch [%d != %d]",
state, TransactionUtil.debugStmtDep(stmtCounter, dependencyId),
dinfo.getParameterSetHash(), paramsHash));
return (null);
}
else if (dinfo.getExpectedPartitions().contains(partitionId) == false) {
if (trace.val)
LOG.trace(String.format("%s - Invalid prefetch query for %s. " +
"Partition mismatch [%d != %d]",
state, TransactionUtil.debugStmtDep(stmtCounter, dependencyId),
partitionId, dinfo.getExpectedPartitions()));
return (null);
}
// IMPORTANT: We have to update this DependencyInfo's output id
// so that the blocked WorkFragment can retrieve it properly when it
// runs. This is necessary because we don't know what the PlanFragment's
// output id will be before it runs...
if (debug.val && dinfo.isPrefetch() == false) {
LOG.debug(String.format("%s - Converting prefetch %s into regular result\n%s",
state, dinfo.getClass().getSimpleName(), dinfo));
}
dinfo.prefetchOverride(round, dependencyId, stmtIndex);
state.dependencies.put(dependencyId, dinfo);
return (dinfo);
}
/**
* Update internal state information after a new result was added to a DependencyInfo.
* This may cause the next round of blocked WorkFragments to get released.
* @param ts
* @param state
* @param dinfo
*/
private void updateAfterNewResult(final LocalTransaction ts,
final TransactionState state,
final DependencyInfo dinfo) {
// Check whether we need to start running stuff now
// 2011-12-31: This needs to be synchronized because they might check
// whether there are no more blocked tasks before we
// can add to_unblock to the unblocked_tasks queue
if (state.blocked_tasks.isEmpty() == false && dinfo.hasTasksReady()) {
Collection<WorkFragment.Builder> to_unblock = dinfo.getAndReleaseBlockedWorkFragments();
assert(to_unblock != null);
assert(to_unblock.isEmpty() == false);
if (debug.val)
LOG.debug(String.format("%s - Got %d WorkFragments to unblock that were waiting for DependencyId %d",
ts, to_unblock.size(), dinfo.getDependencyId()));
state.blocked_tasks.removeAll(to_unblock);
state.unblocked_tasks.addLast(to_unblock);
}
else if (debug.val) {
LOG.debug(String.format("%s - No WorkFragments to unblock after storing result for DependencyId %d " +
"[blockedTasks=%d, hasTasksReady=%s]",
ts, dinfo.getDependencyId(), state.blocked_tasks.size(), dinfo.hasTasksReady()));
}
if (state.dependency_latch != null) {
state.dependency_latch.countDown();
if (debug.val)
LOG.debug(String.format("%s - Decremented %s to %d for partition %d ",
ts, state.dependency_latch.getClass().getSimpleName(),
state.dependency_latch.getCount(), ts.getBasePartition()));
// HACK: If the latch is now zero, then push an EMPTY set into the unblocked queue
// This will cause the blocked PartitionExecutor thread to wake up and realize that he's done
if (state.dependency_latch.getCount() == 0) {
if (debug.val)
LOG.debug(String.format("%s - Pushing EMPTY_FRAGMENT_SET to PartitionExecutor " +
"at partition %d because all of the dependencies have arrived!",
ts, ts.getBasePartition()));
state.unblocked_tasks.addLast(EMPTY_FRAGMENT_SET);
}
}
state.still_has_tasks = (state.blocked_tasks.isEmpty() == false ||
state.unblocked_tasks.isEmpty() == false);
}
// ----------------------------------------------------------------------------
// DEPENDENCY TRACKING METHODS
// ----------------------------------------------------------------------------
/**
* Get the final results of the last round of execution for the given txn.
* This should only be called to get the VoltTables that you want to send into
* the Java stored procedure code (e.g., the return value for voltExecuteSql())
* @return
*/
public VoltTable[] getResults(LocalTransaction ts) {
final TransactionState state = this.getState(ts);
final VoltTable results[] = new VoltTable[state.output_order.size()];
if (debug.val)
LOG.debug(String.format("%s - Generating output results with %d tables",
ts, results.length));
HStoreConf hstore_conf = this.executor.getHStoreConf();
boolean nonblocking = (hstore_conf.site.specexec_nonblocking &&
ts.isSysProc() == false &&
ts.profiler != null);
for (int stmtIndex = 0; stmtIndex < results.length; stmtIndex++) {
Integer dependency_id = state.output_order.get(stmtIndex);
assert(dependency_id != null) :
"Null output dependency id for Statement index " + stmtIndex + " in txn #" + state.txn_id;
assert(state.dependencies.containsKey(dependency_id)) :
String.format("Missing info for %s in %s",
TransactionUtil.debugStmtDep(stmtIndex, dependency_id), ts);
VoltTable vt = state.dependencies.get(dependency_id).getResult();
// Special Non-Blocking Wrapper
if (nonblocking) {
VoltTableNonBlocking vtnb = new VoltTableNonBlocking(hstore_conf.site.txn_profiling ? ts.profiler : null);
if (vt != null) vtnb.setRealTable(vt);
results[stmtIndex] = vtnb;
} else {
assert(vt != null) :
String.format("Null output result for Statement index %d in %s", stmtIndex, this);
results[stmtIndex] = vt;
}
} // FOR
return (results);
}
/**
* Queues up a WorkFragment for this txn.
* If the return value is true, then the WorkFragment can be executed
* immediately (either locally or on at a remote partition).
* If the return value is false, then the WorkFragment is blocked waiting for dependencies.
* @param ts
* @param fragment
* @return true if the WorkFragment should be dispatched right now
*/
public boolean addWorkFragment(LocalTransaction ts, WorkFragment.Builder fragment, ParameterSet batchParams[]) {
final TransactionState state = this.getState(ts);
assert(ts.getCurrentRoundState(ts.getBasePartition()) == RoundState.INITIALIZED) :
String.format("Invalid round state %s for %s at partition %d",
ts.getCurrentRoundState(ts.getBasePartition()),
ts, ts.getBasePartition());
boolean blocked = false;
final int partition = fragment.getPartitionId();
final int num_fragments = fragment.getFragmentIdCount();
final int currentRound = ts.getCurrentRound(ts.getBasePartition());
if (debug.val)
LOG.debug(String.format("%s - Adding %s for partition %d with %d fragments",
ts, WorkFragment.class.getSimpleName(), partition, num_fragments));
// PAVLO: 2011-12-10
// We moved updating the exec_touchedPartitions histogram into the
// BatchPlanner so that we won't increase the counter for a partition
// if we read from a replicated table at the local partition
// this.state.exec_touchedPartitions.put(partition, num_fragments);
// PAVLO 2011-12-20
// I don't know why, but before this loop used to be synchronized
// It definitely does not need to be because this is only invoked by the
// transaction's base partition PartitionExecutor
int output_dep_id, input_dep_id;
int ignore_ctr = 0;
for (int i = 0; i < num_fragments; i++) {
int partitionId = fragment.getPartitionId();
int fragmentId = fragment.getFragmentId(i);
int stmtCounter = fragment.getStmtCounter(i);
int stmtIndex = fragment.getStmtIndex(i);
int paramsHash = batchParams[fragment.getParamIndex(i)].hashCode();
// If this task produces output dependencies, then we need to make
// sure that the txn wait for it to arrive first
if ((output_dep_id = fragment.getOutputDepId(i)) != HStoreConstants.NULL_DEPENDENCY_ID) {
DependencyInfo dinfo = null;
boolean prefetch = false;
// Check to see whether there is a already a prefetch WorkFragment for
// this same query invocation.
if (state.prefetch_ctr > 0) {
dinfo = this.getPrefetchDependencyInfo(state, currentRound,
stmtCounter, stmtIndex, partitionId,
paramsHash, fragmentId, output_dep_id);
prefetch = (dinfo != null);
}
if (dinfo == null) {
dinfo = this.getOrCreateDependencyInfo(ts, state, currentRound,
stmtCounter, stmtIndex, paramsHash,
fragmentId, output_dep_id);
}
// Store the stmtIndex of when this dependency will show up
dinfo.addPartition(partition);
state.dependency_ctr++;
// this.addResultDependencyStatement(ts, state, partition, output_dep_id, stmtIndex);
if (trace.val)
LOG.trace(String.format("%s - Added new %s %s for PlanFragment %d at partition %d " +
"[depCtr=%d, prefetch=%s]\n%s",
ts, dinfo.getClass().getSimpleName(),
TransactionUtil.debugStmtDep(stmtCounter, output_dep_id),
fragment.getFragmentId(i),
partition,
state.dependency_ctr, prefetch,
dinfo.debug()));
// If this query was prefetched, we need to push its results through the
// the tracker so that it can update counters
if (prefetch) {
// We also need a way to mark this entry in the WorkFragment as
// unnecessary and make sure that we don't actually send it out
// if there is no new work to be done.
fragment.setStmtIgnore(i, true);
ignore_ctr++;
ts.getTransactionLock().lock();
try {
// Switch the DependencyInfo out of prefetch mode
// This means that all incoming results (if any) will be
// added to TransactionState just like any other regular query.
dinfo.resetPrefetch();
// Now update the internal state just as if these new results
// arrived for this query.
state.received_ctr += dinfo.getResultsCount();
this.updateAfterNewResult(ts, state, dinfo);
} finally {
ts.getTransactionLock().unlock();
} // SYNCH
}
} // IF
// If this WorkFragment needs an input dependency, then we need to make sure it arrives at
// the executor before it is allowed to start executing
if (fragment.getNeedsInput()) {
input_dep_id = fragment.getInputDepId(i);
if (input_dep_id != HStoreConstants.NULL_DEPENDENCY_ID) {
DependencyInfo dinfo = null;
// Check to see whether there is already a prefetch WorkFragment that will
// generate this result for us.
if (state.prefetch_ctr > 0) {
dinfo = this.getPrefetchDependencyInfo(state, currentRound,
stmtCounter, stmtIndex, partitionId,
paramsHash, fragmentId, input_dep_id);
}
if (dinfo == null) {
dinfo = this.getOrCreateDependencyInfo(ts, state, currentRound,
stmtCounter, stmtIndex, paramsHash,
fragmentId, input_dep_id);
}
dinfo.addBlockedWorkFragment(fragment);
dinfo.markInternal();
if (blocked == false) {
state.blocked_tasks.add(fragment);
blocked = true;
}
if (trace.val)
LOG.trace(String.format("%s - Created internal input dependency %d for PlanFragment %d\n%s",
ts, input_dep_id, fragment.getFragmentId(i), dinfo.debug()));
}
}
// *********************************** DEBUG ***********************************
if (trace.val) {
int output_ctr = 0;
int dep_ctr = 0;
Map<String, Object> m = new LinkedHashMap<String, Object>();
for (DependencyInfo dinfo : state.dependencies.values()) {
if (dinfo.getStatementCounter() == stmtCounter) dep_ctr++;
if (dinfo.isInternal() == false) {
m.put(String.format("Output[%02d]", output_ctr++), dinfo.debug());
}
} // FOR
LOG.trace(String.format("%s - Number of Output Dependencies for StmtCounter #%d: " +
"%d out of %d\n%s",
ts, stmtCounter, output_ctr, dep_ctr, StringUtil.formatMaps(m)));
}
// *********************************** DEBUG ***********************************
} // FOR
// *********************************** DEBUG ***********************************
if (debug.val) {
CatalogType catalog_obj = null;
if (ts.isSysProc()) {
catalog_obj = ts.getProcedure();
} else {
for (int i = 0; i < num_fragments; i++) {
int frag_id = fragment.getFragmentId(i);
PlanFragment catalog_frag = CatalogUtil.getPlanFragment(ts.getProcedure(), frag_id);
catalog_obj = catalog_frag.getParent();
if (catalog_obj != null) break;
} // FOR
}
LOG.debug(String.format("%s - Queued up %s %s for partition %d and marked as %s [fragIds=%s]",
ts, catalog_obj, WorkFragment.class.getSimpleName(), partition,
(blocked ? "blocked" : "not blocked"),
fragment.getFragmentIdList()));
}
// *********************************** DEBUG ***********************************
if (ignore_ctr == num_fragments) {
return (false);
}
return (blocked == false);
}
/**
* Store an output dependency result for a transaction. This corresponds to the
* execution of a single WorkFragment somewhere in the cluster. If there are other
* WorkFragments to become unblocked and be ready to execute.
* @param ts
* @param partition
* @param dependency_id
* @param result
*/
public void addResult(LocalTransaction ts, int partition, int dependency_id, VoltTable result) {
assert(result != null) :
String.format("%s - The result for DependencyId %d from partition %d is null",
ts, dependency_id, partition);
this.addResult(ts, Pair.of(partition, dependency_id), result, false);
}
/**
* Store a VoltTable result that this transaction is waiting for.
* @param key The hackish partition+dependency key
* @param result The actual data for the result
* @param force If false, then we will check to make sure the result isn't a duplicate
* @param partition The partition id that generated the result
* @param dependency_id The dependency id that this result corresponds to
*/
private void addResult(final LocalTransaction ts,
final Pair<Integer, Integer> key,
final VoltTable result,
final boolean force) {
final TransactionState state = this.getState(ts);
assert(result != null);
final ReentrantLock txnLock = ts.getTransactionLock();
final int base_partition = ts.getBasePartition();
final int partition = key.getFirst().intValue();
final int dependency_id = key.getSecond().intValue();
final RoundState roundState = ts.getCurrentRoundState(base_partition);
final boolean singlePartitioned = ts.isPredictSinglePartition();
assert(roundState == RoundState.INITIALIZED || roundState == RoundState.STARTED) :
String.format("Invalid round state %s for %s at partition %d",
roundState, ts, base_partition);
if (debug.val)
LOG.debug(String.format("%s - Attemping to add new result with %d rows for %s",
ts, result.getRowCount(), TransactionUtil.debugPartDep(partition, dependency_id)));
// If the txn is still in the INITIALIZED state, then we just want to queue up the results
// for now. They will get released when we switch to STARTED
// This is the only part that we need to synchonize on
if (force == false) {
if (singlePartitioned == false) txnLock.lock();
try {
if (roundState == RoundState.INITIALIZED) {
assert(state.queued_results.containsKey(key) == false) :
String.format("%s - Duplicate result %s",
ts, TransactionUtil.debugPartDep(partition, dependency_id));
state.queued_results.put(key, result);
if (debug.val)
LOG.debug(String.format("%s - Queued result %s until the round is started",
ts, TransactionUtil.debugPartDep(partition, dependency_id)));
return;
}
} finally {
if (singlePartitioned == false) txnLock.unlock();
} // SYNCH
}
// Each partition+dependency_id should be unique within the Statement batch.
// So as the results come back to us, we have to figure out which Statement it belongs to
DependencyInfo dinfo = null;
try {
dinfo = state.getDependencyInfo(dependency_id);
} catch (NullPointerException ex) {
// HACK: IGNORE!
}
if (dinfo == null) {
// HACK: IGNORE!
return;
}
if (singlePartitioned == false) txnLock.lock();
try {
// 2013-05-12: DependencyInfo.addResult() must definitely be synchronized!!!
// There is a weird race condition where the inner PartitionSet is not
// updated properly.
dinfo.addResult(partition, result);
state.received_ctr++;
this.updateAfterNewResult(ts, state, dinfo);
} finally {
if (singlePartitioned == false) txnLock.unlock();
} // SYNCH
if (debug.val)
LOG.debug(String.format("%s - Stored new result for %s",
ts, TransactionUtil.debugPartDep(partition, dependency_id)));
if (trace.val) {
Map<String, Object> m = new LinkedHashMap<String, Object>();
m.put("Blocked Tasks", (state != null ? state.blocked_tasks.size() : null));
m.put("DependencyInfo", dinfo.debug());
m.put("hasTasksReady", dinfo.hasTasksReady());
m.put("Dependency Latch", state.dependency_latch);
LOG.trace(this + " - Status Information\n" + StringUtil.formatMaps(m));
// if (trace.val) LOG.trace(ts.debug());
}
}
/**
* Populate the given map with the the dependency results that are used for
* internal plan execution. Note that these are not the results that should be
* sent to the client.
* @param fragment
* @param results
* @return
*/
public Map<Integer, List<VoltTable>> removeInternalDependencies(final LocalTransaction ts,
final WorkFragment fragment,
final Map<Integer, List<VoltTable>> results) {
if (debug.val)
LOG.debug(String.format("%s - Retrieving %d internal dependencies for %s WorkFragment:\n%s",
ts, fragment.getInputDepIdCount(), fragment));
final TransactionState state = this.getState(ts);
for (int i = 0, cnt = fragment.getFragmentIdCount(); i < cnt; i++) {
int stmtCounter = fragment.getStmtCounter(i);
int input_d_id = fragment.getInputDepId(i);
if (input_d_id == HStoreConstants.NULL_DEPENDENCY_ID) continue;
DependencyInfo dinfo = state.getDependencyInfo(input_d_id);
assert(dinfo != null);
assert(dinfo.getPartitionCount() == dinfo.getResultsCount()) :
String.format("%s - Number of results retrieved for %s is %d " +
"but we were expecting %d\n%s\n%s\n%s",
ts, TransactionUtil.debugStmtDep(stmtCounter, input_d_id),
dinfo.getResultsCount(), dinfo.getPartitionCount(),
fragment.toString(),
StringUtil.SINGLE_LINE, ts.debug());
results.put(input_d_id, dinfo.getResults());
if (trace.val)
LOG.trace(String.format("%s - %s -> %d VoltTables",
ts, TransactionUtil.debugStmtDep(stmtCounter, input_d_id),
results.get(input_d_id).size()));
} // FOR
return (results);
}
/**
*
* @param ts
* @param input_d_id
* @return
*/
public List<VoltTable> getInternalDependency(final LocalTransaction ts, final Integer input_d_id) {
if (debug.val)
LOG.debug(String.format("%s - Retrieving internal dependencies for Dependency %d",
ts, input_d_id));
final TransactionState state = this.getState(ts);
DependencyInfo dinfo = state.getDependencyInfo(input_d_id);
assert(dinfo != null) :
String.format("No DependencyInfo object for Dependency %d in %s",
input_d_id, ts);
assert(dinfo.isInternal()) :
String.format("The DependencyInfo for Dependency %s in %s is not marked as internal",
input_d_id, ts);
assert(dinfo.getPartitionCount() == dinfo.getResultsCount()) :
String.format("Number of results from partitions retrieved for Dependency %s " +
"is %d but we were expecting %d in %s\n%s\n%s%s",
input_d_id, dinfo.getResultsCount(), dinfo.getPartitionCount(), ts,
this.toString(), StringUtil.SINGLE_LINE, ts.debug());
return (dinfo.getResults());
}
// ----------------------------------------------------------------------------
// ACCESS METHODS
// ----------------------------------------------------------------------------
public void unblock(LocalTransaction ts) {
final TransactionState state = this.getState(ts);
try {
// And then shove an empty result at them
state.unblocked_tasks.addLast(EMPTY_FRAGMENT_SET);
// Spin through this so that the waiting thread wakes up and sees that they got an error
if (state.dependency_latch != null) {
while (state.dependency_latch.getCount() > 0) {
state.dependency_latch.countDown();
} // WHILE
}
} catch (NullPointerException ex) {
// HACK!
}
}
public BlockingDeque<Collection<WorkFragment.Builder>> getUnblockedWorkFragmentsQueue(LocalTransaction ts) {
final TransactionState state = this.getState(ts);
return (state.unblocked_tasks);
}
/**
* Return the latch that will block the PartitionExecutor's thread until
* all of the query results have been retrieved for this transaction's
* current SQLStmt batch
*/
public CountDownLatch getDependencyLatch(LocalTransaction ts) {
final TransactionState state = this.getState(ts);
return state.dependency_latch;
}
/**
* Returns true if this transaction still has WorkFragments
* that need to be dispatched to the appropriate PartitionExecutor
* @return
*/
public boolean stillHasWorkFragments(LocalTransaction ts) {
final TransactionState state = this.getState(ts);
return (state.still_has_tasks);
}
/**
* Returns true if the given WorkFragment is currently set as blocked for this txn
* @param ftask
* @return
*/
public boolean isBlocked(LocalTransaction ts, WorkFragment.Builder ftask) {
final TransactionState state = this.getState(ts);
return (state.blocked_tasks.contains(ftask));
}
// ----------------------------------------------------------------------------
// QUERY PREFETCHING
// ----------------------------------------------------------------------------
/**
* Inform this tracker the txn is requesting the given WorkFragment to be
* prefetched on a remote partition.
* @param ts
* @param fragment
* @return
*/
public void addPrefetchWorkFragment(LocalTransaction ts, WorkFragment.Builder fragment, ParameterSet batchParams[]) {
assert(fragment.getPrefetch());
final TransactionState state = this.getState(ts);
final int num_fragments = fragment.getFragmentIdCount();
final int partition = fragment.getPartitionId();
for (int i = 0; i < num_fragments; i++) {
final int fragmentId = fragment.getFragmentId(i);
final int stmtCounter = fragment.getStmtCounter(i);
final int stmtIndex = fragment.getStmtIndex(i);
final int paramHash = batchParams[fragment.getParamIndex(i)].hashCode();
// A prefetched query must *always* produce an output!
int output_dep_id = fragment.getOutputDepId(i);
assert(output_dep_id != HStoreConstants.NULL_DEPENDENCY_ID);
// But should never have an input dependency!
assert(fragment.getNeedsInput() == false);
// Note that we need to do a lookup in the map based on the StmtCounter
// and not its StmtIndex. This is because the StmtCounter is global for the entire
// transaction whereas the StmtIndex is unique for a single SQLStmt batch.
Map<Integer, DependencyInfo> stmt_deps = state.prefetch_dependencies.get(stmtCounter);
if (stmt_deps == null) {
stmt_deps = new HashMap<Integer, DependencyInfo>();
state.prefetch_dependencies.put(stmtCounter, stmt_deps);
}
DependencyInfo dinfo = stmt_deps.get(fragmentId);
if (dinfo == null) {
dinfo = new DependencyInfo(this.catalogContext);
dinfo.init(state.txn_id, -1, stmtCounter, stmtIndex, paramHash, output_dep_id);
dinfo.markPrefetch();
}
dinfo.addPartition(partition);
stmt_deps.put(fragmentId, dinfo);
state.prefetch_ctr++;
if (debug.val) {
String msg = String.format("%s - Adding prefetch %s %s at partition %d for %s",
ts, dinfo,
TransactionUtil.debugStmtDep(stmtCounter, output_dep_id), partition,
CatalogUtil.getPlanFragment(catalogContext.catalog, fragment.getFragmentId(i)).fullName());
if (trace.val)
msg += "\n" + String.format("ProcedureParams = %s\n" +
"ParameterSet[%d] = %s\n%s",
ts.getProcedureParameters(),
fragment.getParamIndex(i), batchParams[fragment.getParamIndex(i)],
dinfo.debug());
LOG.debug(msg);
}
} // FOR
return;
}
/**
* Store a new prefetch result for a transaction
* @param txnId
* @param stmtCounter
* @param fragmentId
* @param partitionId
* @param params
* @param result
*/
public void addPrefetchResult(LocalTransaction ts,
int stmtCounter,
int fragmentId,
int partitionId,
int paramsHash,
VoltTable result) {
assert(ts.hasPrefetchQueries());
if (debug.val)
LOG.debug(String.format("%s - Adding prefetch result %s with %d rows from partition %d [paramsHash=%d]",
ts, TransactionUtil.debugStmtFrag(stmtCounter, fragmentId),
result.getRowCount(), partitionId, paramsHash));
final TransactionState state = this.getState(ts);
if (state == null) {
LOG.error(String.format("Missing %s for %s. Unable to store prefetch result from partition %d",
TransactionState.class.getSimpleName(), ts, partitionId));
return;
}
// Find the corresponding DependencyInfo
Map<Integer, DependencyInfo> stmt_deps = state.prefetch_dependencies.get(stmtCounter);
if (stmt_deps == null) {
String msg = String.format("Unexpected prefetch result for %s from partition %d - " +
"Invalid SQLStmt index '%d'",
ts, partitionId, stmtCounter);
throw new ServerFaultException(msg, ts.getTransactionId());
}
DependencyInfo dinfo = stmt_deps.get(fragmentId);
if (dinfo == null) {
String msg = String.format("Unexpected prefetch result for %s from partition %d - " +
"Invalid PlanFragment id '%d'",
ts, partitionId, fragmentId);
throw new ServerFaultException(msg, ts.getTransactionId());
}
assert(dinfo.getParameterSetHash() == paramsHash) :
String.format("%s - ParameterSet Mismatch in %s for %s [%d != %d]",
ts, dinfo, TransactionUtil.debugStmtFrag(stmtCounter, fragmentId),
dinfo.getParameterSetHash(), paramsHash);
assert(dinfo.getExpectedPartitions().contains(partitionId));
// Always add it to our DependencyInfo handle and then check to see whether we have
// all of the results that we need for it.
// If we do, then we need to check to see whether the txn needs the results
// right now.
final ReentrantLock txnLock = ts.getTransactionLock();
txnLock.lock();
try {
// Check to see whether we should adding this through
// the normal channels or whether we are still in "prefetch" mode
if (dinfo.isPrefetch() == false) {
this.addResult(ts, partitionId, dinfo.getDependencyId(), result);
}
else {
dinfo.addResult(partitionId, result);
}
} finally {
txnLock.unlock();
}
}
// ----------------------------------------------------------------------------
// DEBUG STUFF
// ----------------------------------------------------------------------------
@Override
public String toString() {
return String.format("%s{Partition=%d / Hash=%d}",
this.getClass().getSimpleName(),
this.executor.getPartitionId(),
this.hashCode());
}
public class Debug implements DebugContext {
public boolean hasTransactionState(LocalTransaction ts) {
try {
return (getState(ts) != null);
} catch (AssertionError ex) {
return (false);
}
}
public DependencyInfo getDependencyInfo(LocalTransaction ts, int d_id) {
final TransactionState state = getState(ts);
return (state.dependencies.get(d_id));
}
public Collection<DependencyInfo> getAllDependencies(LocalTransaction ts) {
final TransactionState state = getState(ts);
return (state.dependencies.values());
}
public int getDependencyCount(LocalTransaction ts) {
final TransactionState state = getState(ts);
return (state.dependency_ctr);
}
public Collection<WorkFragment.Builder> getBlockedWorkFragments(LocalTransaction ts) {
final TransactionState state = getState(ts);
return (state.blocked_tasks);
}
public List<Integer> getOutputOrder(LocalTransaction ts) {
final TransactionState state = getState(ts);
return (state.output_order);
}
public Map<Integer, DependencyInfo> getStatementDependencies(LocalTransaction ts, int stmtIndex) {
final TransactionState state = getState(ts);
return (state.dependencies);
}
public int getPrefetchCounter(LocalTransaction ts) {
final TransactionState state = getState(ts);
return (state.prefetch_ctr);
}
/**
* Returns the number of outstanding prefetch DependencyInfo with
* results that were not utilized by the txn's regular query invocations.
* If there is no TransactionState for the given txn handle or the txn did
* not execute with prefetch queries, then the return result will be null.
* @param ts
* @return
*/
public Integer getUnusedPrefetchResultCount(LocalTransaction ts) {
TransactionState state = null;
try {
state = getState(ts);
} catch (AssertionError ex) {
// IGNORE
}
if (state == null || state.prefetch_dependencies == null) {
return (null);
}
int ctr = 0;
if (state.prefetch_dependencies != null) {
for (Map<Integer, DependencyInfo> m : state.prefetch_dependencies.values()) {
for (DependencyInfo dinfo : m.values()) {
if (dinfo.isPrefetch() && dinfo.hasAllResults()) ctr++;
} // FOR
} // FOR
}
return (ctr);
}
public Map<String, Object> debugMap(LocalTransaction ts) {
try {
TransactionState state = getState(ts);
return state.debugMap();
} catch (AssertionError ex) {
// IGNORE
}
return (null);
}
}
private Debug cachedDebugContext;
public Debug getDebugContext() {
if (this.cachedDebugContext == null) {
// We don't care if we're thread-safe here...
this.cachedDebugContext = new Debug();
}
return this.cachedDebugContext;
}
}