/***************************************************************************
 * Copyright (C) 2013 by H-Store Project
 * Brown University
 * Massachusetts Institute of Technology
 * Yale University
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 ***************************************************************************/
/* This file is part of VoltDB.
* Copyright (C) 2008-2010 VoltDB L.L.C.
*
* VoltDB is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* VoltDB is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with VoltDB. If not, see <http://www.gnu.org/licenses/>.
*/
package edu.brown.hstore;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.TreeSet;
import java.util.concurrent.BlockingDeque;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.Logger;
import org.voltdb.AriesLog;
import org.voltdb.BackendTarget;
import org.voltdb.CatalogContext;
import org.voltdb.ClientResponseImpl;
import org.voltdb.DependencySet;
import org.voltdb.HsqlBackend;
import org.voltdb.MemoryStats;
import org.voltdb.ParameterSet;
import org.voltdb.SQLStmt;
import org.voltdb.SnapshotSiteProcessor;
import org.voltdb.SnapshotSiteProcessor.SnapshotTableTask;
import org.voltdb.SysProcSelector;
import org.voltdb.VoltProcedure;
import org.voltdb.VoltProcedure.VoltAbortException;
import org.voltdb.VoltSystemProcedure;
import org.voltdb.VoltTable;
import org.voltdb.catalog.Catalog;
import org.voltdb.catalog.Cluster;
import org.voltdb.catalog.Database;
import org.voltdb.catalog.Host;
import org.voltdb.catalog.Partition;
import org.voltdb.catalog.PlanFragment;
import org.voltdb.catalog.Procedure;
import org.voltdb.catalog.Site;
import org.voltdb.catalog.Statement;
import org.voltdb.catalog.Table;
import org.voltdb.exceptions.ConstraintFailureException;
import org.voltdb.exceptions.EEException;
import org.voltdb.exceptions.EvictedTupleAccessException;
import org.voltdb.exceptions.MispredictionException;
import org.voltdb.exceptions.SQLException;
import org.voltdb.exceptions.SerializableException;
import org.voltdb.exceptions.ServerFaultException;
import org.voltdb.jni.ExecutionEngine;
import org.voltdb.jni.ExecutionEngineIPC;
import org.voltdb.jni.ExecutionEngineJNI;
import org.voltdb.jni.MockExecutionEngine;
import org.voltdb.messaging.FastDeserializer;
import org.voltdb.messaging.FastSerializer;
import org.voltdb.types.SpecExecSchedulerPolicyType;
import org.voltdb.types.SpeculationConflictCheckerType;
import org.voltdb.types.SpeculationType;
import org.voltdb.utils.DBBPool;
import org.voltdb.utils.DBBPool.BBContainer;
import org.voltdb.utils.Encoder;
import org.voltdb.utils.EstTime;
import org.voltdb.utils.VoltTableUtil;
import com.google.protobuf.ByteString;
import com.google.protobuf.RpcCallback;
import edu.brown.catalog.CatalogUtil;
import edu.brown.catalog.PlanFragmentIdGenerator;
import edu.brown.catalog.special.CountedStatement;
import edu.brown.hstore.Hstoreservice.QueryEstimate;
import edu.brown.hstore.Hstoreservice.Status;
import edu.brown.hstore.Hstoreservice.TransactionPrefetchResult;
import edu.brown.hstore.Hstoreservice.TransactionPrepareResponse;
import edu.brown.hstore.Hstoreservice.TransactionWorkRequest;
import edu.brown.hstore.Hstoreservice.TransactionWorkResponse;
import edu.brown.hstore.Hstoreservice.WorkFragment;
import edu.brown.hstore.Hstoreservice.WorkResult;
import edu.brown.hstore.callbacks.LocalFinishCallback;
import edu.brown.hstore.callbacks.LocalPrepareCallback;
import edu.brown.hstore.callbacks.PartitionCountingCallback;
import edu.brown.hstore.callbacks.RemotePrepareCallback;
import edu.brown.hstore.conf.HStoreConf;
import edu.brown.hstore.estimators.Estimate;
import edu.brown.hstore.estimators.EstimatorState;
import edu.brown.hstore.estimators.EstimatorUtil;
import edu.brown.hstore.estimators.TransactionEstimator;
import edu.brown.hstore.internal.DeferredQueryMessage;
import edu.brown.hstore.internal.FinishTxnMessage;
import edu.brown.hstore.internal.InternalMessage;
import edu.brown.hstore.internal.InternalTxnMessage;
import edu.brown.hstore.internal.PotentialSnapshotWorkMessage;
import edu.brown.hstore.internal.PrepareTxnMessage;
import edu.brown.hstore.internal.SetDistributedTxnMessage;
import edu.brown.hstore.internal.StartTxnMessage;
import edu.brown.hstore.internal.UtilityWorkMessage;
import edu.brown.hstore.internal.UtilityWorkMessage.TableStatsRequestMessage;
import edu.brown.hstore.internal.UtilityWorkMessage.UpdateMemoryMessage;
import edu.brown.hstore.internal.WorkFragmentMessage;
import edu.brown.hstore.specexec.QueryTracker;
import edu.brown.hstore.specexec.checkers.AbstractConflictChecker;
import edu.brown.hstore.specexec.checkers.MarkovConflictChecker;
import edu.brown.hstore.specexec.checkers.OptimisticConflictChecker;
import edu.brown.hstore.specexec.checkers.TableConflictChecker;
import edu.brown.hstore.specexec.checkers.UnsafeConflictChecker;
import edu.brown.hstore.txns.AbstractTransaction;
import edu.brown.hstore.txns.DependencyTracker;
import edu.brown.hstore.txns.LocalTransaction;
import edu.brown.hstore.txns.MapReduceTransaction;
import edu.brown.hstore.txns.PrefetchState;
import edu.brown.hstore.txns.RemoteTransaction;
import edu.brown.hstore.util.ArrayCache.IntArrayCache;
import edu.brown.hstore.util.ArrayCache.LongArrayCache;
import edu.brown.hstore.util.ParameterSetArrayCache;
import edu.brown.hstore.util.TransactionCounter;
import edu.brown.hstore.util.TransactionUndoTokenComparator;
import edu.brown.hstore.util.TransactionWorkRequestBuilder;
import edu.brown.interfaces.Configurable;
import edu.brown.interfaces.DebugContext;
import edu.brown.interfaces.Shutdownable;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.markov.EstimationThresholds;
import edu.brown.profilers.PartitionExecutorProfiler;
import edu.brown.protorpc.NullCallback;
import edu.brown.statistics.FastIntHistogram;
import edu.brown.utils.ClassUtil;
import edu.brown.utils.CollectionUtil;
import edu.brown.utils.EventObservable;
import edu.brown.utils.EventObserver;
import edu.brown.utils.FileUtil;
import edu.brown.utils.PartitionEstimator;
import edu.brown.utils.PartitionSet;
import edu.brown.utils.StringBoxUtil;
import edu.brown.utils.StringUtil;
import edu.brown.utils.ThreadUtil;
/**
* The main executor of transactional work in the system for a single partition.
* Controls running stored procedures and manages the execution engine's running of plan
* fragments. Interacts with the DTXN system to get work to do. The thread might
* do other things, but this is where the good stuff happens.
*/
public class PartitionExecutor implements Runnable, Configurable, Shutdownable {
private static final Logger LOG = Logger.getLogger(PartitionExecutor.class);
private static final LoggerBoolean debug = new LoggerBoolean();
private static final LoggerBoolean trace = new LoggerBoolean();
static {
LoggerUtil.attachObserver(LOG, debug, trace);
}
private static final long WORK_QUEUE_POLL_TIME = 10; // 10 microseconds (see WORK_QUEUE_POLL_TIMEUNIT)
private static final TimeUnit WORK_QUEUE_POLL_TIMEUNIT = TimeUnit.MICROSECONDS;
private static final UtilityWorkMessage UTIL_WORK_MSG = new UtilityWorkMessage();
private static final UpdateMemoryMessage STATS_WORK_MSG = new UpdateMemoryMessage();
// ----------------------------------------------------------------------------
// INTERNAL EXECUTION STATE
// ----------------------------------------------------------------------------
/**
* The current execution mode for this PartitionExecutor
* This defines what level of speculative execution we have enabled.
*/
public enum ExecutionMode {
/**
* Disable processing of all transactions until told otherwise.
* New transactions will still be accepted and queued.
*/
DISABLED,
/**
* Reject any transaction that tries to get added
*/
DISABLED_REJECT,
/**
* No speculative execution. All transactions are committed immediately
*/
COMMIT_ALL,
/**
* Allow read-only txns to return results.
*/
COMMIT_READONLY,
/**
* Allow non-conflicting txns to return results.
*/
COMMIT_NONCONFLICTING,
/**
* All txn responses must wait until the current distributed txn is committed
*/
COMMIT_NONE,
};
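// A minimal illustration of how these modes gate when speculative results may
// be released (a sketch only; the authoritative policy lives in the executor's
// response-handling code, e.g. a method like canProcessClientResponseNow(),
// whose exact signature is assumed here):
//
//   boolean canReleaseNow(ExecutionMode mode, boolean txnReadOnly) {
//       switch (mode) {
//           case COMMIT_ALL:            return true;
//           case COMMIT_READONLY:       return txnReadOnly;
//           case COMMIT_NONCONFLICTING: return true;  // conflict check already passed
//           case COMMIT_NONE:           return false; // hold until the dtxn commits
//           default:                    return false; // DISABLED / DISABLED_REJECT
//       }
//   }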
// ----------------------------------------------------------------------------
// DATA MEMBERS
// ----------------------------------------------------------------------------
private Thread self;
/**
* The current shutdown state. Once this is set to a shutdown state, we need
* to shut ourselves down and stop running txns.
*/
private ShutdownState shutdown_state = Shutdownable.ShutdownState.INITIALIZED;
private Semaphore shutdown_latch;
/**
* Catalog objects
*/
protected final CatalogContext catalogContext;
protected Site site;
protected int siteId;
private Partition partition;
private int partitionId;
private final BackendTarget backend_target;
private final ExecutionEngine ee;
private final HsqlBackend hsql;
private final DBBPool buffer_pool = new DBBPool(false, false);
private final FastSerializer fs = new FastSerializer(this.buffer_pool);
/**
* The PartitionEstimator is what we use to figure our what partitions each
* query invocation needs to be sent to at run time.
* It is deterministic.
*/
private final PartitionEstimator p_estimator;
/**
* The TransactionEstimator is the runtime piece that we use to keep track of
* where a locally running transaction is in its execution workflow. This allows
* us to make predictions about what we expect the transaction to do in
* the future.
*/
private final TransactionEstimator localTxnEstimator;
private EstimationThresholds thresholds = EstimationThresholds.factory();
// Each execution site manages snapshot using a SnapshotSiteProcessor
private final SnapshotSiteProcessor m_snapshotter;
/**
* ProcedureId -> Queue<VoltProcedure>
*/
private final Queue<VoltProcedure>[] procedures;
// ----------------------------------------------------------------------------
// H-Store Transaction Stuff
// ----------------------------------------------------------------------------
private HStoreSite hstore_site;
private HStoreCoordinator hstore_coordinator;
private HStoreConf hstore_conf;
private TransactionQueueManager queueManager;
private PartitionLockQueue lockQueue;
private DependencyTracker depTracker;
// ----------------------------------------------------------------------------
// Work Queue
// ----------------------------------------------------------------------------
/**
* The queue of work that this partition needs to execute.
* An entry is either an InitiateTaskMessage (i.e., start a stored procedure) or
* a WorkFragment (i.e., execute some fragments on behalf of another transaction).
* We use this special wrapper around the PartitionExecutorQueue so that we can determine
* whether this partition is overloaded and therefore new requests should be throttled.
*/
private final PartitionMessageQueue work_queue;
// ----------------------------------------------------------------------------
// Internal Execution State
// ----------------------------------------------------------------------------
/**
* The transaction id of the current transaction
* This is mostly used for testing and should not be relied on from the outside.
*/
private Long currentTxnId = null;
/**
* We can only have one active "parent" transaction at a time.
* We can speculatively execute other transactions out of order, but the active parent
* transaction will always be the same.
*/
private AbstractTransaction currentTxn;
/**
* We can only have one active distributed transaction at a time.
* This is the multi-partition TransactionState that is currently executing at this partition.
* When we get the response for this txn, we know we can commit/abort the speculatively
* executed transactions.
*/
private AbstractTransaction currentDtxn = null;
private String lastDtxnDebug = null;
/**
* The current VoltProcedure handle that is executing at this partition
* This will be set to null as soon as the VoltProcedure.run() method completes
*/
private VoltProcedure currentVoltProc = null;
/**
* List of messages that are blocked waiting for the outstanding dtxn to commit
*/
private final List<InternalMessage> currentBlockedTxns = new ArrayList<InternalMessage>();
/**
* The current ExecutionMode. This defines when transactions are allowed to execute
* and whether they can return their results to the client immediately or whether they
* must wait until the current_dtxn commits.
*/
private ExecutionMode currentExecMode = ExecutionMode.COMMIT_ALL;
/**
* The time in ms since epoch of the last call to ExecutionEngine.tick(...)
*/
private long lastTickTime = 0;
/**
* The time in ms since last stats update
*/
private long lastStatsTime = 0;
/**
* The last txn id that we executed (either local or remote)
*/
private volatile Long lastExecutedTxnId = null;
/**
* The last txn id that we committed
*/
private volatile Long lastCommittedTxnId = Long.valueOf(-1L);
/**
* The last undoToken that we handed out
*/
private long lastUndoToken = 0L;
/**
* The last undoToken that we committed at this partition
*/
private long lastCommittedUndoToken = -1L;
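// How a fresh undo token would be handed out (a sketch; the real accessor,
// presumably something like getNextUndoToken(), lives elsewhere in this class):
// tokens increase monotonically per partition and are seeded in the constructor
// with partitionId * 1000000 so that ranges from different partitions cannot collide.
//
//   private long exampleNextUndoToken() {
//       return (++this.lastUndoToken);
//   }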
// ARIES
private boolean m_ariesRecovery;
private final String m_ariesDefaultLogFileName = "aries.log";
public long getArieslogBufferLength() {
return ee.getArieslogBufferLength();
}
public void getArieslogData(int bufferLength, byte[] arieslogDataArray) {
ee.getArieslogData(bufferLength, arieslogDataArray);
}
public long readAriesLogForReplay(long[] size) {
return ee.readAriesLogForReplay(size);
}
public void freePointerToReplayLog(long ariesReplayPointer) {
ee.freePointerToReplayLog(ariesReplayPointer);
}
public boolean doingAriesRecovery() {
return m_ariesRecovery;
}
public void ariesRecoveryCompleted() {
//m_ariesRecovery = false;
}
// ----------------------------------------------------------------------------
// SPECULATIVE EXECUTION STATE
// ----------------------------------------------------------------------------
private SpeculationConflictCheckerType specExecCheckerType;
private AbstractConflictChecker specExecChecker;
private boolean specExecSkipAfter = false;
private SpecExecScheduler specExecScheduler;
/**
* Transactions that were speculatively executed before or after the current
* distributed transaction finished at this partition and are now waiting to be committed.
* Any transaction in this list should have its ClientResponse member set.
*/
private final LinkedList<LocalTransaction> specExecBlocked = new LinkedList<LocalTransaction>();
/**
* Special comparator that will sort txns in the order according to their undo tokens.
*/
private final TransactionUndoTokenComparator specExecComparator;
/**
* If this flag is set to true, that means some txn has modified the database
* in the current batch of speculatively executed txns. Any read-only specexec txn that
* is executed when this flag is set to false can be returned to the client immediately.
* TODO: This should really be a bitmap of table ids so that we have finer-grained control
*/
private boolean specExecModified = false;
/**
* If set to true, then we should not check for speculative execution candidates
* at run time. This needs to be set any time we change the currentDtxn
*/
private boolean specExecIgnoreCurrent = false;
// ----------------------------------------------------------------------------
// SHARED VOLTPROCEDURE DATA MEMBERS
// ----------------------------------------------------------------------------
/**
* Mapping from SQLStmt batch hash codes (computed by VoltProcedure.getBatchHashCode()) to BatchPlanners.
* The idea is that we can quickly derive the partitions for each unique list of SQLStmts.
*/
private final Map<Integer, BatchPlanner> batchPlanners = new HashMap<Integer, BatchPlanner>(100);
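// The intended lookup pattern (a sketch; the real call site is in the batch
// execution path, and the BatchPlanner constructor arguments shown here are
// assumed):
//
//   int hash = VoltProcedure.getBatchHashCode(batchStmts, batchSize);
//   BatchPlanner planner = this.batchPlanners.get(hash);
//   if (planner == null) {
//       planner = new BatchPlanner(batchStmts, catalog_proc, p_estimator);
//       this.batchPlanners.put(hash, planner);
//   }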
// ----------------------------------------------------------------------------
// DISTRIBUTED TRANSACTION TEMPORARY DATA COLLECTIONS
// ----------------------------------------------------------------------------
/**
* WorkFragments that we need to send to a remote HStoreSite for execution
*/
private final List<WorkFragment.Builder> tmp_remoteFragmentBuilders = new ArrayList<WorkFragment.Builder>();
/**
* WorkFragments that we need to send to our own PartitionExecutor
*/
private final List<WorkFragment.Builder> tmp_localWorkFragmentBuilders = new ArrayList<WorkFragment.Builder>();
/**
* WorkFragments that we need to send to a different PartitionExecutor that is on this same HStoreSite
*/
private final List<WorkFragment.Builder> tmp_localSiteFragmentBuilders = new ArrayList<WorkFragment.Builder>();
/**
* Temporary space used when calling removeInternalDependencies()
*/
private final HashMap<Integer, List<VoltTable>> tmp_removeDependenciesMap = new HashMap<Integer, List<VoltTable>>();
/**
* Remote SiteId -> TransactionWorkRequest.Builder
*/
private final TransactionWorkRequestBuilder tmp_transactionRequestBuilders[];
/**
* PartitionId -> List<VoltTable>
*/
private final Map<Integer, List<VoltTable>> tmp_EEdependencies = new HashMap<Integer, List<VoltTable>>();
/**
* List of serialized ParameterSets
*/
private final List<ByteString> tmp_serializedParams = new ArrayList<ByteString>();
/**
* Histogram for the number of WorkFragments that we're going to send to partitions
* in the current batch.
*/
private final FastIntHistogram tmp_fragmentsPerPartition = new FastIntHistogram(true);
/**
* Reusable int array for StmtCounters
*/
private final IntArrayCache tmp_stmtCounters = new IntArrayCache(10);
/**
* Reusable ParameterSet array cache for WorkFragments
*/
private final ParameterSetArrayCache tmp_fragmentParams = new ParameterSetArrayCache(5);
/**
* Reusable long array for fragment ids
*/
private final LongArrayCache tmp_fragmentIds = new LongArrayCache(10);
/**
* Reusable long array for fragment id offsets
*/
private final IntArrayCache tmp_fragmentOffsets = new IntArrayCache(10);
/**
* Reusable int array for output dependency ids
*/
private final IntArrayCache tmp_outputDepIds = new IntArrayCache(10);
/**
* Reusable int array for input dependency ids
*/
private final IntArrayCache tmp_inputDepIds = new IntArrayCache(10);
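// The ArrayCache helpers above hand out reusable arrays sized on demand so
// that we avoid per-batch allocations. A sketch of the assumed contract
// (see edu.brown.hstore.util.ArrayCache for the authoritative API):
//
//   int[] outputDepIds = tmp_outputDepIds.getArray(numFragments);
//   // ... fill and use within the current batch only; the same buffer may be
//   // handed out again on the next call ...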
/**
* The following three arrays are used by utilityWork() to create transactions
* for deferred queries
*/
private final SQLStmt[] tmp_def_stmt = new SQLStmt[1];
private final ParameterSet[] tmp_def_params = new ParameterSet[1];
private LocalTransaction tmp_def_txn;
// ----------------------------------------------------------------------------
// INTERNAL CLASSES
// ----------------------------------------------------------------------------
private class DonePartitionsNotification {
/**
* All of the partitions that a transaction is currently done with.
*/
private final PartitionSet donePartitions = new PartitionSet();
/**
* RemoteSiteId -> Partitions that we need to notify that this txn is done with.
*/
private PartitionSet[] notificationsPerSite;
/**
* Site ids that we need to notify separately about the done partitions.
*/
private Collection<Integer> _sitesToNotify;
public void addSiteNotification(Site remoteSite, int partitionId, boolean noQueriesInBatch) {
int remoteSiteId = remoteSite.getId();
if (this.notificationsPerSite == null) {
this.notificationsPerSite = new PartitionSet[catalogContext.numberOfSites];
}
if (this.notificationsPerSite[remoteSiteId] == null) {
this.notificationsPerSite[remoteSiteId] = new PartitionSet();
}
this.notificationsPerSite[remoteSiteId].add(partitionId);
if (noQueriesInBatch) {
if (this._sitesToNotify == null) {
this._sitesToNotify = new HashSet<Integer>();
}
this._sitesToNotify.add(Integer.valueOf(remoteSiteId));
}
}
/**
* Return the set of partitions that need to be notified separately
* for the given site id. The return value may be null.
* @param remoteSiteId
* @return
*/
public PartitionSet getNotifications(int remoteSiteId) {
if (this.notificationsPerSite != null) {
return (this.notificationsPerSite[remoteSiteId]);
}
return (null);
}
public boolean hasSitesToNotify() {
return (this._sitesToNotify != null && this._sitesToNotify.isEmpty() == false);
}
}
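// A sketch of how this notification object is meant to be filled in and
// drained (the surrounding done-partitions calculation and messaging plumbing
// is assumed; it is not shown in this excerpt):
//
//   DonePartitionsNotification notify = new DonePartitionsNotification();
//   notify.addSiteNotification(remoteSite, partitionId, noQueriesInBatch);
//   if (notify.hasSitesToNotify()) {
//       PartitionSet partitions = notify.getNotifications(remoteSite.getId());
//       // ... send a separate done-partitions message to that site ...
//   }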
// ----------------------------------------------------------------------------
// PROFILING OBJECTS
// ----------------------------------------------------------------------------
private final PartitionExecutorProfiler profiler = new PartitionExecutorProfiler();
// ----------------------------------------------------------------------------
// WORK REQUEST CALLBACK
// ----------------------------------------------------------------------------
/**
* This will be invoked for each TransactionWorkResponse that comes back from
* the remote HStoreSites. Note that we don't need to do any counting as to whether
* a transaction has gotten back all of the responses that it expected. That logic is down
* below in waitForResponses()
*/
private final RpcCallback<TransactionWorkResponse> request_work_callback = new RpcCallback<TransactionWorkResponse>() {
@Override
public void run(TransactionWorkResponse msg) {
Long txn_id = msg.getTransactionId();
LocalTransaction ts = hstore_site.getTransaction(txn_id);
// We can ignore anything that comes in for a transaction that we don't know about
if (ts == null) {
if (debug.val) LOG.debug("No transaction state exists for txn #" + txn_id);
return;
}
if (debug.val)
LOG.debug(String.format("Processing TransactionWorkResponse for %s with %d results%s",
ts, msg.getResultsCount(), (trace.val ? "\n"+msg : "")));
for (int i = 0, cnt = msg.getResultsCount(); i < cnt; i++) {
WorkResult result = msg.getResults(i);
if (debug.val)
LOG.debug(String.format("Got %s from partition %d for %s",
result.getClass().getSimpleName(), result.getPartitionId(), ts));
PartitionExecutor.this.processWorkResult(ts, result);
} // FOR
if (hstore_conf.site.specexec_enable) {
specExecScheduler.interruptSearch(UTIL_WORK_MSG);
}
}
}; // END CLASS
// ----------------------------------------------------------------------------
// SYSPROC STUFF
// ----------------------------------------------------------------------------
// Associate the system procedure planfragment ids to wrappers.
// Planfragments are registered when the procedure wrapper is init()'d.
private final Map<Long, VoltSystemProcedure> m_registeredSysProcPlanFragments = new HashMap<Long, VoltSystemProcedure>();
public void registerPlanFragment(final long pfId, final VoltSystemProcedure proc) {
synchronized (m_registeredSysProcPlanFragments) {
assert(m_registeredSysProcPlanFragments.containsKey(pfId) == false) :
"Trying to register the same sysproc more than once: " + pfId;
if (!m_registeredSysProcPlanFragments.containsKey(pfId)) {
m_registeredSysProcPlanFragments.put(pfId, proc);
if (trace.val) LOG.trace(String.format("Registered %s sysproc handle at partition %d for FragmentId #%d",
VoltSystemProcedure.procCallName(proc.getClass()), partitionId, pfId));
}
} // SYNCH
}
/**
* SystemProcedures are "friends" with PartitionExecutors and granted
* access to internal state via m_systemProcedureContext.
*/
public interface SystemProcedureExecutionContext {
public Catalog getCatalog();
public Database getDatabase();
public Cluster getCluster();
public Site getSite();
public Host getHost();
public ExecutionEngine getExecutionEngine();
public long getLastCommittedTxnId();
public PartitionExecutor getPartitionExecutor();
public HStoreSite getHStoreSite();
public Long getCurrentTxnId();
}
protected class SystemProcedureContext implements SystemProcedureExecutionContext {
public Catalog getCatalog() { return catalogContext.catalog; }
public Database getDatabase() { return catalogContext.database; }
public Cluster getCluster() { return catalogContext.cluster; }
public Site getSite() { return site; }
public Host getHost() { return site.getHost(); }
public ExecutionEngine getExecutionEngine() { return ee; }
public long getLastCommittedTxnId() { return lastCommittedTxnId; }
public PartitionExecutor getPartitionExecutor() { return PartitionExecutor.this; }
public HStoreSite getHStoreSite() { return hstore_site; }
public Long getCurrentTxnId() { return PartitionExecutor.this.currentTxnId; }
}
private final SystemProcedureContext m_systemProcedureContext = new SystemProcedureContext();
private AriesLog m_ariesLog;
public SystemProcedureExecutionContext getSystemProcedureExecutionContext() {
return m_systemProcedureContext;
}
// ----------------------------------------------------------------------------
// INITIALIZATION
// ----------------------------------------------------------------------------
/**
* Dummy constructor...
*/
protected PartitionExecutor() {
this.catalogContext = null;
this.work_queue = null;
this.ee = null;
this.hsql = null;
this.specExecChecker = null;
this.specExecScheduler = null;
this.specExecComparator = null;
this.p_estimator = null;
this.localTxnEstimator = null;
this.m_snapshotter = null;
this.thresholds = null;
this.site = null;
this.backend_target = BackendTarget.HSQLDB_BACKEND;
this.siteId = 0;
this.partitionId = 0;
this.procedures = null;
this.tmp_transactionRequestBuilders = null;
this.m_ariesLog = null;
}
/**
* Initialize the StoredProcedure runner and EE for this Site.
* @param partitionId
* @param catalogContext
* @param target which backend to use (HSQLDB, native EE via JNI, or native EE via IPC)
* @param p_estimator
* @param t_estimator
*/
public PartitionExecutor(final int partitionId,
final CatalogContext catalogContext,
final BackendTarget target,
final PartitionEstimator p_estimator,
final TransactionEstimator t_estimator) {
this.hstore_conf = HStoreConf.singleton();
this.work_queue = new PartitionMessageQueue();
this.backend_target = target;
this.catalogContext = catalogContext;
this.partition = catalogContext.getPartitionById(partitionId);
assert(this.partition != null) : "Invalid Partition #" + partitionId;
this.partitionId = this.partition.getId();
this.site = this.partition.getParent();
assert(site != null) : "Unable to get Site for Partition #" + partitionId;
this.siteId = this.site.getId();
this.lastUndoToken = this.partitionId * 1000000;
this.p_estimator = p_estimator;
this.localTxnEstimator = t_estimator;
this.specExecComparator = new TransactionUndoTokenComparator(this.partitionId);
// VoltProcedure Queues
@SuppressWarnings("unchecked")
Queue<VoltProcedure> voltProcQueues[] = new Queue[catalogContext.procedures.size()+1];
this.procedures = voltProcQueues;
// An execution site can be backed by HSQLDB, by volt's EE accessed
// via JNI or by volt's EE accessed via IPC. When backed by HSQLDB,
// the VoltProcedure interface invokes HSQLDB directly through its
// hsql Backend member variable. The real volt backend is encapsulated
// by the ExecutionEngine class. This class has implementations for both
// JNI and IPC - and selects the desired implementation based on the
// value of this.backend_target.
HsqlBackend hsqlTemp = null;
ExecutionEngine eeTemp = null;
SnapshotSiteProcessor snapshotter = null;
try {
if (trace.val) LOG.trace("Creating EE wrapper with target type '" + target + "'");
if (this.backend_target == BackendTarget.HSQLDB_BACKEND) {
hsqlTemp = new HsqlBackend(partitionId);
final String hexDDL = catalogContext.database.getSchema();
final String ddl = Encoder.hexDecodeToString(hexDDL);
final String[] commands = ddl.split(";");
for (String command : commands) {
if (command.length() == 0) {
continue;
}
hsqlTemp.runDDL(command);
}
eeTemp = new MockExecutionEngine();
}
else if (target == BackendTarget.NATIVE_EE_JNI) {
org.voltdb.EELibraryLoader.loadExecutionEngineLibrary(true);
// set up the EE
eeTemp = new ExecutionEngineJNI(this,
catalogContext.cluster.getRelativeIndex(),
this.getSiteId(),
this.getPartitionId(),
this.site.getHost().getId(),
"localhost");
// Initialize Anti-Cache
if (hstore_conf.site.anticache_enable) {
File acFile = AntiCacheManager.getDatabaseDir(this);
long blockSize = hstore_conf.site.anticache_block_size;
eeTemp.antiCacheInitialize(acFile, blockSize);
}
// Initialize STORAGE_MMAP
if (hstore_conf.site.storage_mmap) {
File dbFile = getMMAPDir(this);
long mapSize = hstore_conf.site.storage_mmap_file_size;
long syncFrequency = hstore_conf.site.storage_mmap_sync_frequency;
eeTemp.MMAPInitialize(dbFile, mapSize, syncFrequency);
}
// Initialize ARIES
if (hstore_conf.site.aries) {
File dbFile = getARIESDir(this);
File logFile = getARIESFile(this);
eeTemp.ARIESInitialize(dbFile, logFile);
}
// Important: This has to be called *after* we initialize the anti-cache
// and the storage information!
eeTemp.loadCatalog(catalogContext.catalog.serialize());
this.lastTickTime = System.currentTimeMillis();
eeTemp.tick(this.lastTickTime, 0);
snapshotter = new SnapshotSiteProcessor(new Runnable() {
final PotentialSnapshotWorkMessage msg = new PotentialSnapshotWorkMessage();
@Override
public void run() {
PartitionExecutor.this.work_queue.add(this.msg);
}
});
}
else {
// set up the EE over IPC
eeTemp = new ExecutionEngineIPC(this,
catalogContext.cluster.getRelativeIndex(),
this.getSiteId(),
this.getPartitionId(),
this.site.getHost().getId(),
"localhost",
target);
eeTemp.loadCatalog(catalogContext.catalog.serialize());
this.lastTickTime = System.currentTimeMillis();
eeTemp.tick(this.lastTickTime, 0);
}
}
// just print error info and bail if we run into an error here
catch (final Exception ex) {
throw new ServerFaultException("Failed to initialize PartitionExecutor", ex);
}
this.ee = eeTemp;
this.hsql = hsqlTemp;
m_snapshotter = snapshotter;
assert(this.ee != null);
assert(!(this.ee == null && this.hsql == null)) : "Both execution engine objects are empty. This should never happen";
// Initialize temporary data structures
int num_sites = this.catalogContext.numberOfSites;
this.tmp_transactionRequestBuilders = new TransactionWorkRequestBuilder[num_sites];
}
/**
* Link this PartitionExecutor with its parent HStoreSite.
* This will initialize the references to the various components shared among the PartitionExecutors.
* @param hstore_site
*/
public void initHStoreSite(HStoreSite hstore_site) {
if (trace.val)
LOG.trace(String.format("Initializing HStoreSite components at partition %d", this.partitionId));
assert(this.hstore_site == null) :
String.format("Trying to initialize HStoreSite for PartitionExecutor #%d twice!", this.partitionId);
this.hstore_site = hstore_site;
this.depTracker = hstore_site.getDependencyTracker(this.partitionId);
this.thresholds = hstore_site.getThresholds();
this.queueManager = hstore_site.getTransactionQueueManager();
this.lockQueue = this.queueManager.getLockQueue(this.partitionId);
if (hstore_conf.site.exec_deferrable_queries) {
tmp_def_txn = new LocalTransaction(hstore_site);
}
// ARIES
this.m_ariesLog = this.hstore_site.getAriesLogger();
// -------------------------------
// BENCHMARK START NOTIFICATIONS
// -------------------------------
// Poke ourselves to update the partition stats when the first
// non-sysproc procedure shows up. I forget why we need to do this...
EventObservable<HStoreSite> observable = this.hstore_site.getStartWorkloadObservable();
observable.addObserver(new EventObserver<HStoreSite>() {
@Override
public void update(EventObservable<HStoreSite> o, HStoreSite arg) {
queueUtilityWork(STATS_WORK_MSG);
}
});
// Reset our profiling information when we get the first non-sysproc
this.profiler.resetOnEventObservable(observable);
// Initialize speculative execution scheduler
this.initSpecExecScheduler();
}
private void setSpecExecChecker(AbstractConflictChecker checker) {
this.specExecChecker = checker;
this.specExecSkipAfter = this.specExecChecker.skipConflictAfter();
if (this.specExecScheduler != null) {
this.specExecScheduler.getDebugContext().setConflictChecker(checker);
}
}
/**
* Initialize this PartitionExecutor's speculative execution scheduler
*/
private void initSpecExecScheduler() {
assert(this.specExecScheduler == null);
assert(this.hstore_site != null);
this.specExecCheckerType = SpeculationConflictCheckerType.get(hstore_conf.site.specexec_scheduler_checker);
AbstractConflictChecker checker = null;
switch (this.specExecCheckerType) {
// -------------------------------
// ROW-LEVEL
// -------------------------------
case MARKOV:
// The MarkovConflictChecker is thread-safe, so all of the partitions
// at this site can reuse the same one.
checker = MarkovConflictChecker.singleton(this.catalogContext, this.thresholds);
break;
// -------------------------------
// TABLE-LEVEL
// -------------------------------
case TABLE:
checker = new TableConflictChecker(this.catalogContext);
break;
// -------------------------------
// UNSAFE
// NOTE: You probably don't want to use this!
// -------------------------------
case UNSAFE:
checker = new UnsafeConflictChecker(this.catalogContext, hstore_conf.site.specexec_unsafe_limit);
LOG.warn(StringUtil.bold(String.format("Using %s in %s for partition %d. This is a bad idea!",
checker.getClass().getSimpleName(), this.getClass().getSimpleName(), this.partitionId)));
break;
// -------------------------------
// OPTIMISTIC
// -------------------------------
case OPTIMISTIC:
checker = new OptimisticConflictChecker(this.catalogContext, this.ee);
break;
// BUSTED!
default: {
String msg = String.format("Invalid %s '%s'",
SpeculationConflictCheckerType.class.getSimpleName(),
hstore_conf.site.specexec_scheduler_checker);
throw new RuntimeException(msg);
}
} // SWITCH
this.setSpecExecChecker(checker);
assert(this.specExecChecker != null);
SpecExecSchedulerPolicyType policy = SpecExecSchedulerPolicyType.get(hstore_conf.site.specexec_scheduler_policy);
assert(policy != null) : String.format("Invalid %s '%s'",
SpecExecSchedulerPolicyType.class.getSimpleName(),
hstore_conf.site.specexec_scheduler_policy);
assert(this.lockQueue.getPartitionId() == this.partitionId);
this.specExecScheduler = new SpecExecScheduler(this.specExecChecker,
this.partitionId,
this.lockQueue,
policy,
hstore_conf.site.specexec_scheduler_window);
this.specExecChecker.setEstimationThresholds(this.thresholds);
this.specExecScheduler.updateConf(hstore_conf, null);
if (debug.val && hstore_conf.site.specexec_enable)
LOG.debug(String.format("Initialized %s for partition %d [checker=%s, policy=%s]",
this.specExecScheduler.getClass().getSimpleName(), this.partitionId,
this.specExecChecker.getClass().getSimpleName(), policy));
}
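// For reference, the checker and policy above are selected via HStoreConf
// parameters (a summary, not an exhaustive list; see
// SpeculationConflictCheckerType and SpecExecSchedulerPolicyType for the
// authoritative enum values):
//
//   site.specexec_scheduler_checker -> MARKOV | TABLE | OPTIMISTIC | UNSAFE
//   site.specexec_scheduler_policy  -> one of SpecExecSchedulerPolicyType
//   site.specexec_scheduler_window  -> how far into the lock queue the scheduler looks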
@Override
public void updateConf(HStoreConf hstore_conf, String[] changed) {
if (this.specExecScheduler != null) {
this.specExecScheduler.updateConf(hstore_conf, changed);
}
}
// ARIES
public void waitForAriesRecoveryCompletion() {
// Busy-wait for the other threads to complete ARIES recovery.
// ONLY called from the main site. We intentionally spin instead of
// sleeping so that we don't bias the recovery timing numbers.
while (!m_ariesLog.isRecoveryCompleted()) {
}
}
public void doPartitionRecovery(long txnIdToBeginReplay) {
LOG.warn("ARIES : aries : " + this.hstore_conf.site.aries+ " aries forward only : "+this.hstore_conf.site.aries_forward_only );
if (this.hstore_conf.site.aries && this.hstore_conf.site.aries_forward_only == false) {
// long logReadStartTime = System.currentTimeMillis();
// define an array so that we can pass to native code by reference
long size[] = new long[1];
long ariesReplayPointer = readAriesLogForReplay(size);
// LOG.info("ARIES : replay pointer address: " +
// ariesReplayPointer);
LOG.info("ARIES : partition recovery started at partition : " + this.partitionId + " log size :" + size[0]);
// long logReadEndTime = System.currentTimeMillis();
// LOG.info("ARIES : log read in " + (logReadEndTime -
// logReadStartTime) + " milliseconds");
long ariesStartTime = System.currentTimeMillis();
m_ariesLog.setPointerToReplayLog(ariesReplayPointer, size[0]);
m_ariesLog.setTxnIdToBeginReplay(txnIdToBeginReplay);
waitForAriesRecoveryCompletion();
freePointerToReplayLog(ariesReplayPointer);
long ariesEndTime = System.currentTimeMillis();
LOG.info("ARIES : partition recovery finished in " + (ariesEndTime - ariesStartTime) + " milliseconds");
m_ariesLog.init();
}
}
// ----------------------------------------------------------------------------
// MAIN EXECUTION LOOP
// ----------------------------------------------------------------------------
/**
* Primary run method that is invoked a single time when the thread is started.
* Has the opportunity to do startup config.
*/
@Override
public final void run() {
if (this.hstore_site == null) {
String msg = String.format("Trying to start %s for partition %d before its HStoreSite was initialized",
this.getClass().getSimpleName(), this.partitionId);
throw new RuntimeException(msg);
}
else if (this.self != null) {
String msg = String.format("Trying to restart %s for partition %d after it was already running",
this.getClass().getSimpleName(), this.partitionId);
throw new RuntimeException(msg);
}
// Initialize all of our VoltProcedures handles
// This needs to be done here so that the Workload trace handles can be
// set up properly
this.initializeVoltProcedures();
this.self = Thread.currentThread();
this.self.setName(HStoreThreadManager.getThreadName(this.hstore_site, this.partitionId));
this.hstore_coordinator = hstore_site.getCoordinator();
this.hstore_site.getThreadManager().registerEEThread(partition);
this.shutdown_latch = new Semaphore(0);
this.shutdown_state = ShutdownState.STARTED;
if (hstore_conf.site.exec_profiling) profiler.start_time = System.currentTimeMillis();
assert(this.hstore_site != null);
assert(this.hstore_coordinator != null);
assert(this.specExecScheduler != null);
assert(this.queueManager != null);
// ARIES :: Starts recovery on partition
if(m_ariesLog != null){
doPartitionRecovery(Long.MIN_VALUE);
}
// *********************************** DEBUG ***********************************
if (hstore_conf.site.exec_validate_work) {
LOG.warn("Enabled Distributed Transaction Validation Checker");
}
// *********************************** DEBUG ***********************************
// Things that we will need in the loop below
InternalMessage nextWork = null;
AbstractTransaction nextTxn = null;
if (debug.val)
LOG.debug("Starting PartitionExecutor run loop...");
try {
while (this.shutdown_state == ShutdownState.STARTED) {
this.currentTxnId = null;
nextTxn = null;
nextWork = null;
// This is the starting state of the PartitionExecutor.
// At this point we don't have a txn to execute nor are we involved
// in a distributed txn running at another partition.
// So we need to go to our PartitionLockQueue and get back the next
// txn that will have our lock.
if (this.currentDtxn == null) {
this.tick();
if (hstore_conf.site.exec_profiling) profiler.poll_time.start();
try {
nextTxn = this.queueManager.checkLockQueue(this.partitionId); // NON-BLOCKING
} finally {
if (hstore_conf.site.exec_profiling) profiler.poll_time.stopIfStarted();
}
// If we get something back here, then it should become our current transaction.
if (nextTxn != null) {
// If it's a single-partition txn, then we can return the StartTxnMessage
// so that we can fire it off right away.
if (nextTxn.isPredictSinglePartition()) {
LocalTransaction localTxn = (LocalTransaction)nextTxn;
nextWork = localTxn.getStartTxnMessage();
if (hstore_conf.site.txn_profiling && localTxn.profiler != null)
localTxn.profiler.startQueueExec();
}
// If it's a distributed txn, then we'll want to just set it as our
// current dtxn at this partition and then keep checking the queue
// for more work.
else {
this.setCurrentDtxn(nextTxn);
}
}
}
// -------------------------------
// Poll Work Queue
// -------------------------------
// Check if we have anything to do right now
if (nextWork == null) {
if (hstore_conf.site.exec_profiling) profiler.idle_time.start();
try {
// If we're allowed to speculatively execute txns, then we don't want to have
// to wait to see if anything will show up in our work queue.
if (hstore_conf.site.specexec_enable && this.lockQueue.approximateIsEmpty() == false) {
nextWork = this.work_queue.poll();
/*if (nextWork != null) {
System.out.println(String.format("Polled a work %s from partition %d",
nextWork.getClass().getSimpleName(), this.work_queue.size()));
} else {
System.out.println("Null work!");
}*/
} else {
nextWork = this.work_queue.poll(WORK_QUEUE_POLL_TIME, WORK_QUEUE_POLL_TIMEUNIT);
/*if (nextWork != null) {
LOG.info(String.format("Polled a work %s from partition %d",
nextWork.getClass().getSimpleName(), this.work_queue.size()));
} else {
LOG.info("Null work!");
}*/
}
} catch (InterruptedException ex) {
continue;
} finally {
if (hstore_conf.site.exec_profiling) profiler.idle_time.stopIfStarted();
}
}
// -------------------------------
// Process Work
// -------------------------------
if (nextWork != null) {
if (trace.val) LOG.trace("Next Work: " + nextWork);
if (hstore_conf.site.exec_profiling) {
profiler.numMessages.put(nextWork.getClass().getSimpleName());
profiler.exec_time.start();
if (this.currentDtxn != null) profiler.sp2_time.stopIfStarted();
}
try {
// -------------------------------
// TRANSACTIONAL WORK
// -------------------------------
if (nextWork instanceof InternalTxnMessage) {
this.processInternalTxnMessage((InternalTxnMessage)nextWork);
}
// -------------------------------
// EVERYTHING ELSE
// -------------------------------
else {
this.processInternalMessage(nextWork);
}
} finally {
if (hstore_conf.site.exec_profiling) {
profiler.exec_time.stopIfStarted();
if (this.currentDtxn != null) profiler.sp2_time.start();
}
}
if (this.currentTxnId != null) this.lastExecutedTxnId = this.currentTxnId;
}
// Check if we have any utility work to do while we wait
else if (hstore_conf.site.specexec_enable) {
// if (trace.val)
// LOG.trace(String.format("The %s for partition %s empty. Checking for utility work...",
// this.work_queue.getClass().getSimpleName(), this.partitionId));
if (this.utilityWork()) {
nextWork = UTIL_WORK_MSG;
}
} else {
ThreadUtil.sleep(5);
}
} // WHILE
} catch (final Throwable ex) {
if (this.isShuttingDown() == false) {
// ex.printStackTrace();
LOG.fatal(String.format("Unexpected error at partition %d [current=%s, lastDtxn=%s]",
this.partitionId, this.currentTxn, this.lastDtxnDebug), ex);
if (this.currentTxn != null) LOG.fatal("TransactionState Dump:\n" + this.currentTxn.debug());
}
this.shutdown_latch.release();
this.hstore_coordinator.shutdownClusterBlocking(ex);
} finally {
if (debug.val) {
String txnDebug = "";
if (this.currentTxn != null && this.currentTxn.getBasePartition() == this.partitionId) {
txnDebug = " while a txn is still running\n" + this.currentTxn.debug();
}
LOG.warn(String.format("PartitionExecutor %d is stopping%s%s",
this.partitionId,
(this.currentTxnId != null ? " In-Flight Txn: #" + this.currentTxnId : ""),
txnDebug));
}
// Release the shutdown latch in case anybody waiting for us
this.shutdown_latch.release();
}
}
/**
* Special function that allows us to do some utility work while
* we are waiting for a response or something real to do.
* Note: this tracks how long the system spends doing utility work. It would
* be interesting to have the system report on this before it shuts down.
* @return true if there is more utility work that can be done
*/
private boolean utilityWork() {
if (hstore_conf.site.exec_profiling) this.profiler.util_time.start();
// -------------------------------
// Poll Lock Queue
// -------------------------------
LocalTransaction specTxn = null;
InternalMessage work = null;
// Check whether there is something we can speculatively execute right now
if (this.specExecIgnoreCurrent == false && this.lockQueue.approximateIsEmpty() == false) {
// if (trace.val)
// LOG.trace(String.format("Checking %s for something to do at partition %d while %s",
// this.specExecScheduler.getClass().getSimpleName(),
// this.partitionId,
// (this.currentDtxn != null ? "blocked on " + this.currentDtxn : "idle")));
assert(hstore_conf.site.specexec_enable) :
"Trying to schedule speculative txn even though it is disabled";
SpeculationType specType = this.calculateSpeculationType();
if (hstore_conf.site.exec_profiling) this.profiler.conflicts_time.start();
try {
specTxn = this.specExecScheduler.next(this.currentDtxn, specType);
} finally {
if (hstore_conf.site.exec_profiling) this.profiler.conflicts_time.stopIfStarted();
}
// Because we don't have fine-grained undo support, we are just going to
// keep all of our speculative execution txn results around
if (specTxn != null) {
// TODO: What we really want to do is check to see whether we have anything
// in our work queue before we go ahead and fire off this txn
if (debug.val) {
if (this.work_queue.isEmpty() == false) {
LOG.warn(String.format("About to speculatively execute %s on partition %d but there " +
"are %d messages in the work queue\n%s",
specTxn, this.partitionId, this.work_queue.size(),
CollectionUtil.first(this.work_queue)));
}
LOG.debug(String.format("Utility Work found speculative txn to execute on " +
"partition %d [%s, specType=%s]",
this.partitionId, specTxn, specType));
}
// IMPORTANT: We need to make sure that we remove this transaction from the lock queue
// before we execute it so that we don't try to run it again.
// We have to do this now because otherwise we may get the same transaction again
assert(this.lockQueue.contains(specTxn.getTransactionId()) == false) :
String.format("Failed to remove speculative %s before executing", specTxn);
assert(specTxn.getBasePartition() == this.partitionId) :
String.format("Trying to speculatively execute %s at partition %d but its base partition is %d\n%s",
specTxn, this.partitionId, specTxn.getBasePartition(), specTxn.debug());
assert(specTxn.isMarkedControlCodeExecuted() == false) :
String.format("Trying to speculatively execute %s at partition %d but it was already executed\n%s",
specTxn, this.partitionId, specTxn.debug());
assert(specTxn.isSpeculative() == false) :
String.format("Trying to speculatively execute %s at partition %d but it was already speculative\n%s",
specTxn, this.partitionId, specTxn.debug());
// It's also important that we cancel this txn's init queue callback, otherwise
// it will never get cleaned up properly. This is necessary in order to support
// sending out client results *before* the dtxn finishes
specTxn.getInitCallback().cancel();
// Ok now that that's out of the way, let's run this baby...
specTxn.setSpeculative(specType);
if (hstore_conf.site.exec_profiling) profiler.specexec_time.start();
try {
this.executeTransaction(specTxn);
} finally {
if (hstore_conf.site.exec_profiling) profiler.specexec_time.stopIfStarted();
}
}
// else if (trace.val) {
// LOG.trace(String.format("%s - No speculative execution candidates found at partition %d [queueSize=%d]",
// this.currentDtxn, this.partitionId, this.queueManager.getLockQueue(this.partitionId).size()));
// }
}
// else if (trace.val && this.currentDtxn != null) {
// LOG.trace(String.format("%s - Skipping check for speculative execution txns at partition %d " +
// "[lockQueue=%d, specExecIgnoreCurrent=%s]",
// this.currentDtxn, this.partitionId, this.lockQueue.size(), this.specExecIgnoreCurrent));
// }
if (hstore_conf.site.exec_profiling) this.profiler.util_time.stopIfStarted();
return (specTxn != null || work != null);
}
// ----------------------------------------------------------------------------
// MESSAGE PROCESSING METHODS
// ----------------------------------------------------------------------------
/**
* Process an InternalMessage
* @param work
*/
private final void processInternalMessage(InternalMessage work) {
// -------------------------------
// UTILITY WORK
// -------------------------------
if (work instanceof UtilityWorkMessage) {
// UPDATE MEMORY STATS
if (work instanceof UpdateMemoryMessage) {
//LOG.info("Update mem stats");
this.updateMemoryStats(EstTime.currentTimeMillis());
}
// TABLE STATS REQUEST
else if (work instanceof TableStatsRequestMessage) {
TableStatsRequestMessage stats_work = (TableStatsRequestMessage)work;
VoltTable results[] = this.ee.getStats(SysProcSelector.TABLE,
stats_work.getLocators(),
false,
EstTime.currentTimeMillis());
assert(results.length == 1);
//results[0].advanceRow();
//LOG.info(String.format("Notified ovserver at partition %d", results[0].getLong("PARTITION_ID")));
stats_work.getObservable().notifyObservers(results[0]);
}
else {
// IGNORE
}
}
// -------------------------------
// DEFERRED QUERIES
// -------------------------------
else if (work instanceof DeferredQueryMessage) {
DeferredQueryMessage def_work = (DeferredQueryMessage)work;
// Set the txnId in our handle to be what the original txn was that
// deferred this query.
tmp_def_stmt[0] = def_work.getStmt();
tmp_def_params[0] = def_work.getParams();
tmp_def_txn.init(def_work.getTxnId(),
-1, // We don't really need the clientHandle
EstTime.currentTimeMillis(),
this.partitionId,
catalogContext.getPartitionSetSingleton(this.partitionId),
false,
false,
tmp_def_stmt[0].getProcedure(),
def_work.getParams(),
null // We don't need the client callback
);
this.executeSQLStmtBatch(tmp_def_txn, 1, tmp_def_stmt, tmp_def_params, false, false);
}
// -------------------------------
// SNAPSHOT WORK
// -------------------------------
else if (work instanceof PotentialSnapshotWorkMessage) {
m_snapshotter.doSnapshotWork(ee);
}
// -------------------------------
// BAD MOJO!
// -------------------------------
else {
String msg = "Unexpected work message in queue: " + work;
throw new ServerFaultException(msg, this.currentTxnId);
}
}
/**
* Process an InternalTxnMessage
* @param work
*/
private void processInternalTxnMessage(InternalTxnMessage work) {
//LOG.info("process a txn msg");
AbstractTransaction ts = work.getTransaction();
this.currentTxn = ts;
this.currentTxnId = ts.getTransactionId();
// If this transaction has already been aborted and they are trying to give us
// something that isn't a FinishTaskMessage, then we won't bother processing it
if (ts.isAborted() && (work instanceof FinishTxnMessage) == false) {
if (debug.val)
LOG.debug(String.format("%s - Cannot process %s on partition %d because txn was marked as aborted",
ts, work.getClass().getSimpleName(), this.partitionId));
return;
}
if (debug.val)
LOG.debug(String.format("Processing %s at partition %d", work, this.partitionId));
// -------------------------------
// Start Transaction
// -------------------------------
if (work instanceof StartTxnMessage) {
if (hstore_conf.site.specexec_enable && ts.isPredictSinglePartition()) this.specExecScheduler.reset();
if (hstore_conf.site.exec_profiling) profiler.txn_time.start();
try {
this.executeTransaction((LocalTransaction)ts);
} finally {
if (hstore_conf.site.exec_profiling) profiler.txn_time.stopIfStarted();
}
}
// -------------------------------
// Execute Query Plan Fragments
// -------------------------------
else if (work instanceof WorkFragmentMessage) {
WorkFragment fragment = ((WorkFragmentMessage)work).getFragment();
assert(fragment != null);
// HACK HACK HACK
if (ts.isInitialized() == false) {
LOG.warn(String.format("Skipping %s at partition %d for unitialized txn",
work.getClass().getSimpleName(), this.partitionId));
return;
}
// Get the ParameterSet array for this WorkFragment.
// It can either be attached to the AbstractTransaction handle if it came
// over the wire directly from the txn's base partition, or it can be
// attached separately for prefetch WorkFragments
ParameterSet parameters[] = null;
if (fragment.getPrefetch()) {
parameters = ts.getPrefetchParameterSets();
ts.markExecPrefetchQuery(this.partitionId);
if (trace.val && ts.isSysProc() == false)
LOG.trace(ts + " - Prefetch Parameters:\n" + StringUtil.join("\n", parameters));
} else {
parameters = ts.getAttachedParameterSets();
if (trace.val && ts.isSysProc() == false)
LOG.trace(ts + " - Attached Parameters:\n" + StringUtil.join("\n", parameters));
}
// At this point we know that we are either the current dtxn or the current dtxn is null
// We will allow any read-only transaction to commit if
// (1) The WorkFragment for the remote txn is read-only
// (2) This txn has always been read-only up to this point at this partition
ExecutionMode newMode = null;
if (hstore_conf.site.specexec_enable) {
if (fragment.getReadOnly() && ts.isExecReadOnly(this.partitionId)) {
newMode = ExecutionMode.COMMIT_READONLY;
} else {
newMode = ExecutionMode.COMMIT_NONE;
}
} else {
newMode = ExecutionMode.DISABLED;
}
// There is no current DTXN, so that means it's us!
if (this.currentDtxn == null) {
this.setCurrentDtxn(ts);
if (debug.val)
LOG.debug(String.format("Marking %s as current DTXN on partition %d [nextMode=%s]",
ts, this.partitionId, newMode));
}
// There is a current DTXN but it's not us!
// That means we need to block ourselves until it finishes
else if (this.currentDtxn != ts) {
if (debug.val)
LOG.debug(String.format("%s - Blocking on partition %d until current Dtxn %s finishes",
ts, this.partitionId, this.currentDtxn));
this.blockTransaction(work);
return;
}
assert(this.currentDtxn == ts) :
String.format("Trying to execute a second Dtxn %s before the current one has finished [current=%s]",
ts, this.currentDtxn);
this.setExecutionMode(ts, newMode);
this.processWorkFragment(ts, fragment, parameters);
}
// -------------------------------
// Finish Transaction
// -------------------------------
else if (work instanceof FinishTxnMessage) {
FinishTxnMessage fTask = (FinishTxnMessage)work;
this.finishDistributedTransaction(fTask.getTransaction(), fTask.getStatus());
}
// -------------------------------
// Prepare Transaction
// -------------------------------
else if (work instanceof PrepareTxnMessage) {
PrepareTxnMessage pTask = (PrepareTxnMessage)work;
this.prepareTransaction(pTask.getTransaction(), pTask.getCallback());
}
// -------------------------------
// Set Distributed Transaction
// -------------------------------
else if (work instanceof SetDistributedTxnMessage) {
if (this.currentDtxn != null) {
this.blockTransaction(work);
} else {
this.setCurrentDtxn(((SetDistributedTxnMessage)work).getTransaction());
}
}
}
// ----------------------------------------------------------------------------
// DATA MEMBER METHODS
// ----------------------------------------------------------------------------
public final ExecutionEngine getExecutionEngine() {
return (this.ee);
}
public final Thread getExecutionThread() {
return (this.self);
}
public final HsqlBackend getHsqlBackend() {
return (this.hsql);
}
public final PartitionEstimator getPartitionEstimator() {
return (this.p_estimator);
}
public final TransactionEstimator getTransactionEstimator() {
return (this.localTxnEstimator);
}
public final BackendTarget getBackendTarget() {
return (this.backend_target);
}
public final HStoreSite getHStoreSite() {
return (this.hstore_site);
}
public final HStoreConf getHStoreConf() {
return (this.hstore_conf);
}
public final CatalogContext getCatalogContext() {
return (this.catalogContext);
}
public final int getSiteId() {
return (this.siteId);
}
public final Partition getPartition() {
return (this.partition);
}
public final int getPartitionId() {
return (this.partitionId);
}
public final DependencyTracker getDependencyTracker() {
return (this.depTracker);
}
public final PartitionExecutorProfiler getProfiler() {
return profiler;
}
// ----------------------------------------------------------------------------
// VOLT PROCEDURE HELPER METHODS
// ----------------------------------------------------------------------------
protected void initializeVoltProcedures() {
// load up all the stored procedures
for (final Procedure catalog_proc : catalogContext.procedures) {
VoltProcedure volt_proc = this.initializeVoltProcedure(catalog_proc);
Queue<VoltProcedure> queue = new LinkedList<VoltProcedure>();
queue.add(volt_proc);
this.procedures[catalog_proc.getId()] = queue;
} // FOR
}
@SuppressWarnings("unchecked")
protected VoltProcedure initializeVoltProcedure(Procedure catalog_proc) {
VoltProcedure volt_proc = null;
if (catalog_proc.getHasjava()) {
// Only try to load the Java class file for the SP if it has one
Class<? extends VoltProcedure> p_class = null;
final String className = catalog_proc.getClassname();
try {
p_class = (Class<? extends VoltProcedure>)Class.forName(className);
volt_proc = (VoltProcedure)p_class.newInstance();
} catch (Exception e) {
throw new ServerFaultException("Failed to created VoltProcedure instance for " + catalog_proc.getName() , e);
}
} else {
volt_proc = new VoltProcedure.StmtProcedure();
}
volt_proc.init(this, catalog_proc, this.backend_target);
return (volt_proc);
}
/**
* Returns a VoltProcedure instance for the given stored procedure id.
* A pooled instance is reused if one is available; otherwise a new one is created.
* <B>Note:</B> Two txns can never use the same VoltProcedure instance at the same time.
* @param proc_id
* @return
*/
protected VoltProcedure getVoltProcedure(int proc_id) {
VoltProcedure voltProc = this.procedures[proc_id].poll();
if (voltProc == null) {
Procedure catalog_proc = catalogContext.getProcedureById(proc_id);
voltProc = this.initializeVoltProcedure(catalog_proc);
}
return (voltProc);
}
/**
* Return the given VoltProcedure back into the queue to be re-used again
* @param voltProc
*/
protected void finishVoltProcedure(VoltProcedure voltProc) {
voltProc.finish();
this.procedures[voltProc.getProcedureId()].offer(voltProc);
}
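// Illustrative usage sketch (not from the original source): callers are
// expected to pair getVoltProcedure() with finishVoltProcedure() so that
// handles are recycled through the per-procedure pool, e.g.:
//
//   VoltProcedure volt_proc = this.getVoltProcedure(ts.getProcedure().getId());
//   try {
//       ClientResponseImpl cr = volt_proc.call(ts, ts.getProcedureParameters().toArray());
//   } finally {
//       this.finishVoltProcedure(volt_proc); // return the handle to the pool
//   }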
// ----------------------------------------------------------------------------
// UTILITY METHODS
// ----------------------------------------------------------------------------
private void tick() {
// invoke native ee tick if at least one second has passed
final long time = EstTime.currentTimeMillis();
long elapsed = time - this.lastTickTime;
if (elapsed >= 1000) {
if ((this.lastTickTime != 0) && (this.ee != null)) {
this.ee.tick(time, this.lastCommittedTxnId);
if ((time - this.lastStatsTime) >= 20000) {
this.updateMemoryStats(time);
}
}
this.lastTickTime = time;
}
// LOGICAL
// do other periodic work
if (m_snapshotter != null)
m_snapshotter.doSnapshotWork(this.ee);
}
private void updateMemoryStats(long time) {
if (trace.val)
LOG.trace("Updating memory stats for partition " + this.partitionId);
Collection<Table> tables = this.catalogContext.database.getTables();
int[] tableIds = new int[tables.size()];
int i = 0;
for (Table table : tables) {
tableIds[i++] = table.getRelativeIndex();
}
// data to aggregate
long tupleCount = 0;
@SuppressWarnings("unused")
long tupleAccessCount = 0;
int tupleDataMem = 0;
int tupleAllocatedMem = 0;
int indexMem = 0;
int stringMem = 0;
// ACTIVE
long tuplesEvicted = 0;
long blocksEvicted = 0;
long bytesEvicted = 0;
// GLOBAL WRITTEN
long tuplesWritten = 0;
long blocksWritten = 0;
long bytesWritten = 0;
// GLOBAL READ
long tuplesRead = 0;
long blocksRead = 0;
long bytesRead = 0;
// update table stats
VoltTable[] s1 = null;
try {
s1 = this.ee.getStats(SysProcSelector.TABLE, tableIds, false, time);
} catch (RuntimeException ex) {
LOG.warn("Unexpected error when trying to retrieve EE stats for partition " + this.partitionId, ex);
}
if (s1 != null) {
VoltTable stats = s1[0];
assert(stats != null);
// rollup the table memory stats for this site
while (stats.advanceRow()) {
tupleCount += stats.getLong("TUPLE_COUNT");
tupleAccessCount += stats.getLong("TUPLE_ACCESSES");
tupleAllocatedMem += (int) stats.getLong("TUPLE_ALLOCATED_MEMORY");
tupleDataMem += (int) stats.getLong("TUPLE_DATA_MEMORY");
stringMem += (int) stats.getLong("STRING_DATA_MEMORY");
indexMem += (int) stats.getLong("INDEX_MEMORY");
// ACTIVE
if (hstore_conf.site.anticache_enable) {
tuplesEvicted += (long) stats.getLong("ANTICACHE_TUPLES_EVICTED");
blocksEvicted += (long) stats.getLong("ANTICACHE_BLOCKS_EVICTED");
bytesEvicted += (long) stats.getLong("ANTICACHE_BYTES_EVICTED");
// GLOBAL WRITTEN
tuplesWritten += (long) stats.getLong("ANTICACHE_TUPLES_WRITTEN");
blocksWritten += (long) stats.getLong("ANTICACHE_BLOCKS_WRITTEN");
bytesWritten += (long) stats.getLong("ANTICACHE_BYTES_WRITTEN");
// GLOBAL READ
tuplesRead += (long) stats.getLong("ANTICACHE_TUPLES_READ");
blocksRead += (long) stats.getLong("ANTICACHE_BLOCKS_READ");
bytesRead += (long) stats.getLong("ANTICACHE_BYTES_READ");
}
}
stats.resetRowPosition();
}
// update the rolled up memory statistics
MemoryStats memoryStats = hstore_site.getMemoryStatsSource();
memoryStats.eeUpdateMemStats(this.partitionId,
tupleCount,
tupleDataMem,
tupleAllocatedMem,
indexMem,
stringMem,
0, // FIXME
// ACTIVE
tuplesEvicted, blocksEvicted, bytesEvicted,
// GLOBAL WRITTEN
tuplesWritten, blocksWritten, bytesWritten,
// GLOBAL READ
tuplesRead, blocksRead, bytesRead
);
this.lastStatsTime = time;
}
public void haltProcessing() {
LOG.warn("Halting transaction processing at partition " + this.partitionId);
ExecutionMode origMode = this.currentExecMode;
this.setExecutionMode(this.currentTxn, ExecutionMode.DISABLED_REJECT);
List<InternalMessage> toKeep = new ArrayList<InternalMessage>();
InternalMessage msg = null;
while ((msg = this.work_queue.poll()) != null) {
// -------------------------------
// StartTxnMessage
// -------------------------------
if (msg instanceof StartTxnMessage) {
StartTxnMessage startMsg = (StartTxnMessage)msg;
hstore_site.transactionReject((LocalTransaction)startMsg.getTransaction(), Status.ABORT_REJECT);
}
// -------------------------------
// Things to keep
// -------------------------------
else {
toKeep.add(msg);
}
} // WHILE
// assert(this.work_queue.isEmpty());
this.work_queue.addAll(toKeep);
// For now we'll set it back so that we can execute new stuff. Clearing out
// the queue should be enough for now.
this.setExecutionMode(this.currentTxn, origMode);
}
/**
* Figure out the current speculative execution mode for this partition
* @return
*/
private SpeculationType calculateSpeculationType() {
SpeculationType specType = SpeculationType.NULL;
// IDLE
if (this.currentDtxn == null) {
specType = SpeculationType.IDLE;
}
// LOCAL
else if (this.currentDtxn.getBasePartition() == this.partitionId) {
if (((LocalTransaction)this.currentDtxn).isMarkedControlCodeExecuted() == false) {
specType = SpeculationType.IDLE;
} else if (this.currentDtxn.isMarkedPrepared(this.partitionId)) {
specType = SpeculationType.SP3_LOCAL;
} else {
specType = SpeculationType.SP1_LOCAL;
}
}
// REMOTE
else {
if (this.currentDtxn.isMarkedPrepared(this.partitionId)) {
specType = SpeculationType.SP3_REMOTE;
} else if (this.currentDtxn.hasExecutedWork(this.partitionId) == false) {
specType = SpeculationType.SP2_REMOTE_BEFORE;
} else {
specType = SpeculationType.SP2_REMOTE_AFTER;
}
}
return (specType);
}
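// Summary of the mapping implemented above (derived from the code, for reference):
//
//   currentDtxn == null                        -> IDLE
//   local dtxn, control code not yet executed  -> IDLE
//   local dtxn, prepared at this partition     -> SP3_LOCAL
//   local dtxn, still executing                -> SP1_LOCAL
//   remote dtxn, prepared at this partition    -> SP3_REMOTE
//   remote dtxn, no work executed here yet     -> SP2_REMOTE_BEFORE
//   remote dtxn, work already executed here    -> SP2_REMOTE_AFTER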
/**
* Set the current ExecutionMode for this executor. The transaction handle given as an input
* argument is the transaction that caused the mode to get changed. It is only used for debug
* purposes.
* @param ts
* @param newMode
*/
private void setExecutionMode(AbstractTransaction ts, ExecutionMode newMode) {
if (debug.val && this.currentExecMode != newMode) {
LOG.debug(String.format("Setting ExecutionMode for partition %d to %s because of %s [origMode=%s]",
this.partitionId, newMode, ts, this.currentExecMode));
}
assert(newMode != ExecutionMode.COMMIT_READONLY ||
(newMode == ExecutionMode.COMMIT_READONLY && this.currentDtxn != null)) :
String.format("%s is trying to set partition %d to %s when the current DTXN is null?", ts, this.partitionId, newMode);
this.currentExecMode = newMode;
}
/**
* Returns the next undo token to use when handing work to the EE.
* MAX_VALUE = no undo
* @return
*/
private long getNextUndoToken() {
if (trace.val) LOG.trace(String.format("Next Undo for Partition %d: %d", this.partitionId, this.lastUndoToken+1));
return (++this.lastUndoToken);
}
/**
* For the given txn, return the next undo token to use for its next execution round
* @param ts
* @param readOnly
* @return
*/
private long calculateNextUndoToken(AbstractTransaction ts, boolean readOnly) {
long undoToken = HStoreConstants.DISABLE_UNDO_LOGGING_TOKEN;
long lastUndoToken = ts.getLastUndoToken(this.partitionId);
boolean singlePartition = ts.isPredictSinglePartition();
// Speculative txns always need an undo token
// It's just easier this way...
if (ts.isSpeculative()) {
undoToken = this.getNextUndoToken();
}
// If this plan is read-only, then we don't need a new undo token (unless
// we don't have one already)
else if (readOnly) {
if (lastUndoToken == HStoreConstants.NULL_UNDO_LOGGING_TOKEN) {
lastUndoToken = HStoreConstants.DISABLE_UNDO_LOGGING_TOKEN;
// lastUndoToken = this.getNextUndoToken();
}
undoToken = lastUndoToken;
}
// Otherwise, we need to figure out whether we want to be a brave soul and
// not use undo logging at all
else {
// If one of the following conditions is true, then we need to get a new token:
// (1) This is our first time up at bat
// (2) We're a distributed transaction
// (3) The force undo logging option is enabled
if (lastUndoToken == HStoreConstants.NULL_UNDO_LOGGING_TOKEN ||
singlePartition == false ||
hstore_conf.site.exec_force_undo_logging_all) {
undoToken = this.getNextUndoToken();
}
// If we originally executed this transaction with undo buffers and we have a MarkovEstimate,
// then we can go back and check whether we want to disable undo logging for the rest of the transaction
else if (ts.getEstimatorState() != null && singlePartition && ts.isSpeculative() == false) {
Estimate est = ts.getEstimatorState().getLastEstimate();
assert(est != null) : "Got back null MarkovEstimate for " + ts;
if (hstore_conf.site.exec_no_undo_logging == false ||
est.isValid() == false ||
est.isAbortable(this.thresholds) ||
est.isReadOnlyPartition(this.thresholds, this.partitionId) == false) {
undoToken = lastUndoToken;
} else if (debug.val) {
LOG.warn(String.format("Bold! Disabling undo buffers for inflight %s\n%s", ts, est));
}
}
}
// Make sure that it's at least as big as the last one handed out
if (undoToken < this.lastUndoToken) undoToken = this.lastUndoToken;
if (debug.val)
LOG.debug(String.format("%s - Next undo token at partition %d is %s [readOnly=%s]",
ts, this.partitionId,
(undoToken == HStoreConstants.DISABLE_UNDO_LOGGING_TOKEN ? "<DISABLED>" :
(undoToken == HStoreConstants.NULL_UNDO_LOGGING_TOKEN ? "<NULL>" : undoToken)),
readOnly));
return (undoToken);
}
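// Worked example (derived from the logic above): a speculative txn always
// draws a fresh token; a read-only txn reuses its last token (or runs with
// undo logging disabled if it never had one); a single-partition write with
// a valid MarkovEstimate that says it cannot abort may run without undo
// logging when site.exec_no_undo_logging is enabled. All other writes get
// a new token via getNextUndoToken().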
/**
* Populate the provided inputs map with the VoltTables needed for the given
* input DependencyId. If the txn is a LocalTransaction, then we will
* get the data we need from the base partition's DependencyTracker.
* @param ts
* @param input_dep_id
* @param inputs
*/
private void getFragmentInputs(AbstractTransaction ts,
int input_dep_id,
Map<Integer, List<VoltTable>> inputs) {
if (input_dep_id == HStoreConstants.NULL_DEPENDENCY_ID) return;
if (trace.val)
LOG.trace(String.format("%s - Attempting to retrieve input dependencies for DependencyId #%d",
ts, input_dep_id));
// If the Transaction is on the same HStoreSite, then all the
// input dependencies will be internal and can be retrieved locally
if (ts instanceof LocalTransaction) {
DependencyTracker txnTracker = null;
if (ts.getBasePartition() != this.partitionId) {
txnTracker = hstore_site.getDependencyTracker(ts.getBasePartition());
} else {
txnTracker = this.depTracker;
}
List<VoltTable> deps = txnTracker.getInternalDependency((LocalTransaction)ts, input_dep_id);
assert(deps != null);
assert(inputs.containsKey(input_dep_id) == false);
inputs.put(input_dep_id, deps);
if (trace.val)
LOG.trace(String.format("%s - Retrieved %d INTERNAL VoltTables for DependencyId #%d\n%s",
ts, deps.size(), input_dep_id, deps));
}
// Otherwise they will be "attached" inputs to the RemoteTransaction handle
// We should really try to merge these two concepts into a single function call
else if (ts.getAttachedInputDependencies().containsKey(input_dep_id)) {
List<VoltTable> deps = ts.getAttachedInputDependencies().get(input_dep_id);
List<VoltTable> pDeps = null;
// We have to copy the tables if we have debugging enabled
if (trace.val) {
pDeps = new ArrayList<VoltTable>();
for (VoltTable vt : deps) {
ByteBuffer buffer = vt.getTableDataReference();
byte arr[] = new byte[vt.getUnderlyingBufferSize()];
buffer.get(arr, 0, arr.length);
pDeps.add(new VoltTable(ByteBuffer.wrap(arr), true));
}
} else {
pDeps = deps;
}
inputs.put(input_dep_id, pDeps);
if (trace.val)
LOG.trace(String.format("%s - Retrieved %d ATTACHED VoltTables for DependencyId #%d",
ts, deps.size(), input_dep_id));
}
}
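// Sketch of the expected call pattern (not from the original source):
// processWorkFragment() invokes this once per input dependency id,
// accumulating everything into a single map that is later handed to the EE:
//
//   this.tmp_EEdependencies.clear();
//   for (int inputDepId : inputDepIds) {
//       this.getFragmentInputs(ts, inputDepId, this.tmp_EEdependencies);
//   }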
/**
* Set the given AbstractTransaction handle as the current distributed txn
* that is running at this partition. Note that this will check to make sure
* that no other txn is marked as the currentDtxn.
* @param ts
*/
private void setCurrentDtxn(AbstractTransaction ts) {
// There can never be another current dtxn still unfinished at this partition!
assert(this.currentBlockedTxns.isEmpty()) :
String.format("Concurrent multi-partition transactions at partition %d: " +
"Orig[%s] <=> New[%s] / BlockedQueue:%d",
this.partitionId, this.currentDtxn, ts, this.currentBlockedTxns.size());
assert(this.currentDtxn == null) :
String.format("Concurrent multi-partition transactions at partition %d: " +
"Orig[%s] <=> New[%s] / BlockedQueue:%d",
this.partitionId, this.currentDtxn, ts, this.currentBlockedTxns.size());
// Check whether we should check for speculative txns to execute whenever this
// dtxn is idle at this partition
this.currentDtxn = ts;
if (hstore_conf.site.specexec_enable && ts.isSysProc() == false && this.specExecScheduler.isDisabled() == false) {
this.specExecIgnoreCurrent = this.specExecChecker.shouldIgnoreTransaction(ts);
} else {
this.specExecIgnoreCurrent = true;
}
if (debug.val) {
LOG.debug(String.format("Set %s as the current DTXN for partition %d [specExecIgnore=%s, previous=%s]",
ts, this.partitionId, this.specExecIgnoreCurrent, this.lastDtxnDebug));
this.lastDtxnDebug = this.currentDtxn.toString();
}
if (hstore_conf.site.exec_profiling && ts.getBasePartition() != this.partitionId) {
profiler.sp2_time.start();
}
}
/**
* Reset the current dtxn for this partition
*/
private void resetCurrentDtxn() {
assert(this.currentDtxn != null) :
"Trying to reset the currentDtxn when it is already null";
if (debug.val)
LOG.debug(String.format("Resetting current DTXN for partition %d to null [previous=%s]",
this.partitionId, this.lastDtxnDebug));
this.currentDtxn = null;
}
/**
* Store a new prefetch result for a transaction
* @param txnId
* @param fragmentId
* @param partitionId
* @param params
* @param result
*/
public void addPrefetchResult(LocalTransaction ts,
int stmtCounter,
int fragmentId,
int partitionId,
int paramsHash,
VoltTable result) {
if (debug.val)
LOG.debug(String.format("%s - Adding prefetch result for %s with %d rows from partition %d " +
"[stmtCounter=%d / paramsHash=%d]",
ts, CatalogUtil.getPlanFragment(catalogContext.catalog, fragmentId).fullName(),
result.getRowCount(), partitionId, stmtCounter, paramsHash));
this.depTracker.addPrefetchResult(ts, stmtCounter, fragmentId, partitionId, paramsHash, result);
}
/**
* Returns the directory where the EE should store the mmap'ed files
* for this PartitionExecutor
* @return
*/
public static File getMMAPDir(PartitionExecutor executor) {
HStoreConf hstore_conf = executor.getHStoreConf();
Database catalog_db = CatalogUtil.getDatabase(executor.getPartition());
// First make sure that our base directory exists
String base_dir = FileUtil.realpath(hstore_conf.site.storage_mmap_dir +
File.separatorChar +
catalog_db.getProject());
//synchronized (AntiCacheManager.class) {
FileUtil.makeDirIfNotExists(base_dir);
//} // SYNC
// Then each partition will have a separate directory inside of the base one
String partitionName = HStoreThreadManager.formatPartitionName(executor.getSiteId(),
executor.getPartitionId());
File dbDirPath = new File(base_dir + File.separatorChar + partitionName);
if (hstore_conf.site.storage_mmap_reset) {
LOG.warn(String.format("Deleting storage mmap directory '%s'", dbDirPath));
FileUtil.deleteDirectory(dbDirPath);
}
FileUtil.makeDirIfNotExists(dbDirPath);
return (dbDirPath);
}
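// Example layout (illustrative; the exact partition name comes from
// HStoreThreadManager.formatPartitionName()):
//
//   <site.storage_mmap_dir>/<project>/<formatted-partition-name>/
//
// Each PartitionExecutor therefore gets its own subdirectory under the
// project-level base directory created above.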
/**
* Returns the directory where the EE should store the ARIES log files
* for this PartitionExecutor
* @return
*/
public static File getARIESDir(PartitionExecutor executor) {
HStoreConf hstore_conf = executor.getHStoreConf();
Database catalog_db = CatalogUtil.getDatabase(executor.getPartition());
// First make sure that our base directory exists
String base_dir = FileUtil.realpath(hstore_conf.site.aries_dir + File.separatorChar + catalog_db.getProject());
synchronized (PartitionExecutor.class) {
FileUtil.makeDirIfNotExists(base_dir);
} // SYNC
String partitionName = HStoreThreadManager.formatPartitionName(executor.getSiteId(), executor.getPartitionId());
File dbDirPath = new File(base_dir + File.separatorChar + partitionName);
if (hstore_conf.site.aries_reset) {
LOG.warn(String.format("Deleting aries directory '%s'", dbDirPath));
FileUtil.deleteDirectory(dbDirPath);
}
FileUtil.makeDirIfNotExists(dbDirPath);
return (dbDirPath);
}
/**
* Returns the file where the EE should store the ARIES log for this
* PartitionExecutor
*
* @return
*/
public static File getARIESFile(PartitionExecutor executor) {
File dbDir = getARIESDir(executor);
File logFile = new File(dbDir.getAbsolutePath() + File.separatorChar + executor.m_ariesDefaultLogFileName);
return (logFile);
}
// ---------------------------------------------------------------
// PartitionExecutor API
// ---------------------------------------------------------------
/**
* Queue a new transaction initialization at this partition. This will cause the
* transaction to get added to this partition's lock queue. This PartitionExecutor does
* not have to be this txn's base partition.
* @param ts
*/
public void queueSetPartitionLock(AbstractTransaction ts) {
assert(ts.isInitialized()) : "Unexpected uninitialized transaction: " + ts;
SetDistributedTxnMessage work = ts.getSetDistributedTxnMessage();
boolean success = this.work_queue.offer(work);
assert(success) :
String.format("Failed to queue %s at partition %d for %s",
work, this.partitionId, ts);
if (debug.val)
LOG.debug(String.format("%s - Added %s to front of partition %d " +
"work queue [size=%d]",
ts, work.getClass().getSimpleName(), this.partitionId,
this.work_queue.size()));
if (hstore_conf.site.specexec_enable) this.specExecScheduler.interruptSearch(work);
}
/**
* New work from the coordinator that this local site needs to execute (non-blocking)
* This method will simply chuck the task into the work queue.
* We should not be sent an InitiateTaskMessage here!
* @param ts
* @param task
*/
public void queueWork(AbstractTransaction ts, WorkFragment fragment) {
assert(ts.isInitialized()) : "Unexpected uninitialized transaction: " + ts;
WorkFragmentMessage work = ts.getWorkFragmentMessage(fragment);
boolean success = this.work_queue.offer(work); // , true);
assert(success) :
String.format("Failed to queue %s at partition %d for %s",
work, this.partitionId, ts);
ts.markQueuedWork(this.partitionId);
if (debug.val)
LOG.debug(String.format("%s - Added %s to partition %d " +
"work queue [size=%d]",
ts, work.getClass().getSimpleName(), this.partitionId,
this.work_queue.size()));
if (hstore_conf.site.specexec_enable) this.specExecScheduler.interruptSearch(work);
}
/**
* Add a new work message to our utility queue
* @param work
*/
public void queueUtilityWork(InternalMessage work) {
this.work_queue.add(work);
if (debug.val)
LOG.warn(String.format("Added utility work %s to partition %d with size %d",
work.getClass().getSimpleName(), this.partitionId, this.work_queue.size()));
}
/**
* Put the prepare request for the transaction into the queue
* @param task
* @param status The final status of the transaction
*/
public void queuePrepare(AbstractTransaction ts, PartitionCountingCallback<? extends AbstractTransaction> callback) {
assert(ts.isInitialized()) : "Uninitialized transaction: " + ts;
assert(callback.isInitialized()) : "Uninitialized callback: " + ts;
PrepareTxnMessage work = new PrepareTxnMessage(ts, callback);
boolean success = this.work_queue.offer(work);
assert(success) :
String.format("Failed to queue %s at partition %d for %s",
work, this.partitionId, ts);
if (debug.val)
LOG.debug(String.format("%s - Added %s to partition %d " +
"work queue [size=%d]",
ts, work.getClass().getSimpleName(), this.partitionId,
this.work_queue.size()));
// if (hstore_conf.site.specexec_enable) this.specExecScheduler.interruptSearch();
}
/**
* Put the finish request for the transaction into the queue
* @param task
* @param status The final status of the transaction
*/
public void queueFinish(AbstractTransaction ts, Status status) {
assert(ts.isInitialized()) : "Unexpected uninitialized transaction: " + ts;
FinishTxnMessage work = ts.getFinishTxnMessage(status);
boolean success = this.work_queue.offer(work); // , true);
assert(success) :
String.format("Failed to queue %s at partition %d for %s",
work, this.partitionId, ts);
if (debug.val)
LOG.debug(String.format("%s - Added %s to partition %d " +
"work queue [size=%d]",
ts, work.getClass().getSimpleName(), this.partitionId,
this.work_queue.size()));
// if (success) this.specExecScheduler.haltSearch();
}
/**
* Queue a new transaction invocation request at this partition
* @param ts
* @param task
* @param callback
*/
public boolean queueStartTransaction(LocalTransaction ts) {
assert(ts != null) : "Unexpected null transaction handle!";
boolean singlePartitioned = ts.isPredictSinglePartition();
boolean force = (singlePartitioned == false) || ts.isMapReduce() || ts.isSysProc();
// UPDATED 2012-07-12
// We used to have a bunch of checks to determine whether we needed to
// put the new request in the blocked queue or not. This required us to
// acquire the exec_lock to do the check and then another lock to actually put
// the request into the work_queue. Now we'll just throw it right in
// the queue (checking for throttling of course) and let the main
// thread sort out the mess of whether the txn should get blocked or not
if (this.currentExecMode == ExecutionMode.DISABLED_REJECT) {
if (debug.val)
LOG.warn(String.format("%s - Not queuing txn at partition %d because current mode is %s",
ts, this.partitionId, this.currentExecMode));
return (false);
}
StartTxnMessage work = ts.getStartTxnMessage();
if (debug.val)
LOG.debug(String.format("Queuing %s for '%s' request on partition %d " +
"[currentDtxn=%s, queueSize=%d, mode=%s]",
work.getClass().getSimpleName(), ts.getProcedure().getName(), this.partitionId,
this.currentDtxn, this.work_queue.size(), this.currentExecMode));
boolean success = this.work_queue.offer(work); // , force);
if (debug.val && force && success == false) {
String msg = String.format("Failed to add %s even though force flag was true!", ts);
throw new ServerFaultException(msg, ts.getTransactionId());
}
if (success && hstore_conf.site.specexec_enable) this.specExecScheduler.interruptSearch(work);
return (success);
}
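// Illustrative usage sketch (not from the original source): the caller is
// expected to check the return value and reject the txn itself when this
// partition refuses new work, mirroring how haltProcessing() rejects queued
// StartTxnMessages above:
//
//   if (executor.queueStartTransaction(ts) == false) {
//       hstore_site.transactionReject(ts, Status.ABORT_REJECT);
//   }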
// ---------------------------------------------------------------
// WORK QUEUE PROCESSING METHODS
// ---------------------------------------------------------------
/**
* Process a WorkResult and update the internal state of the LocalTransaction accordingly.
* Note that this will always be invoked by a thread other than the main execution thread
* for this PartitionExecutor. That means if something comes back that's bad, we need a way
* to alert the other thread so that it can act on it.
* @param ts
* @param result
*/
private void processWorkResult(LocalTransaction ts, WorkResult result) {
boolean needs_profiling = (hstore_conf.site.txn_profiling && ts.profiler != null);
if (debug.val)
LOG.debug(String.format("Processing WorkResult for %s on partition %d [srcPartition=%d, deps=%d]",
ts, this.partitionId, result.getPartitionId(), result.getDepDataCount()));
// If the Fragment failed to execute, then we need to abort the Transaction
// Note that we have to do this before we add the responses to the TransactionState so that
// we can be sure that the VoltProcedure knows about the problem when it wakes the stored
// procedure back up
if (result.getStatus() != Status.OK) {
if (trace.val)
LOG.trace(String.format("Received non-success response %s from partition %d for %s",
result.getStatus(), result.getPartitionId(), ts));
SerializableException error = null;
if (needs_profiling) ts.profiler.startDeserialization();
try {
ByteBuffer buffer = result.getError().asReadOnlyByteBuffer();
error = SerializableException.deserializeFromBuffer(buffer);
} catch (Exception ex) {
String msg = String.format("Failed to deserialize SerializableException from partition %d " +
"for %s [bytes=%d]",
result.getPartitionId(), ts, result.getError().size());
throw new ServerFaultException(msg, ex);
} finally {
if (needs_profiling) ts.profiler.stopDeserialization();
}
// At this point there is no need to even deserialize the rest of the message because
// we know that we're going to have to abort the transaction
if (error == null) {
LOG.warn(ts + " - Unexpected null SerializableException\n" + result);
} else {
if (debug.val)
LOG.error(String.format("%s - Got error from partition %d in %s",
ts, result.getPartitionId(), result.getClass().getSimpleName()), error);
if (error instanceof EvictedTupleAccessException){
EvictedTupleAccessException evta = (EvictedTupleAccessException) error;
LOG.error(String.format("Evicted tuple access exception error has partition id set as %d", evta.getPartitionId()));
}
ts.setPendingError(error, true);
}
return;
}
if (needs_profiling) ts.profiler.startDeserialization();
for (int i = 0, cnt = result.getDepDataCount(); i < cnt; i++) {
if (trace.val)
LOG.trace(String.format("Storing intermediate results from partition %d for %s",
result.getPartitionId(), ts));
int depId = result.getDepId(i);
ByteString bs = result.getDepData(i);
VoltTable vt = null;
if (bs.isEmpty() == false) {
FastDeserializer fd = new FastDeserializer(bs.asReadOnlyByteBuffer());
try {
vt = fd.readObject(VoltTable.class);
if (trace.val)
LOG.trace(String.format("Displaying results from partition %d for %s :: \n %s",
result.getPartitionId(), ts, vt.toString()));
} catch (Exception ex) {
throw new ServerFaultException("Failed to deserialize VoltTable from partition " + result.getPartitionId() + " for " + ts, ex);
}
}
this.depTracker.addResult(ts, result.getPartitionId(), depId, vt);
} // FOR (dependencies)
if (needs_profiling) ts.profiler.stopDeserialization();
}
/**
* Execute a new transaction at this partition.
* This will invoke the run() method defined in the VoltProcedure for this txn and
* then process the ClientResponse. Only the PartitionExecutor itself should be calling
* this directly, since it's the only thing that knows what's going on with the world...
* @param ts
*/
private void executeTransaction(LocalTransaction ts) {
assert(ts.isInitialized()) :
String.format("Trying to execute uninitialized transaction %s at partition %d",
ts, this.partitionId);
assert(ts.isMarkedReleased(this.partitionId)) :
String.format("Transaction %s was not marked released at partition %d before being executed",
ts, this.partitionId);
if (trace.val)
LOG.trace(String.format("%s - Attempting to start transaction on partition %d",
ts, this.partitionId));
// If this is a MapReduceTransaction handle, we actually want to get the
// inner LocalTransaction handle for this partition. The MapReduceTransaction
// is just a placeholder
if (ts instanceof MapReduceTransaction) {
MapReduceTransaction mr_ts = (MapReduceTransaction)ts;
ts = mr_ts.getLocalTransaction(this.partitionId);
assert(ts != null) :
"Unexpected null LocalTransaction handle from " + mr_ts;
}
ExecutionMode before_mode = this.currentExecMode;
boolean predict_singlePartition = ts.isPredictSinglePartition();
// -------------------------------
// DISTRIBUTED TXN
// -------------------------------
if (predict_singlePartition == false) {
// If there is already a dtxn running, then we need to throw this
// mofo back into the blocked txn queue
// TODO: If our dtxn is on the same site as us, then at this point we know that
// it is done executing the control code and is sending around 2PC messages
// to commit/abort. That means that we could assume that all of the other
// remote partitions are going to agree on the same outcome and we can start
// speculatively executing this dtxn. After all, if we're at this point in
// the PartitionExecutor then we know that we got this partition's locks
// from the TransactionQueueManager.
if (this.currentDtxn != null && this.currentDtxn.equals(ts) == false) {
assert(this.currentDtxn.equals(ts) == false) :
String.format("New DTXN %s != Current DTXN %s", ts, this.currentDtxn);
// If this is a local txn, then we can finagle things a bit.
if (this.currentDtxn.isExecLocal(this.partitionId)) {
// It would be safe for us to speculative execute this DTXN right here
// if the currentDtxn has aborted... but we can never be in this state.
assert(this.currentDtxn.isAborted() == false) : // Sanity Check
String.format("We want to execute %s on partition %d but aborted %s is still hanging around\n%s",
ts, this.partitionId, this.currentDtxn, this.work_queue);
// So that means we know that it committed, which doesn't necessarily mean
// that it will still commit, but we'll be able to abort, rollback, and requeue
// if that happens.
// TODO: Right now our current dtxn marker is a single value. We may want to
// switch it to a FIFO queue so that we can have multiple txns hanging around.
// For now we will just do the default thing and block this txn
this.blockTransaction(ts);
return;
}
// If it's not local, then we just have to block it right away
else {
this.blockTransaction(ts);
return;
}
}
// If there is no other DTXN right now, then we're it!
else if (this.currentDtxn == null) { // || this.currentDtxn.equals(ts) == false) {
this.setCurrentDtxn(ts);
}
// 2011-11-14: We don't want to set the execution mode here, because we know that we
// can check whether we were read-only after the txn finishes
this.setExecutionMode(this.currentDtxn, ExecutionMode.COMMIT_NONE);
if (debug.val)
LOG.debug(String.format("Marking %s as current DTXN on Partition %d [isLocal=%s, execMode=%s]",
ts, this.partitionId, true, this.currentExecMode));
}
// -------------------------------
// SINGLE-PARTITION TXN
// -------------------------------
else {
// If this is a single-partition transaction, then we need to check whether we are
// being executed under speculative execution mode. We have to check this here
// because it may be the case that we queued a bunch of transactions when speculative
// execution was enabled, but now the transaction that was ahead of this one is finished,
// so now we're just executing them regularly
if (this.currentDtxn != null) {
// HACK: If we are currently under DISABLED mode when we get this, then we just
// need to block the transaction and return back to the queue. This is easier than
// having to set all sorts of crazy locks
if (this.currentExecMode == ExecutionMode.DISABLED || hstore_conf.site.specexec_enable == false) {
if (debug.val)
LOG.debug(String.format("%s - Blocking single-partition %s until dtxn finishes [mode=%s]",
this.currentDtxn, ts, this.currentExecMode));
this.blockTransaction(ts);
return;
}
assert(ts.getSpeculationType() != null);
if (debug.val)
LOG.debug(String.format("Speculatively executing %s while waiting for dtxn %s [%s]",
ts, this.currentDtxn, ts.getSpeculationType()));
assert(ts.isSpeculative()) : ts + " was not marked as being speculative!";
}
}
// If we reach this point, we know that we're about to execute our homeboy here...
if (hstore_conf.site.txn_profiling && ts.profiler != null) {
ts.profiler.startExec();
}
if (hstore_conf.site.exec_profiling) this.profiler.numTransactions++;
// Make sure the dependency tracker knows about us
if (ts.hasDependencyTracker()) this.depTracker.addTransaction(ts);
// Grab a VoltProcedure handle for this txn
// Two txns can't use the same VoltProcedure at the same time.
VoltProcedure volt_proc = this.getVoltProcedure(ts.getProcedure().getId());
assert(volt_proc != null) : "No VoltProcedure for " + ts;
if (debug.val) {
LOG.debug(String.format("%s - Starting execution of txn on partition %d " +
"[txnMode=%s, mode=%s]",
ts, this.partitionId, before_mode, this.currentExecMode));
if (trace.val)
LOG.trace(String.format("Current Transaction at partition #%d\n%s",
this.partitionId, ts.debug()));
}
if (hstore_conf.site.txn_counters) TransactionCounter.EXECUTED.inc(ts.getProcedure());
ClientResponseImpl cresponse = null;
VoltProcedure previous = this.currentVoltProc;
try {
this.currentVoltProc = volt_proc;
ts.markControlCodeExecuted();
cresponse = volt_proc.call(ts, ts.getProcedureParameters().toArray()); // Blocking...
// VoltProcedure.call() should handle any exceptions thrown by the transaction
// If we get anything out here then that's bad news
} catch (Throwable ex) {
if (this.isShuttingDown() == false) {
SQLStmt last[] = volt_proc.voltLastQueriesExecuted();
LOG.fatal("Unexpected error while executing " + ts, ex);
if (last.length > 0) {
LOG.fatal(String.format("Last Queries Executed [%d]: %s",
last.length, Arrays.toString(last)));
}
LOG.fatal("LocalTransactionState Dump:\n" + ts.debug());
this.crash(ex);
}
} finally {
this.currentVoltProc = previous;
this.finishVoltProcedure(volt_proc);
if (hstore_conf.site.txn_profiling && ts.profiler != null) ts.profiler.startPost();
// if (cresponse.getStatus() == Status.ABORT_UNEXPECTED) {
// cresponse.getException().printStackTrace();
// }
}
// If this is a MapReduce job, then we can just ignore the ClientResponse
// and return immediately. The VoltMapReduceProcedure is responsible for storing
// the result at the proper location.
if (ts.isMapReduce()) {
return;
} else if (cresponse == null) {
assert(this.isShuttingDown()) : String.format("No ClientResponse for %s???", ts);
return;
}
// -------------------------------
// PROCESS RESPONSE AND FIGURE OUT NEXT STEP
// -------------------------------
Status status = cresponse.getStatus();
if (debug.val) {
LOG.debug(String.format("%s - Finished execution of transaction control code " +
"[status=%s, beforeMode=%s, currentMode=%s]",
ts, status, before_mode, this.currentExecMode));
if (ts.hasPendingError()) {
LOG.debug(String.format("%s - Txn finished with pending error: %s",
ts, ts.getPendingErrorMessage()));
}
}
// We assume that most transactions are not speculatively executed and are successful
// Therefore we don't want to grab the exec_mode lock here.
if (predict_singlePartition == false || this.canProcessClientResponseNow(ts, status, before_mode)) {
this.processClientResponse(ts, cresponse);
}
// Otherwise always queue our response, since we know that whatever thread is out there
// is waiting for us to finish before it drains the queued responses
else {
// If the transaction aborted, then we can't execute any transactions that touch the tables
// that this guy touches. But since we can't just undo this transaction without undoing
// everything that came before it, we'll just disable executing all transactions until the
// current distributed transaction commits
if (status != Status.OK && ts.isExecReadOnly(this.partitionId) == false) {
this.setExecutionMode(ts, ExecutionMode.DISABLED);
int blocked = this.work_queue.drainTo(this.currentBlockedTxns);
if (debug.val) {
if (trace.val && blocked > 0)
LOG.trace(String.format("Blocking %d transactions at partition %d because ExecutionMode is now %s",
blocked, this.partitionId, this.currentExecMode));
LOG.debug(String.format("Disabling execution on partition %d because speculative %s aborted",
this.partitionId, ts));
}
}
if (trace.val)
LOG.trace(String.format("%s - Queuing ClientResponse [status=%s, origMode=%s, newMode=%s, dtxn=%s]",
ts, cresponse.getStatus(), before_mode, this.currentExecMode, this.currentDtxn));
this.blockClientResponse(ts, cresponse);
}
}
/**
* Determines whether a finished transaction that executed locally can have its ClientResponse processed immediately
* or if it needs to wait for the response from the outstanding multi-partition transaction for this partition.
* The response can be processed immediately if:
* (1) This is the multi-partition transaction that everyone is waiting for
* (2) The transaction was not executed under speculative execution mode
* (3) The transaction does not need to wait for the multi-partition transaction to finish first
* @param ts
* @param status
* @param before_mode
* @return
*/
private boolean canProcessClientResponseNow(LocalTransaction ts, Status status, ExecutionMode before_mode) {
if (debug.val) LOG.debug(String.format("%s - Checking whether to process %s response now at partition %d " +
"[singlePartition=%s, readOnly=%s, specExecModified=%s, before=%s, current=%s]",
ts, status, this.partitionId,
ts.isPredictSinglePartition(),
ts.isExecReadOnly(this.partitionId),
this.specExecModified,
before_mode, this.currentExecMode));
// Commit All
if (this.currentExecMode == ExecutionMode.COMMIT_ALL) {
return (true);
}
// SPECIAL CASE
// Any user-aborted, speculative single-partition transaction should be processed immediately.
else if (status == Status.ABORT_USER && ts.isSpeculative()) {
return (true);
}
// // SPECIAL CASE
// // If this txn threw a user abort, and the current outstanding dtxn is read-only
// // then it's safe for us to rollback
// else if (status == Status.ABORT_USER &&
// this.currentDtxn != null &&
// this.currentDtxn.isExecReadOnly(this.partitionId)) {
// return (true);
// }
// SPECIAL CASE
// Anything mispredicted should be processed right away
else if (status == Status.ABORT_MISPREDICT) {
return (true);
}
// Process successful txns based on the mode that it was executed under
else if (status == Status.OK) {
switch (before_mode) {
case COMMIT_ALL:
return (true);
case COMMIT_READONLY:
// Read-only speculative txns can be committed right now
// TODO: Right now we're going to use the specExecModified flag to disable
// sending out any results from spec execed txns that may have read from
// a modified database. We should switch to a bitmap of table ids so that
// we can be more selective.
// return (false);
return (this.specExecModified == false && ts.isExecReadOnly(this.partitionId));
case COMMIT_NONE: {
// If this txn does not conflict with the current dtxn, then we should be able
// to let it commit but we can't because of the way our undo tokens work
return (false);
}
default:
throw new ServerFaultException("Unexpected execution mode: " + before_mode, ts.getTransactionId());
} // SWITCH
}
// // If the transaction aborted and it was read-only thus far, then we want to process it immediately
// else if (status != Status.OK && ts.isExecReadOnly(this.partitionId)) {
// return (true);
// }
assert(this.currentExecMode != ExecutionMode.COMMIT_ALL) :
String.format("Queuing ClientResponse for %s when in non-speculative mode [mode=%s, status=%s]",
ts, this.currentExecMode, status);
return (false);
}
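// Quick reference for the decision above (derived from the code): a response
// is processed immediately when the partition is in COMMIT_ALL, when a
// speculative txn was aborted by the user, or on any misprediction. An OK
// response executed under COMMIT_READONLY is released only if the txn stayed
// read-only at this partition and nothing modified the database; under
// COMMIT_NONE it is always queued until the current dtxn finishes.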
/**
* Process a WorkFragment for a transaction and execute it in this partition's underlying EE.
* @param ts
* @param fragment
* @param allParameters The array of all the ParameterSets for the current SQLStmt batch.
*/
private void processWorkFragment(AbstractTransaction ts, WorkFragment fragment, ParameterSet allParameters[]) {
assert(this.partitionId == fragment.getPartitionId()) :
String.format("Tried to execute WorkFragment %s for %s at partition %d but it was supposed " +
"to be executed on partition %d",
fragment.getFragmentIdList(), ts, this.partitionId, fragment.getPartitionId());
assert(ts.isMarkedPrepared(this.partitionId) == false) :
String.format("Tried to execute WorkFragment %s for %s at partition %d after it was marked 2PC:PREPARE",
fragment.getFragmentIdList(), ts, this.partitionId);
// A txn is "local" if the Java is executing at the same partition as this one
boolean is_basepartition = (ts.getBasePartition() == this.partitionId);
boolean is_remote = (ts instanceof LocalTransaction == false);
boolean is_prefetch = fragment.getPrefetch();
boolean is_readonly = fragment.getReadOnly();
if (debug.val)
LOG.debug(String.format("%s - Executing %s [isBasePartition=%s, isRemote=%s, isPrefetch=%s, isReadOnly=%s, fragments=%s]",
ts, fragment.getClass().getSimpleName(),
is_basepartition, is_remote, is_prefetch, is_readonly,
fragment.getFragmentIdCount()));
// If this WorkFragment isn't being executed at this txn's base partition, then
// we need to start a new execution round
if (is_basepartition == false) {
long undoToken = this.calculateNextUndoToken(ts, is_readonly);
ts.initRound(this.partitionId, undoToken);
ts.startRound(this.partitionId);
}
DependencySet result = null;
Status status = Status.OK;
SerializableException error = null;
// Check how many fragments are not marked as ignored
// If the fragment is marked as ignore then it means that it was already
// sent to this partition for prefetching. We need to make sure that we remove
// it from the list of fragmentIds that we need to execute.
int fragmentCount = fragment.getFragmentIdCount();
for (int i = 0; i < fragmentCount; i++) {
if (fragment.getStmtIgnore(i)) {
fragmentCount--;
}
} // FOR
final ParameterSet parameters[] = tmp_fragmentParams.getParameterSet(fragmentCount);
assert(parameters.length == fragmentCount);
// Construct data given to the EE to execute this work fragment
this.tmp_EEdependencies.clear();
long fragmentIds[] = tmp_fragmentIds.getArray(fragmentCount);
int fragmentOffsets[] = tmp_fragmentOffsets.getArray(fragmentCount);
int outputDepIds[] = tmp_outputDepIds.getArray(fragmentCount);
int inputDepIds[] = tmp_inputDepIds.getArray(fragmentCount);
int offset = 0;
for (int i = 0, cnt = fragment.getFragmentIdCount(); i < cnt; i++) {
if (fragment.getStmtIgnore(i) == false) {
fragmentIds[offset] = fragment.getFragmentId(i);
fragmentOffsets[offset] = i;
outputDepIds[offset] = fragment.getOutputDepId(i);
inputDepIds[offset] = fragment.getInputDepId(i);
parameters[offset] = allParameters[fragment.getParamIndex(i)];
this.getFragmentInputs(ts, inputDepIds[offset], this.tmp_EEdependencies);
if (trace.val && ts.isSysProc() == false && is_basepartition == false)
LOG.trace(String.format("%s - Offset:%d FragmentId:%d OutputDep:%d/%d InputDep:%d/%d",
ts, offset, fragmentIds[offset],
outputDepIds[offset], fragment.getOutputDepId(i),
inputDepIds[offset], fragment.getInputDepId(i)));
offset++;
}
} // FOR
assert(offset == fragmentCount);
try {
result = this.executeFragmentIds(ts,
ts.getLastUndoToken(this.partitionId),
fragmentIds,
parameters,
outputDepIds,
inputDepIds,
this.tmp_EEdependencies);
} catch (EvictedTupleAccessException ex) {
// XXX: What do we do if this is not a single-partition txn?
status = Status.ABORT_EVICTEDACCESS;
error = ex;
} catch (ConstraintFailureException ex) {
LOG.info(String.format("%s - Aborted by %s", ts, ex.getClass().getSimpleName()));
status = Status.ABORT_UNEXPECTED;
error = ex;
} catch (SQLException ex) {
LOG.info(String.format("%s - Aborted by %s", ts, ex.getClass().getSimpleName()));
status = Status.ABORT_UNEXPECTED;
error = ex;
} catch (EEException ex) {
// this.crash(ex);
LOG.info(String.format("%s - Aborted by %s", ts, ex.getClass().getSimpleName()));
status = Status.ABORT_UNEXPECTED;
error = ex;
} catch (Throwable ex) {
LOG.info(String.format("%s - Aborted by %s", ts, ex.getClass().getSimpleName()));
status = Status.ABORT_UNEXPECTED;
if (ex instanceof SerializableException) {
error = (SerializableException)ex;
} else {
error = new SerializableException(ex);
}
} finally {
if (error != null) {
// error.printStackTrace();
// if (error instanceof EvictedTupleAccessException){
// EvictedTupleAccessException ex = (EvictedTupleAccessException) error;
// }
LOG.warn(String.format("%s - Unexpected %s on partition %d",
ts, error.getClass().getSimpleName(), this.partitionId),
error); // (debug.val ? error : null));
}
// Success, but without any results???
if (result == null && status == Status.OK) {
String msg = String.format("The WorkFragment %s executed successfully on Partition %d but " +
"result is null for %s",
fragment.getFragmentIdList(), this.partitionId, ts);
Exception ex = new Exception(msg);
if (debug.val) LOG.warn(ex);
status = Status.ABORT_UNEXPECTED;
error = new SerializableException(ex);
}
}
// For single-partition INSERT/UPDATE/DELETE queries, we don't directly
// execute the SendPlanNode in order to get back the number of tuples that
// were modified. So we have to rely on the output dependency ids set in the task
assert(status != Status.OK ||
(status == Status.OK && result.size() == fragmentIds.length)) :
"Got back " + result.size() + " results but was expecting " + fragmentIds.length;
// Make sure that we mark the round as finished before we start sending results
if (is_basepartition == false) {
ts.finishRound(this.partitionId);
}
// -------------------------------
// PREFETCH QUERIES
// -------------------------------
if (is_prefetch) {
// Regardless of whether this txn is running at the same HStoreSite as this PartitionExecutor,
// we always need to put the result inside of the local query cache
// This is so that we can identify if we get request for a query that we have already executed
// We'll only do this if it succeeded. If it failed, then we won't do anything and will
// just wait until they come back to execute the query again before
// we tell them that something went wrong. It's crude, but it's just easier this way...
if (status == Status.OK) {
// We're going to store the result in the base partition cache if they're
// on the same HStoreSite as us
if (is_remote == false) {
PartitionExecutor other = this.hstore_site.getPartitionExecutor(ts.getBasePartition());
for (int i = 0, cnt = result.size(); i < cnt; i++) {
if (trace.val)
LOG.trace(String.format("%s - Storing %s prefetch result [params=%s]",
ts, CatalogUtil.getPlanFragment(catalogContext.catalog, fragment.getFragmentId(fragmentOffsets[i])).fullName(),
parameters[i]));
other.addPrefetchResult((LocalTransaction)ts,
fragment.getStmtCounter(fragmentOffsets[i]),
fragment.getFragmentId(fragmentOffsets[i]),
this.partitionId,
parameters[i].hashCode(),
result.dependencies[i]);
} // FOR
}
}
// Now if it's a remote transaction, we need to use the coordinator to send
// them our result. Note that we want to send a single message per partition. Unlike
// with the TransactionWorkRequests, we don't need to wait until all of the partitions
// that are prefetching for this txn at our local HStoreSite to finish.
if (is_remote) {
WorkResult wr = this.buildWorkResult(ts, result, status, error);
TransactionPrefetchResult.Builder builder = TransactionPrefetchResult.newBuilder()
.setTransactionId(ts.getTransactionId().longValue())
.setSourcePartition(this.partitionId)
.setResult(wr)
.setStatus(status)
.addAllFragmentId(fragment.getFragmentIdList())
.addAllStmtCounter(fragment.getStmtCounterList());
for (int i = 0, cnt = fragment.getFragmentIdCount(); i < cnt; i++) {
builder.addParamHash(parameters[i].hashCode());
}
if (debug.val)
LOG.debug(String.format("%s - Sending back %s to partition %d [numResults=%s, status=%s]",
ts, wr.getClass().getSimpleName(), ts.getBasePartition(),
result.size(), status));
hstore_coordinator.transactionPrefetchResult((RemoteTransaction)ts, builder.build());
}
}
// -------------------------------
// LOCAL TRANSACTION
// -------------------------------
else if (is_remote == false) {
LocalTransaction local_ts = (LocalTransaction)ts;
// If the transaction is local, store the result directly in the local TransactionState
if (status == Status.OK) {
if (trace.val)
LOG.trace(String.format("%s - Storing %d dependency results locally for successful work fragment",
ts, result.size()));
assert(result.size() == outputDepIds.length);
DependencyTracker otherTracker = this.hstore_site.getDependencyTracker(ts.getBasePartition());
for (int i = 0; i < outputDepIds.length; i++) {
if (trace.val)
LOG.trace(String.format("%s - Storing DependencyId #%d [numRows=%d]\n%s",
ts, outputDepIds[i], result.dependencies[i].getRowCount(),
result.dependencies[i]));
try {
otherTracker.addResult(local_ts, this.partitionId, outputDepIds[i], result.dependencies[i]);
} catch (Throwable ex) {
// ex.printStackTrace();
String msg = String.format("Failed to stored Dependency #%d for %s [idx=%d, fragmentId=%d]",
outputDepIds[i], ts, i, fragmentIds[i]);
LOG.error(String.format("%s - WorkFragment:%d\nExpectedIds:%s\nOutputDepIds: %s\nResultDepIds: %s\n%s",
msg, fragment.hashCode(),
fragment.getOutputDepIdList(), Arrays.toString(outputDepIds),
Arrays.toString(result.depIds), fragment));
throw new ServerFaultException(msg, ex);
}
} // FOR
} else {
local_ts.setPendingError(error, true);
}
}
// -------------------------------
// REMOTE TRANSACTION
// -------------------------------
else {
if (trace.val)
LOG.trace(String.format("%s - Constructing WorkResult with %d bytes from partition %d to send " +
"back to initial partition %d [status=%s]",
ts, (result != null ? result.size() : null),
this.partitionId, ts.getBasePartition(), status));
RpcCallback<WorkResult> callback = ((RemoteTransaction)ts).getWorkCallback();
if (callback == null) {
LOG.fatal("Unable to send FragmentResponseMessage for " + ts);
LOG.fatal("Orignal WorkFragment:\n" + fragment);
LOG.fatal(ts.toString());
throw new ServerFaultException("No RPC callback to HStoreSite for " + ts, ts.getTransactionId());
}
WorkResult response = this.buildWorkResult((RemoteTransaction)ts, result, status, error);
assert(response != null);
callback.run(response);
}
// Check whether this is the last query that we're going to get
// from this transaction. If it is, then we can go ahead and prepare the txn
if (is_basepartition == false && fragment.getLastFragment()) {
if (debug.val)
LOG.debug(String.format("%s - Invoking early 2PC:PREPARE at partition %d",
ts, this.partitionId));
PartitionCountingCallback<? extends AbstractTransaction> callback = ts.getPrepareCallback();
// If we are at a remote site, then we have to be careful here.
// We don't actually have the real callback that the RemotePrepareCallback needs.
// So that we have to use a null callback that doesn't actually do anything. The
// RemotePrepareCallback will make sure that we mark the partition as prepared.
if (ts instanceof RemoteTransaction) {
PartitionSet partitions = catalogContext.getPartitionSetSingleton(this.partitionId);
RpcCallback<TransactionPrepareResponse> origCallback = NullCallback.getInstance();
((RemotePrepareCallback)callback).init((RemoteTransaction)ts, partitions, origCallback);
}
this.queuePrepare(ts, callback);
}
}
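// High-level flow of processWorkFragment() above, for reference:
//   (1) Start a new execution round if we are not the txn's base partition
//   (2) Filter out fragments already executed here via prefetch (stmtIgnore)
//   (3) Execute the remaining fragment ids in the EE via executeFragmentIds()
//   (4) Route the results: prefetch cache, local DependencyTracker, or a
//       WorkResult sent back through the remote txn's RPC callback
//   (5) If this was the txn's last fragment, queue an early 2PC:PREPARE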
/**
* Executes a WorkFragment on behalf of some remote site and returns the
* resulting DependencySet
* @param fragment
* @return
* @throws Exception
*/
private DependencySet executeFragmentIds(AbstractTransaction ts,
long undoToken,
long fragmentIds[],
ParameterSet parameters[],
int output_depIds[],
int input_depIds[],
Map<Integer, List<VoltTable>> input_deps) throws Exception {
if (fragmentIds.length == 0) {
LOG.warn(String.format("Got a fragment batch for %s that does not have any fragments?", ts));
return (null);
}
// *********************************** DEBUG ***********************************
if (trace.val) {
LOG.trace(String.format("%s - Getting ready to kick %d fragments to partition %d EE [undoToken=%d]",
ts, fragmentIds.length, this.partitionId,
(undoToken != HStoreConstants.NULL_UNDO_LOGGING_TOKEN ? undoToken : "null")));
// if (trace.val) {
// LOG.trace("WorkFragmentIds: " + Arrays.toString(fragmentIds));
// Map<String, Object> m = new LinkedHashMap<String, Object>();
// for (int i = 0; i < parameters.length; i++) {
// m.put("Parameter[" + i + "]", parameters[i]);
// } // FOR
// LOG.trace("Parameters:\n" + StringUtil.formatMaps(m));
// }
}
// *********************************** DEBUG ***********************************
DependencySet result = null;
// -------------------------------
// SYSPROC FRAGMENTS
// -------------------------------
if (ts.isSysProc()) {
result = this.executeSysProcFragments(ts,
undoToken,
fragmentIds.length,
fragmentIds,
parameters,
output_depIds,
input_depIds,
input_deps);
// -------------------------------
// REGULAR FRAGMENTS
// -------------------------------
} else {
result = this.executePlanFragments(ts,
undoToken,
fragmentIds.length,
fragmentIds,
parameters,
output_depIds,
input_depIds,
input_deps);
if (result == null) {
LOG.warn(String.format("Output DependencySet for %s in %s is null?",
Arrays.toString(fragmentIds), ts));
}
}
return (result);
}
/**
* Execute a BatchPlan directly on this PartitionExecutor without having to convert it
* to WorkFragments first. This is a big speed improvement over having to queue things up
* @param ts
* @param plan
* @return
*/
private VoltTable[] executeLocalPlan(LocalTransaction ts,
BatchPlanner.BatchPlan plan,
ParameterSet parameterSets[]) {
// Start the new execution round
long undoToken = this.calculateNextUndoToken(ts, plan.isReadOnly());
ts.initFirstRound(undoToken, plan.getBatchSize());
int fragmentCount = plan.getFragmentCount();
long fragmentIds[] = plan.getFragmentIds();
int output_depIds[] = plan.getOutputDependencyIds();
int input_depIds[] = plan.getInputDependencyIds();
// Mark that we touched the local partition once for each query in the batch
// ts.getTouchedPartitions().put(this.partitionId, plan.getBatchSize());
// Only notify other partitions that we're done with them if we're not
// a single-partition transaction
if (hstore_conf.site.specexec_enable && ts.isPredictSinglePartition() == false) {
//FIXME
//PartitionSet new_done = ts.calculateDonePartitions(this.thresholds);
//if (new_done != null && new_done.isEmpty() == false) {
// LocalPrepareCallback callback = ts.getPrepareCallback();
// assert(callback.isInitialized());
// this.hstore_coordinator.transactionPrepare(ts, callback, new_done);
//}
}
if (trace.val)
LOG.trace(String.format("Txn #%d - BATCHPLAN:\n" +
" fragmentIds: %s\n" +
" fragmentCount: %s\n" +
" output_depIds: %s\n" +
" input_depIds: %s",
ts.getTransactionId(),
Arrays.toString(plan.getFragmentIds()),
plan.getFragmentCount(),
Arrays.toString(plan.getOutputDependencyIds()),
Arrays.toString(plan.getInputDependencyIds())));
// NOTE: There are no dependencies that we need to pass in because the entire
// batch is local to this partition.
DependencySet result = null;
try {
result = this.executePlanFragments(ts,
undoToken,
fragmentCount,
fragmentIds,
parameterSets,
output_depIds,
input_depIds,
null);
} finally {
ts.fastFinishRound(this.partitionId);
}
// assert(result != null) : "Unexpected null DependencySet result for " + ts;
if (trace.val)
LOG.trace("Output:\n" + result);
return (result != null ? result.dependencies : null);
}
/**
* Execute the given fragment tasks on this site's underlying EE
* @param ts
* @param undoToken
* @param batchSize
* @param fragmentIds
* @param parameterSets
* @param output_depIds
* @param input_depIds
* @return
*/
private DependencySet executeSysProcFragments(AbstractTransaction ts,
long undoToken,
int batchSize,
long fragmentIds[],
ParameterSet parameters[],
int output_depIds[],
int input_depIds[],
Map<Integer, List<VoltTable>> input_deps) {
assert(fragmentIds.length == 1);
assert(fragmentIds.length == parameters.length) :
String.format("%s - Fragments:%d / Parameters:%d",
ts, fragmentIds.length, parameters.length);
VoltSystemProcedure volt_proc = this.m_registeredSysProcPlanFragments.get(fragmentIds[0]);
if (volt_proc == null) {
String msg = "No sysproc handle exists for FragmentID #" + fragmentIds[0] + " :: " + this.m_registeredSysProcPlanFragments;
throw new ServerFaultException(msg, ts.getTransactionId());
}
ts.markExecNotReadOnly(this.partitionId);
DependencySet result = null;
try {
result = volt_proc.executePlanFragment(ts.getTransactionId(),
this.tmp_EEdependencies,
(int)fragmentIds[0],
parameters[0],
this.m_systemProcedureContext);
} catch (Throwable ex) {
String msg = "Unexpected error when executing system procedure";
throw new ServerFaultException(msg, ex, ts.getTransactionId());
}
if (debug.val)
LOG.debug(String.format("%s - Finished executing sysproc fragment for %s (#%d)%s",
ts, m_registeredSysProcPlanFragments.get(fragmentIds[0]).getClass().getSimpleName(),
fragmentIds[0], (trace.val ? "\n" + result : "")));
return (result);
}
/**
* Execute the given fragment tasks on this site's underlying EE
* @param ts
* @param undoToken
* @param batchSize
* @param fragmentIds
* @param parameterSets
* @param output_depIds
* @param input_depIds
* @return
*/
private DependencySet executePlanFragments(AbstractTransaction ts,
long undoToken,
int batchSize,
long fragmentIds[],
ParameterSet parameterSets[],
int output_depIds[],
int input_depIds[],
Map<Integer, List<VoltTable>> input_deps) {
assert(this.ee != null) : "The EE object is null. This is bad!";
Long txn_id = ts.getTransactionId();
// *********************************** DEBUG ***********************************
if (debug.val) {
StringBuilder sb = new StringBuilder();
sb.append(String.format("%s - Executing %d fragments [lastTxnId=%d, undoToken=%d]",
ts, batchSize, this.lastCommittedTxnId, undoToken));
Map<String, Object> m = new LinkedHashMap<String, Object>();
m.put("Fragments", Arrays.toString(fragmentIds));
Map<Integer, Object> inner = new LinkedHashMap<Integer, Object>();
for (int i = 0; i < batchSize; i++)
inner.put(i, parameterSets[i].toString());
m.put("Parameters", inner);
if (batchSize > 0 && input_depIds[0] != HStoreConstants.NULL_DEPENDENCY_ID) {
inner = new LinkedHashMap<Integer, Object>();
for (int i = 0; i < batchSize; i++) {
List<VoltTable> deps = input_deps.get(input_depIds[i]);
inner.put(input_depIds[i], (deps != null ? StringUtil.join("\n", deps) : "???"));
} // FOR
m.put("Input Dependencies", inner);
}
m.put("Output Dependencies", Arrays.toString(output_depIds));
sb.append("\n" + StringUtil.formatMaps(m));
LOG.debug(sb.toString().trim());
}
// *********************************** DEBUG ***********************************
// pass attached dependencies to the EE (for non-sysproc work).
if (input_deps != null && input_deps.isEmpty() == false) {
if (debug.val)
LOG.debug(String.format("%s - Stashing %d InputDependencies at partition %d",
ts, input_deps.size(), this.partitionId));
this.ee.stashWorkUnitDependencies(input_deps);
}
// Java-based Table Read-Write Sets
boolean readonly = true;
boolean speculative = ts.isSpeculative();
boolean singlePartition = ts.isPredictSinglePartition();
int tableIds[] = null;
for (int i = 0; i < batchSize; i++) {
boolean fragReadOnly = PlanFragmentIdGenerator.isPlanFragmentReadOnly(fragmentIds[i]);
// We don't need to maintain read/write sets for non-speculative txns
if (speculative || singlePartition == false) {
if (fragReadOnly) {
tableIds = catalogContext.getReadTableIds(Long.valueOf(fragmentIds[i]));
if (tableIds != null) ts.markTableIdsRead(this.partitionId, tableIds);
} else {
tableIds = catalogContext.getWriteTableIds(Long.valueOf(fragmentIds[i]));
if (tableIds != null) ts.markTableIdsWritten(this.partitionId, tableIds);
}
}
readonly = readonly && fragReadOnly;
}
// Enable read/write set tracking
if (hstore_conf.site.exec_readwrite_tracking && ts.hasExecutedWork(this.partitionId) == false) {
if (trace.val)
LOG.trace(String.format("%s - Enabling read/write set tracking in EE at partition %d",
ts, this.partitionId));
this.ee.trackingEnable(txn_id);
}
// Check whether the txn has only executed read-only queries up to this point
if (ts.isExecReadOnly(this.partitionId)) {
if (readonly == false) {
if (trace.val)
LOG.trace(String.format("%s - Marking txn as not read-only %s",
ts, Arrays.toString(fragmentIds)));
ts.markExecNotReadOnly(this.partitionId);
}
// We can do this here because the only way that we're not read-only is if
// we actually modify data at this partition
ts.markExecutedWork(this.partitionId);
}
DependencySet result = null;
boolean needs_profiling = false;
if (ts.isExecLocal(this.partitionId)) {
if (hstore_conf.site.txn_profiling && ((LocalTransaction)ts).profiler != null) {
needs_profiling = true;
((LocalTransaction)ts).profiler.startExecEE();
}
}
Throwable error = null;
try {
assert(this.lastCommittedUndoToken < undoToken) :
String.format("Trying to execute work using undoToken %d for %s but " +
"it is less than the last committed undoToken %d at partition %d",
undoToken, ts, this.lastCommittedUndoToken, this.partitionId);
if (trace.val)
LOG.trace(String.format("%s - Executing fragments %s at partition %d [undoToken=%d]",
ts, Arrays.toString(fragmentIds), this.partitionId, undoToken));
result = this.ee.executeQueryPlanFragmentsAndGetDependencySet(
fragmentIds,
batchSize,
input_depIds,
output_depIds,
parameterSets,
batchSize,
txn_id.longValue(),
this.lastCommittedTxnId.longValue(),
undoToken);
} catch (AssertionError ex) {
LOG.error("Fatal error when processing " + ts + "\n" + ts.debug());
error = ex;
throw ex;
} catch (EvictedTupleAccessException ex) {
if (debug.val) LOG.warn("Caught EvictedTupleAccessException.");
ex.setPartitionId(this.partitionId);
error = ex;
throw ex;
} catch (SerializableException ex) {
if (debug.val)
LOG.error(String.format("%s - Unexpected error in the ExecutionEngine on partition %d",
ts, this.partitionId), ex);
error = ex;
throw ex;
} catch (Throwable ex) {
error = ex;
String msg = String.format("%s - Failed to execute PlanFragments: %s", ts, Arrays.toString(fragmentIds));
throw new ServerFaultException(msg, ex);
} finally {
if (needs_profiling) ((LocalTransaction)ts).profiler.stopExecEE();
if (error == null && result == null) {
LOG.warn(String.format("%s - Finished executing fragments but got back null results [fragmentIds=%s]",
ts, Arrays.toString(fragmentIds)));
}
}
// *********************************** DEBUG ***********************************
if (debug.val) {
if (result != null) {
LOG.debug(String.format("%s - Finished executing fragments and got back %d results",
ts, result.depIds.length));
} else {
LOG.warn(String.format("%s - Finished executing fragments but got back null results? That seems bad...", ts));
}
}
// *********************************** DEBUG ***********************************
return (result);
}
/**
* Load a VoltTable directly into the EE at this partition.
* <B>NOTE:</B> This should only be invoked by a system stored procedure.
* @param ts
* @param clusterName
* @param databaseName
* @param tableName
* @param data
* @param allowELT
* @throws VoltAbortException
*/
public void loadTable(AbstractTransaction ts, String clusterName, String databaseName,
String tableName, VoltTable data, int allowELT) throws VoltAbortException {
Table table = this.catalogContext.database.getTables().getIgnoreCase(tableName);
if (table == null) {
throw new VoltAbortException("Table '" + tableName + "' does not exist in database " + clusterName + "." + databaseName);
}
if (data == null || data.getRowCount() == 0) {
return;
}
if (debug.val)
LOG.debug(String.format("Loading %d row(s) into %s [txnId=%d]",
data.getRowCount(), table.getName(), ts.getTransactionId()));
ts.markExecutedWork(this.partitionId);
this.ee.loadTable(table.getRelativeIndex(), data,
ts.getTransactionId(),
this.lastCommittedTxnId.longValue(),
ts.getLastUndoToken(this.partitionId),
allowELT != 0);
}
/**
* Load a VoltTable directly into the EE at this partition.
* <B>NOTE:</B> This should only be used for testing
* @param txnId
* @param table
* @param data
* @param allowELT
* @throws VoltAbortException
*/
protected void loadTable(Long txnId, Table table, VoltTable data, boolean allowELT) throws VoltAbortException {
if (debug.val)
LOG.debug(String.format("Loading %d row(s) into %s [txnId=%d]",
data.getRowCount(), table.getName(), txnId));
this.ee.loadTable(table.getRelativeIndex(),
data,
txnId.longValue(),
this.lastCommittedTxnId.longValue(),
HStoreConstants.NULL_UNDO_LOGGING_TOKEN,
allowELT);
}
/**
* Execute a SQLStmt batch at this partition. This is the main entry point from
* VoltProcedure when it needs to execute a SQLStmt batch for a txn.
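* <p>Caller-side sketch (illustrative): this is the path taken when a
* VoltProcedure flushes the statements it queued up with voltQueueSQL():
* <pre>
* SQLStmt stmts[] = { getWarehouse };             // the queued SQLStmts
* ParameterSet params[] = { getWarehouseParams }; // one ParameterSet per stmt
* VoltTable results[] = executor.executeSQLStmtBatch(ts, 1, stmts, params,
*                                                    false,  // finalTask
*                                                    false); // forceSinglePartition
* </pre>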
* @param ts The txn handle that is executing this query batch
* @param batchSize The number of SQLStmts that the txn queued up using voltQueueSQL()
* @param batchStmts The SQLStmts that the txn is trying to execute
* @param batchParams The input parameters for the SQLStmts
* @param finalTask Whether the txn has marked this as the last batch that they will ever execute
* @param forceSinglePartition Whether to force the BatchPlanner to only generate a single-partition plan
* @return
*/
public VoltTable[] executeSQLStmtBatch(LocalTransaction ts,
int batchSize,
SQLStmt batchStmts[],
ParameterSet batchParams[],
boolean finalTask,
boolean forceSinglePartition) {
boolean needs_profiling = (hstore_conf.site.txn_profiling && ts.profiler != null);
if (needs_profiling) {
ts.profiler.addBatch(batchSize);
ts.profiler.stopExecJava();
ts.profiler.startExecPlanning();
}
// HACK: This is needed to handle updates on replicated tables properly
// when there is only one partition in the cluster.
if (catalogContext.numberOfPartitions == 1) {
this.depTracker.addTransaction(ts);
}
if (hstore_conf.site.exec_deferrable_queries) {
// TODO: Loop through batchStmts and check whether their corresponding Statement
// is marked as deferrable. If so, then remove them from batchStmts and batchParams
// (sliding everyone over by one in the arrays). Queue up the deferred query.
// Be sure to decrement batchSize after you have finished processing this.
// EXAMPLE: batchStmts[0].getStatement().getDeferrable()
}
// Calculate the hash code for this batch to see whether we already have a planner
final Integer batchHashCode = VoltProcedure.getBatchHashCode(batchStmts, batchSize);
BatchPlanner planner = this.batchPlanners.get(batchHashCode);
if (planner == null) { // Assume fast case
planner = new BatchPlanner(batchStmts,
batchSize,
ts.getProcedure(),
this.p_estimator,
forceSinglePartition);
this.batchPlanners.put(batchHashCode, planner);
}
assert(planner != null);
// At this point we have to calculate exactly what we need to do on each partition
// for this batch. So somehow right now we need to fire this off to either our
// local executor or to Evan's magical distributed transaction manager
BatchPlanner.BatchPlan plan = planner.plan(ts.getTransactionId(),
this.partitionId,
ts.getPredictTouchedPartitions(),
ts.getTouchedPartitions(),
batchParams);
assert(plan != null);
if (trace.val) {
LOG.trace(ts + " - Touched Partitions: " + ts.getTouchedPartitions().values());
LOG.trace(ts + " - Next BatchPlan:\n" + plan.toString());
}
if (needs_profiling) ts.profiler.stopExecPlanning();
// Tell the TransactionEstimator that we're about to execute these mofos
EstimatorState t_state = ts.getEstimatorState();
if (this.localTxnEstimator != null && t_state != null && t_state.isUpdatesEnabled()) {
if (needs_profiling) ts.profiler.startExecEstimation();
try {
this.localTxnEstimator.executeQueries(t_state,
planner.getStatements(),
plan.getStatementPartitions());
} finally {
if (needs_profiling) ts.profiler.stopExecEstimation();
}
} else if (t_state != null && t_state.shouldAllowUpdates()) {
LOG.warn("Skipping estimator updates for " + ts);
}
// Check whether our plan caused a misprediction
// Doing it this way allows us to update the TransactionEstimator before we abort the txn
if (plan.getMisprediction() != null) {
MispredictionException ex = plan.getMisprediction();
ts.setPendingError(ex, false);
assert(ex.getPartitions().isEmpty() == false) :
"Unexpected empty PartitionSet for mispredicated txn " + ts;
// Print Misprediction Debug
if (hstore_conf.site.exec_mispredict_crash) {
// Use a lock so that we only dump out the first txn that fails
synchronized (PartitionExecutor.class) {
LOG.warn("\n" + EstimatorUtil.mispredictDebug(ts, planner, batchStmts, batchParams));
LOG.fatal(String.format("Crashing because site.exec_mispredict_crash is true [txn=%s]", ts));
this.crash(ex);
} // SYNCH
}
else if (debug.val) {
if (trace.val)
LOG.warn("\n" + EstimatorUtil.mispredictDebug(ts, planner, batchStmts, batchParams));
LOG.debug(ts + " - Aborting and restarting mispredicted txn.");
}
throw ex;
}
// Keep track of the number of times that we've executed each query for this transaction
int stmtCounters[] = this.tmp_stmtCounters.getArray(batchSize);
for (int i = 0; i < batchSize; i++) {
stmtCounters[i] = ts.updateStatementCounter(batchStmts[i].getStatement());
} // FOR
if (ts.hasPrefetchQueries()) {
PartitionSet stmtPartitions[] = plan.getStatementPartitions();
PrefetchState prefetchState = ts.getPrefetchState();
assert(prefetchState != null);
QueryTracker queryTracker = prefetchState.getExecQueryTracker();
for (int i = 0; i < batchSize; i++) {
// We always have to update the query tracker regardless of whether
// the query was prefetched or not. This is so that we can ensure
// that we execute the queries in the right order.
Statement stmt = batchStmts[i].getStatement();
stmtCounters[i] = queryTracker.addQuery(stmt, stmtPartitions[i], batchParams[i]);
} // FOR
// FIXME PrefetchQueryUtil.checkSQLStmtBatch(this, ts, plan, batchSize, batchStmts, batchParams);
} // PREFETCH
VoltTable results[] = null;
// FAST-PATH: Single-partition + Local
// If the BatchPlan only has WorkFragments that are for this partition, then
// we can use the fast-path executeLocalPlan() method
if (plan.isSingledPartitionedAndLocal()) {
if (trace.val)
LOG.trace(String.format("%s - Sending %s directly to the ExecutionEngine at partition %d",
ts, plan.getClass().getSimpleName(), this.partitionId));
// If the finalTask flag is set to true, and we're only executing queries at this
// partition, then we need to notify the other partitions that we're done with them.
if (hstore_conf.site.exec_early_prepare &&
finalTask == true &&
ts.isPredictSinglePartition() == false &&
ts.isSysProc() == false &&
ts.allowEarlyPrepare() == true) {
tmp_fragmentsPerPartition.clearValues();
tmp_fragmentsPerPartition.put(this.partitionId, batchSize);
DonePartitionsNotification notify = this.computeDonePartitions(ts, null, tmp_fragmentsPerPartition, finalTask);
if (notify != null && notify.hasSitesToNotify()) {
this.notifyDonePartitions(ts, notify);
}
}
// Execute the queries right away.
results = this.executeLocalPlan(ts, plan, batchParams);
}
// DISTRIBUTED EXECUTION
// Otherwise, we need to generate WorkFragments and then send the messages out
// to our remote partitions using the HStoreCoordinator
else {
List<WorkFragment.Builder> partitionFragments = new ArrayList<WorkFragment.Builder>();
plan.getWorkFragmentsBuilders(ts.getTransactionId(), stmtCounters, partitionFragments);
if (debug.val)
LOG.debug(String.format("%s - Using dispatchWorkFragments to execute %d %ss",
ts, partitionFragments.size(), WorkFragment.class.getSimpleName()));
if (needs_profiling) {
int remote_cnt = 0;
PartitionSet stmtPartitions[] = plan.getStatementPartitions();
for (int i = 0; i < batchSize; i++) {
if (stmtPartitions[i].get() != ts.getBasePartition()) remote_cnt++;
if (trace.val)
LOG.trace(String.format("%s - [%02d] stmt:%s / partitions:%s",
ts, i, batchStmts[i].getStatement().getName(), stmtPartitions[i]));
} // FOR
if (trace.val) LOG.trace(String.format("%s - Remote Queries Count = %d", ts, remote_cnt));
ts.profiler.addRemoteQuery(remote_cnt);
}
// This will block until we get all of our responses.
results = this.dispatchWorkFragments(ts, batchSize, batchParams, partitionFragments, finalTask);
}
if (debug.val && results == null)
LOG.warn("Got back a null results array for " + ts + "\n" + plan.toString());
if (needs_profiling) ts.profiler.startExecJava();
return (results);
}
/**
* Construct a WorkResult for the given transaction containing the execution
* status, any serialized error, and (on success) the serialized output
* dependencies that need to be sent back to the requesting partition.
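* <p>On success, each output dependency i is appended as a (depIds[i],
* serialized VoltTable) pair so that the receiver can re-associate every
* table with its dependency id.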
* @param ts
* @param result
* @param status
* @param error
*/
protected WorkResult buildWorkResult(AbstractTransaction ts, DependencySet result, Status status, SerializableException error) {
WorkResult.Builder builder = WorkResult.newBuilder();
// Partition Id
builder.setPartitionId(this.partitionId);
// Status
builder.setStatus(status);
// SerializableException
if (error != null) {
int size = error.getSerializedSize();
BBContainer bc = this.buffer_pool.acquire(size);
try {
error.serializeToBuffer(bc.b);
} catch (IOException ex) {
String msg = "Failed to serialize error for " + ts;
throw new ServerFaultException(msg, ex);
}
bc.b.rewind();
builder.setError(ByteString.copyFrom(bc.b));
bc.discard();
}
// Push dependencies back to the remote partition that needs it
if (status == Status.OK) {
for (int i = 0, cnt = result.size(); i < cnt; i++) {
builder.addDepId(result.depIds[i]);
this.fs.clear();
try {
result.dependencies[i].writeExternal(this.fs);
ByteString bs = ByteString.copyFrom(this.fs.getBBContainer().b);
builder.addDepData(bs);
} catch (Exception ex) {
throw new ServerFaultException(String.format("Failed to serialize output dependency %d for %s", result.depIds[i], ts), ex);
}
if (trace.val)
LOG.trace(String.format("%s - Serialized Output Dependency %d\n%s",
ts, result.depIds[i], result.dependencies[i]));
} // FOR
this.fs.getBBContainer().discard();
}
return (builder.build());
}
/**
* This method is invoked when the PartitionExecutor wants to execute work at a remote HStoreSite.
* The doneNotificationsPerSite is an array where each offset (based on SiteId) may contain
* a PartitionSet of the partitions that this txn is finished with at the remote node and will
* not be executing any work in the current batch.
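* <p>Batching sketch (illustrative): WorkFragments destined for partitions that
* live on the same remote site are packed into a single TransactionWorkRequest,
* so fragments for partitions 2 and 3 that are both hosted by site 1 produce
* one request to site 1 rather than two separate messages.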
* @param ts
* @param fragmentBuilders
* @param parameterSets
* @param notify
*/
private void requestWork(LocalTransaction ts,
Collection<WorkFragment.Builder> fragmentBuilders,
List<ByteString> parameterSets,
DonePartitionsNotification notify) {
assert(fragmentBuilders.isEmpty() == false);
assert(ts != null);
Long txn_id = ts.getTransactionId();
if (trace.val)
LOG.trace(String.format("%s - Wrapping %d %s into a %s",
ts, fragmentBuilders.size(),
WorkFragment.class.getSimpleName(),
TransactionWorkRequest.class.getSimpleName()));
// If our transaction was originally designated as single-partitioned, then we need to make
// sure that we don't touch any partition other than our local one. If we do, then we need
// to abort it and restart it as multi-partitioned
boolean need_restart = false;
boolean predict_singlepartition = ts.isPredictSinglePartition();
PartitionSet done_partitions = ts.getDonePartitions();
Estimate t_estimate = ts.getLastEstimate();
// Now we can go back through and start running all of the WorkFragments that were not blocked
// waiting for an input dependency. Note that we pack all the fragments going to the same
// site into a single TransactionWorkRequest rather than sending each WorkFragment in its own message
for (WorkFragment.Builder fragmentBuilder : fragmentBuilders) {
assert(this.depTracker.isBlocked(ts, fragmentBuilder) == false);
final int target_partition = fragmentBuilder.getPartitionId();
final int target_site = catalogContext.getSiteIdForPartitionId(target_partition);
final PartitionSet doneNotifications = (notify != null ? notify.getNotifications(target_site) : null);
// Make sure that this isn't a single-partition txn trying to access a remote partition
if (predict_singlepartition && target_partition != this.partitionId) {
if (debug.val)
LOG.debug(String.format("%s - Txn on partition %d is suppose to be " +
"single-partitioned, but it wants to execute a fragment on partition %d",
ts, this.partitionId, target_partition));
need_restart = true;
break;
}
// Make sure that this txn isn't trying to access a partition that we said we were
// done with earlier
else if (done_partitions.contains(target_partition)) {
if (debug.val)
LOG.warn(String.format("%s on partition %d was marked as done on partition %d " +
"but now it wants to go back for more!",
ts, this.partitionId, target_partition));
need_restart = true;
break;
}
// Make sure we at least have something to do!
else if (fragmentBuilder.getFragmentIdCount() == 0) {
LOG.warn(String.format("%s - Trying to send a WorkFragment request with 0 fragments", ts));
continue;
}
// Add in the specexec query estimate at this partition if needed
if (hstore_conf.site.specexec_enable && t_estimate != null && t_estimate.hasQueryEstimate(target_partition)) {
List<CountedStatement> queryEst = t_estimate.getQueryEstimate(target_partition);
if (debug.val)
LOG.debug(String.format("%s - Sending remote query estimate to partition %d " +
"containing %d queries\n%s",
ts, target_partition, queryEst.size(), StringUtil.join("\n", queryEst)));
assert(queryEst.isEmpty() == false);
QueryEstimate.Builder estBuilder = QueryEstimate.newBuilder();
for (CountedStatement countedStmt : queryEst) {
estBuilder.addStmtIds(countedStmt.statement.getId());
estBuilder.addStmtCounters(countedStmt.counter);
} // FOR
fragmentBuilder.setFutureStatements(estBuilder);
}
// Get the TransactionWorkRequest.Builder for the remote HStoreSite
// We will use this to store our serialized input dependencies
TransactionWorkRequestBuilder requestBuilder = tmp_transactionRequestBuilders[target_site];
if (requestBuilder == null) {
requestBuilder = tmp_transactionRequestBuilders[target_site] = new TransactionWorkRequestBuilder();
}
TransactionWorkRequest.Builder builder = requestBuilder.getBuilder(ts, doneNotifications);
// Also keep track of what Statements they are executing so that we know
// what we need to send over the wire to them.
requestBuilder.addParamIndexes(fragmentBuilder.getParamIndexList());
// Input Dependencies
if (fragmentBuilder.getNeedsInput()) {
if (debug.val)
LOG.debug(String.format("%s - Retrieving input dependencies at partition %d",
ts, this.partitionId));
tmp_removeDependenciesMap.clear();
for (int i = 0, cnt = fragmentBuilder.getInputDepIdCount(); i < cnt; i++) {
this.getFragmentInputs(ts, fragmentBuilder.getInputDepId(i), tmp_removeDependenciesMap);
} // FOR
for (Entry<Integer, List<VoltTable>> e : tmp_removeDependenciesMap.entrySet()) {
if (requestBuilder.hasInputDependencyId(e.getKey())) continue;
if (debug.val)
LOG.debug(String.format("%s - Attaching %d input dependencies to be sent to %s",
ts, e.getValue().size(), HStoreThreadManager.formatSiteName(target_site)));
for (VoltTable vt : e.getValue()) {
this.fs.clear();
try {
this.fs.writeObject(vt);
builder.addAttachedDepId(e.getKey().intValue());
builder.addAttachedData(ByteString.copyFrom(this.fs.getBBContainer().b));
} catch (Exception ex) {
String msg = String.format("Failed to serialize input dependency %d for %s", e.getKey(), ts);
throw new ServerFaultException(msg, ts.getTransactionId());
}
if (debug.val)
LOG.debug(String.format("%s - Storing %d rows for InputDependency %d to send " +
"to partition %d [bytes=%d]",
ts, vt.getRowCount(), e.getKey(), fragmentBuilder.getPartitionId(),
CollectionUtil.last(builder.getAttachedDataList()).size()));
} // FOR
requestBuilder.addInputDependencyId(e.getKey());
} // FOR
this.fs.getBBContainer().discard();
}
builder.addFragments(fragmentBuilder);
} // FOR (tasks)
// Bad mojo! We need to throw a MispredictionException so that the VoltProcedure
// will catch it and we can propagate the error message all the way back to the HStoreSite
if (need_restart) {
if (trace.val)
LOG.trace(String.format("Aborting %s because it was mispredicted", ts));
// This is kind of screwy because we don't actually want to send the touched partitions
// histogram because VoltProcedure will just do it for us...
throw new MispredictionException(txn_id, null);
}
// Stick on the ParameterSets that each site needs into the TransactionWorkRequest
for (int target_site = 0; target_site < tmp_transactionRequestBuilders.length; target_site++) {
TransactionWorkRequestBuilder builder = tmp_transactionRequestBuilders[target_site];
if (builder == null || builder.isDirty() == false) {
continue;
}
assert(builder != null);
builder.addParameterSets(parameterSets);
// Bombs away!
this.hstore_coordinator.transactionWork(ts, target_site, builder.build(), this.request_work_callback);
if (debug.val)
LOG.debug(String.format("%s - Sent Work request to remote site %s",
ts, HStoreThreadManager.formatSiteName(target_site)));
} // FOR
}
/**
* Figure out what partitions this transaction is done with. This will only return
* a PartitionSet of what partitions we think we're done with.
* For each partition that we identify that the txn is done with, we will check to see
* whether the txn is going to execute a query at its site in this batch. If it's not,
* then we will notify that HStoreSite through the HStoreCoordinator.
* If the partition that it doesn't need anymore is local (i.e., it's at the same
* HStoreSite that we're at right now), then we'll just pass them a quick message
* to let them know that they can prepare the txn.
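* <p>Worked example (hypothetical): a txn predicted to touch partitions
* {0, 1, 2} executes its final batch with fragments only for partition 0
* (our local partition). With finalTask=true the estimated done set is
* {0, 1, 2}; we drop our own partition, so partitions 1 and 2 get prepare
* notifications, either piggybacked on a WorkFragment already headed to
* their site or through a separate TransactionPrepareRequest.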
* @param ts
* @param estimate
* @param fragmentsPerPartition A histogram of the number of PlanFragments the
* txn will execute in this batch at each partition.
* @param finalTask Whether the txn has marked this as the last batch that they will ever execute
* @return A notification object that can be used to notify partitions that this txn is done with them.
*/
private DonePartitionsNotification computeDonePartitions(final LocalTransaction ts,
final Estimate estimate,
final FastIntHistogram fragmentsPerPartition,
final boolean finalTask) {
final PartitionSet touchedPartitions = ts.getPredictTouchedPartitions();
final PartitionSet donePartitions = ts.getDonePartitions();
// Compute the partitions that the txn will be finished with after this batch
PartitionSet estDonePartitions = null;
// If the finalTask flag is set to true, then the new done partitions
// is every partition that this txn has locked
if (finalTask) {
estDonePartitions = new PartitionSet(touchedPartitions);
}
// Otherwise, we'll rely on the transaction's current estimate to figure it out.
else {
if (estimate == null || estimate.isValid() == false) {
if (debug.val && estimate != null)
LOG.debug(String.format("%s - Unable to compute new done partitions because there " +
"is no valid estimate for the txn",
ts));
return (null);
}
estDonePartitions = estimate.getDonePartitions(this.thresholds);
if (estDonePartitions == null || estDonePartitions.isEmpty()) {
if (debug.val)
LOG.debug(String.format("%s - There are no new done partitions identified by %s",
ts, estimate.getClass().getSimpleName()));
return (null);
}
}
assert(estDonePartitions != null) : "Null done partitions for " + ts;
assert(estDonePartitions.isEmpty() == false) : "Empty done partitions for " + ts;
if (debug.val)
LOG.debug(String.format("%s - New estimated done partitions %s%s",
ts, estDonePartitions,
(trace.val ? "\n"+estimate : "")));
// Note that we can actually be done with ourself if this txn is only going to execute queries
// at remote partitions. But nothing could actually execute here because this partition's only
// execution thread is going to be blocked. So we always remove our own partition so that
// we're not sending ourselves a useless message
estDonePartitions.remove(this.partitionId);
// Make sure that we only tell partitions that we actually touched, otherwise they will
// be stuck waiting for a finish request that will never come!
DonePartitionsNotification notify = new DonePartitionsNotification();
LocalPrepareCallback callback = null;
for (int partition : estDonePartitions.values()) {
// Only mark the txn done at this partition if the Estimate says we were done
// with it after executing this batch and it's a partition that we've locked.
if (donePartitions.contains(partition) || touchedPartitions.contains(partition) == false)
continue;
if (trace.val)
LOG.trace(String.format("%s - Marking partition %d as done for txn", ts, partition));
notify.donePartitions.add(partition);
if (hstore_conf.site.txn_profiling && ts.profiler != null) ts.profiler.markEarly2PCPartition(partition);
// Check whether we're executing a query at this partition in this batch.
// If we're not, then we need to check whether we can piggyback the "done" message
// in another WorkFragment going to that partition or whether we have to
// send a separate TransactionPrepareRequest
if (fragmentsPerPartition.get(partition, 0) == 0) {
// We need to let them know that the party is over!
if (hstore_site.isLocalPartition(partition)) {
if (debug.val)
LOG.debug(String.format("%s - Notifying local partition %d that the txn is finished with it",
ts, partition));
if (callback == null) callback = ts.getPrepareCallback();
hstore_site.getPartitionExecutor(partition).queuePrepare(ts, callback);
}
// Check whether we can piggyback on another WorkFragment that is going to
// the same site
else {
Site remoteSite = catalogContext.getSiteForPartition(partition);
boolean found = false;
for (Partition remotePartition : remoteSite.getPartitions().values()) {
if (fragmentsPerPartition.get(remotePartition.getId(), 0) != 0) {
found = true;
break;
}
} // FOR
notify.addSiteNotification(remoteSite, partition, (found == false));
}
}
} // FOR
return (notify);
}
/**
* Send asynchronous notification messages to any remote site to tell them that we
* are done with partitions that they have.
* @param ts
* @param notify
*/
private void notifyDonePartitions(LocalTransaction ts, DonePartitionsNotification notify) {
if (debug.val)
LOG.debug(String.format("%s - Sending done partitions notifications to remote sites %s",
ts, notify._sitesToNotify));
// BLAST OUT NOTIFICATIONS!
for (int remoteSiteId : notify._sitesToNotify) {
assert(notify.notificationsPerSite[remoteSiteId] != null);
if (debug.val)
LOG.debug(String.format("%s - Notifying %s that txn is finished with partitions %s",
ts, HStoreThreadManager.formatSiteName(remoteSiteId),
notify.notificationsPerSite[remoteSiteId]));
hstore_coordinator.transactionPrepare(ts, ts.getPrepareCallback(),
notify.notificationsPerSite[remoteSiteId]);
// Make sure that we remove the PartitionSet for this site so that we don't
// try to send the notifications again.
notify.notificationsPerSite[remoteSiteId] = null;
} // FOR
}
/**
* Execute the given tasks and then block the current thread waiting for the list of dependency_ids to come
* back from whatever it was we were supposed to do...
* This is the slowest way to execute a bunch of WorkFragments and therefore should only be invoked
* for batches that need to access non-local partitions
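* <p>Control-flow sketch: (1) register the txn with the DependencyTracker,
* (2) dispatch any unblocked WorkFragments to the local EE, a sibling
* PartitionExecutor, or a remote site via requestWork(), (3) poll the
* unblocked-work queue, running utilityWork() while idle, and (4) once
* everything is dispatched, block on the dependency latch until all of the
* results have arrived and can be collected from the tracker.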
* @param ts The txn handle that is executing this query batch
* @param batchSize The number of SQLStmts that the txn queued up using voltQueueSQL()
* @param batchParams The input parameters for the SQLStmts
* @param allFragmentBuilders
* @param finalTask Whether the txn has marked this as the last batch that they will ever execute
* @return
*/
public VoltTable[] dispatchWorkFragments(final LocalTransaction ts,
final int batchSize,
final ParameterSet batchParams[],
final Collection<WorkFragment.Builder> allFragmentBuilders,
boolean finalTask) {
assert(allFragmentBuilders.isEmpty() == false) :
"Unexpected empty WorkFragment list for " + ts;
final boolean needs_profiling = (hstore_conf.site.txn_profiling && ts.profiler != null);
// *********************************** DEBUG ***********************************
if (debug.val) {
LOG.debug(String.format("%s - Preparing to dispatch %d messages and wait for the results [needsProfiling=%s]",
ts, allFragmentBuilders.size(), needs_profiling));
if (trace.val) {
StringBuilder sb = new StringBuilder();
sb.append(ts + " - WorkFragments:\n");
for (WorkFragment.Builder fragment : allFragmentBuilders) {
sb.append(StringBoxUtil.box(fragment.toString()) + "\n");
} // FOR
sb.append(ts + " - ParameterSets:\n");
for (ParameterSet ps : batchParams) {
sb.append(ps + "\n");
} // FOR
LOG.trace(sb);
}
}
// *********************************** DEBUG ***********************************
// OPTIONAL: Check to make sure that this request is valid
// (1) At least one of the WorkFragments needs to be executed on a remote partition
// (2) All of the PlanFragments ids in the WorkFragments match this txn's Procedure
if (hstore_conf.site.exec_validate_work && ts.isSysProc() == false) {
LOG.warn(String.format("%s - Checking whether all of the WorkFragments are valid", ts));
boolean has_remote = false;
for (WorkFragment.Builder frag : allFragmentBuilders) {
if (frag.getPartitionId() != this.partitionId) {
has_remote = true;
}
for (int frag_id : frag.getFragmentIdList()) {
PlanFragment catalog_frag = CatalogUtil.getPlanFragment(catalogContext.database, frag_id);
Statement catalog_stmt = catalog_frag.getParent();
assert(catalog_stmt != null);
Procedure catalog_proc = catalog_stmt.getParent();
if (catalog_proc.equals(ts.getProcedure()) == false) {
LOG.warn(ts.debug() + "\n" + allFragmentBuilders + "\n---- INVALID ----\n" + frag);
String msg = String.format("%s - Unexpected %s", ts, catalog_frag.fullName());
throw new ServerFaultException(msg, ts.getTransactionId());
}
}
} // FOR
if (has_remote == false) {
LOG.warn(ts.debug() + "\n" + allFragmentBuilders);
String msg = ts + "Trying to execute all local single-partition queries using the slow-path!";
throw new ServerFaultException(msg, ts.getTransactionId());
}
}
boolean first = true;
boolean serializedParams = false;
CountDownLatch latch = null;
boolean all_local = true;
boolean is_localSite;
boolean is_localPartition;
boolean is_localReadOnly = true;
int num_localPartition = 0;
int num_localSite = 0;
int num_remote = 0;
int num_skipped = 0;
int total = 0;
Collection<WorkFragment.Builder> fragmentBuilders = allFragmentBuilders;
// Make sure our txn is in our DependencyTracker
if (trace.val)
LOG.trace(String.format("%s - Added transaction to %s",
ts, this.depTracker.getClass().getSimpleName()));
this.depTracker.addTransaction(ts);
// Count the number of fragments that we're going to send to each partition and
// figure out whether the txn will always be read-only at this partition
tmp_fragmentsPerPartition.clearValues();
for (WorkFragment.Builder fragmentBuilder : allFragmentBuilders) {
int partition = fragmentBuilder.getPartitionId();
tmp_fragmentsPerPartition.put(partition);
if (this.partitionId == partition && fragmentBuilder.getReadOnly() == false) {
is_localReadOnly = false;
}
} // FOR
long undoToken = this.calculateNextUndoToken(ts, is_localReadOnly);
ts.initFirstRound(undoToken, batchSize);
final boolean predict_singlePartition = ts.isPredictSinglePartition();
// Calculate whether we are finished with partitions now
final Estimate lastEstimate = ts.getLastEstimate();
DonePartitionsNotification notify = null;
if (hstore_conf.site.exec_early_prepare && ts.isSysProc() == false && ts.allowEarlyPrepare()) {
notify = this.computeDonePartitions(ts, lastEstimate, tmp_fragmentsPerPartition, finalTask);
if (notify != null && notify.hasSitesToNotify())
this.notifyDonePartitions(ts, notify);
}
// Attach the ParameterSets to our transaction handle so that anybody on this HStoreSite
// can access them directly without needing to deserialize them from the WorkFragments
ts.attachParameterSets(batchParams);
// Now if we have some work sent out to other partitions, we need to wait until they come back
// In the first part, we wait until all of our blocked WorkFragments become unblocked
final BlockingDeque<Collection<WorkFragment.Builder>> queue = this.depTracker.getUnblockedWorkFragmentsQueue(ts);
// Run through this loop if:
// (1) We have no pending errors
// (2) This is our first time in the loop (first == true)
// (3) If we know that there are still messages being blocked
// (4) If we know that there are still unblocked messages that we need to process
// (5) The latch for this round is still greater than zero
while (ts.hasPendingError() == false &&
(first == true || this.depTracker.stillHasWorkFragments(ts) || (latch != null && latch.getCount() > 0))) {
if (trace.val)
LOG.trace(String.format("%s - %s loop [first=%s, stillHasWorkFragments=%s, latch=%s]",
ts, ClassUtil.getCurrentMethodName(),
first, this.depTracker.stillHasWorkFragments(ts), queue.size(), latch));
// If this is not the first time through the loop, then poll the queue
// to get our list of fragments
if (first == false) {
all_local = true;
is_localSite = false;
is_localPartition = false;
num_localPartition = 0;
num_localSite = 0;
num_remote = 0;
num_skipped = 0;
total = 0;
if (trace.val)
LOG.trace(String.format("%s - Waiting for unblocked tasks on partition %d",
ts, this.partitionId));
fragmentBuilders = queue.poll(); // NON-BLOCKING
// If we didn't get back a list of fragments here, then we will spin through
// and invoke utilityWork() to try to do something useful until what we need shows up
if (needs_profiling) ts.profiler.startExecDtxnWork();
if (hstore_conf.site.exec_profiling) this.profiler.sp1_time.start();
try {
while (fragmentBuilders == null) {
// If there is more work that we could do, then we'll just poll the queue
// without waiting so that we can go back and execute it again if we have
// more time.
if (this.utilityWork()) {
fragmentBuilders = queue.poll();
}
// Otherwise we will wait a little so that we don't spin the CPU
else {
fragmentBuilders = queue.poll(WORK_QUEUE_POLL_TIME, TimeUnit.MILLISECONDS);
}
} // WHILE
} catch (InterruptedException ex) {
if (this.hstore_site.isShuttingDown() == false) {
LOG.error(String.format("%s - We were interrupted while waiting for blocked tasks", ts), ex);
}
return (null);
} finally {
if (needs_profiling) ts.profiler.stopExecDtxnWork();
if (hstore_conf.site.exec_profiling) this.profiler.sp1_time.stopIfStarted();
}
}
assert(fragmentBuilders != null);
// If the list of unblocked fragments is empty, then we
// know that we have dispatched all of the WorkFragments for the
// transaction's current SQLStmt batch. That means we can just wait
// until all the results return to us.
if (fragmentBuilders.isEmpty()) {
if (trace.val)
LOG.trace(String.format("%s - Got an empty list of WorkFragments at partition %d. " +
"Blocking until dependencies arrive",
ts, this.partitionId));
break;
}
this.tmp_localWorkFragmentBuilders.clear();
if (predict_singlePartition == false) {
this.tmp_remoteFragmentBuilders.clear();
this.tmp_localSiteFragmentBuilders.clear();
}
// -------------------------------
// FAST PATH: Assume everything is local
// -------------------------------
if (predict_singlePartition) {
for (WorkFragment.Builder fragmentBuilder : fragmentBuilders) {
if (first == false || this.depTracker.addWorkFragment(ts, fragmentBuilder, batchParams)) {
this.tmp_localWorkFragmentBuilders.add(fragmentBuilder);
total++;
num_localPartition++;
}
} // FOR
// We have to tell the transaction handle to start the round before we send off the
// WorkFragments for execution, since they might start executing locally!
if (first) {
ts.startRound(this.partitionId);
latch = this.depTracker.getDependencyLatch(ts);
}
// Execute all of our WorkFragments quickly at our local ExecutionEngine
for (WorkFragment.Builder fragmentBuilder : this.tmp_localWorkFragmentBuilders) {
if (debug.val)
LOG.debug(String.format("%s - Got unblocked %s to execute locally",
ts, fragmentBuilder.getClass().getSimpleName()));
assert(fragmentBuilder.getPartitionId() == this.partitionId) :
String.format("Trying to process %s for %s on partition %d but it should have been " +
"sent to partition %d [singlePartition=%s]\n%s",
fragmentBuilder.getClass().getSimpleName(), ts, this.partitionId,
fragmentBuilder.getPartitionId(), predict_singlePartition, fragmentBuilder);
WorkFragment fragment = fragmentBuilder.build();
this.processWorkFragment(ts, fragment, batchParams);
} // FOR
}
// -------------------------------
// SLOW PATH: Mixed local and remote messages
// -------------------------------
else {
// Look at each task and figure out whether it needs to be executed at a remote
// HStoreSite or whether we can execute it at one of our local PartitionExecutors.
for (WorkFragment.Builder fragmentBuilder : fragmentBuilders) {
int partition = fragmentBuilder.getPartitionId();
is_localSite = hstore_site.isLocalPartition(partition);
is_localPartition = (partition == this.partitionId);
all_local = all_local && is_localPartition;
// If this is the last WorkFragment that we're going to send to this partition for
// this batch, then we will want to check whether we know that this is the last
// time this txn will ever need to go to that partition. If so, then we'll set the
// last fragment flag so that the partition can prepare the txn early.
if (notify != null && notify.donePartitions.contains(partition) &&
tmp_fragmentsPerPartition.dec(partition) == 0) {
if (debug.val)
LOG.debug(String.format("%s - Setting last fragment flag in %s for partition %d",
ts, WorkFragment.class.getSimpleName(), partition));
fragmentBuilder.setLastFragment(true);
}
if (first == false || this.depTracker.addWorkFragment(ts, fragmentBuilder, batchParams)) {
total++;
// At this point we know that all of the WorkFragments have been registered
// in the LocalTransaction, so then it's safe for us to look to see
// whether we already have a prefetched result that we need
// if (prefetch && is_localPartition == false) {
// boolean skip_queue = true;
// for (int i = 0, cnt = fragmentBuilder.getFragmentIdCount(); i < cnt; i++) {
// int fragId = fragmentBuilder.getFragmentId(i);
// int paramIdx = fragmentBuilder.getParamIndex(i);
//
// VoltTable vt = this.queryCache.getResult(ts.getTransactionId(),
// fragId,
// partition,
// parameters[paramIdx]);
// if (vt != null) {
// if (trace.val)
// LOG.trace(String.format("%s - Storing cached result from partition %d for fragment %d",
// ts, partition, fragId));
// this.depTracker.addResult(ts, partition, fragmentBuilder.getOutputDepId(i), vt);
// } else {
// skip_queue = false;
// }
// } // FOR
// // If we were able to get cached results for all of the fragmentIds in
// // this WorkFragment, then there is no need for us to send the message
// // So we'll just skip queuing it up! How nice!
// if (skip_queue) {
// if (debug.val)
// LOG.debug(String.format("%s - Using prefetch result for all fragments from partition %d",
// ts, partition));
// num_skipped++;
// continue;
// }
// }
// Otherwise add it to our list of WorkFragments that we want
// queue up right now
if (is_localPartition) {
is_localReadOnly = (is_localReadOnly && fragmentBuilder.getReadOnly());
this.tmp_localWorkFragmentBuilders.add(fragmentBuilder);
num_localPartition++;
} else if (is_localSite) {
this.tmp_localSiteFragmentBuilders.add(fragmentBuilder);
num_localSite++;
} else {
this.tmp_remoteFragmentBuilders.add(fragmentBuilder);
num_remote++;
}
}
} // FOR
assert(total == (num_remote + num_localSite + num_localPartition + num_skipped)) :
String.format("Total:%d / Remote:%d / LocalSite:%d / LocalPartition:%d / Skipped:%d",
total, num_remote, num_localSite, num_localPartition, num_skipped);
// We have to tell the txn to start the round before we send off the
// WorkFragments for execution, since they might start executing locally!
if (first) {
ts.startRound(this.partitionId);
latch = this.depTracker.getDependencyLatch(ts);
}
// Now request the fragments that aren't local
// We want to push these out as soon as possible
if (num_remote > 0) {
// We only need to serialize the ParameterSets once
if (serializedParams == false) {
if (needs_profiling) ts.profiler.startSerialization();
tmp_serializedParams.clear();
for (int i = 0; i < batchParams.length; i++) {
if (batchParams[i] == null) {
tmp_serializedParams.add(ByteString.EMPTY);
} else {
this.fs.clear();
try {
batchParams[i].writeExternal(this.fs);
ByteString bs = ByteString.copyFrom(this.fs.getBBContainer().b);
tmp_serializedParams.add(bs);
} catch (Exception ex) {
String msg = "Failed to serialize ParameterSet " + i + " for " + ts;
throw new ServerFaultException(msg, ex, ts.getTransactionId());
}
}
} // FOR
if (needs_profiling) ts.profiler.stopSerialization();
}
//if (trace.val)
// LOG.trace(String.format("%s - Requesting %d %s to be executed on remote partitions " +
// "[doneNotifications=%s]",
// ts, WorkFragment.class.getSimpleName(), num_remote, notify!=null));
this.requestWork(ts, tmp_remoteFragmentBuilders, tmp_serializedParams, notify);
if (needs_profiling) ts.profiler.markRemoteQuery();
}
// Then dispatch the tasks that need to be executed at the same HStoreSite but
// at a different partition than this one
if (num_localSite > 0) {
if (trace.val)
LOG.trace(String.format("%s - Executing %d WorkFragments on local site's partitions",
ts, num_localSite));
for (WorkFragment.Builder builder : this.tmp_localSiteFragmentBuilders) {
PartitionExecutor other = hstore_site.getPartitionExecutor(builder.getPartitionId());
other.queueWork(ts, builder.build());
} // FOR
if (needs_profiling) ts.profiler.markRemoteQuery();
}
// Then execute all of the tasks that need to access the partitions at this HStoreSite
// We'll dispatch the remote-partition-local-site fragments first because they're going
// to need to get queued up at the other PartitionExecutors
if (num_localPartition > 0) {
if (trace.val)
LOG.trace(String.format("%s - Executing %d WorkFragments on local partition",
ts, num_localPartition));
for (WorkFragment.Builder fragmentBuilder : this.tmp_localWorkFragmentBuilders) {
this.processWorkFragment(ts, fragmentBuilder.build(), batchParams);
} // FOR
}
}
if (trace.val)
LOG.trace(String.format("%s - Dispatched %d WorkFragments " +
"[remoteSite=%d, localSite=%d, localPartition=%d]",
ts, total, num_remote, num_localSite, num_localPartition));
first = false;
} // WHILE
this.fs.getBBContainer().discard();
if (trace.val)
LOG.trace(String.format("%s - BREAK OUT [first=%s, stillHasWorkFragments=%s, latch=%s]",
ts, first, this.depTracker.stillHasWorkFragments(ts), latch));
// assert(ts.stillHasWorkFragments() == false) :
// String.format("Trying to block %s before all of its WorkFragments have been dispatched!\n%s\n%s",
// ts,
// StringUtil.join("** ", "\n", tempDebug),
// this.getVoltProcedure(ts.getProcedureName()).getLastBatchPlan());
// Now that we know all of our WorkFragments have been dispatched, we can then
// wait for all of the results to come back in.
if (latch == null) latch = this.depTracker.getDependencyLatch(ts);
assert(latch != null) :
"Unexpected null dependency latch for " + ts;
if (latch.getCount() > 0) {
if (debug.val) {
LOG.debug(String.format("%s - All blocked messages dispatched. Waiting for %d dependencies",
ts, latch.getCount()));
if (trace.val) LOG.trace(ts.toString());
}
boolean timeout = false;
long startTime = EstTime.currentTimeMillis();
if (needs_profiling) ts.profiler.startExecDtxnWork();
if (hstore_conf.site.exec_profiling) this.profiler.sp1_time.start();
try {
while (latch.getCount() > 0 && ts.hasPendingError() == false) {
if (this.utilityWork() == false) {
// await() returns true once the latch reaches zero; false means that the
// poll interval elapsed, so loop around and check the overall timeout below
boolean done = latch.await(WORK_QUEUE_POLL_TIME, TimeUnit.MILLISECONDS);
if (done) break;
}
if ((EstTime.currentTimeMillis() - startTime) > hstore_conf.site.exec_response_timeout) {
timeout = true;
break;
}
} // WHILE
} catch (InterruptedException ex) {
if (this.hstore_site.isShuttingDown() == false) {
LOG.error(String.format("%s - We were interrupted while waiting for results", ts), ex);
}
timeout = true;
} catch (Throwable ex) {
String msg = String.format("Fatal error for %s while waiting for results", ts);
throw new ServerFaultException(msg, ex);
} finally {
if (needs_profiling) ts.profiler.stopExecDtxnWork();
if (hstore_conf.site.exec_profiling) this.profiler.sp1_time.stopIfStarted();
}
if (timeout && this.isShuttingDown() == false) {
LOG.warn(String.format("Still waiting for responses for %s after %d ms [latch=%d]\n%s",
ts, hstore_conf.site.exec_response_timeout, latch.getCount(), ts.debug()));
LOG.warn("Procedure Parameters:\n" + ts.getProcedureParameters());
hstore_conf.site.exec_profiling = true;
LOG.warn(hstore_site.statusSnapshot());
String msg = "The query responses for " + ts + " never arrived!";
throw new ServerFaultException(msg, ts.getTransactionId());
}
}
// Update done partitions
if (notify != null && notify.donePartitions.isEmpty() == false) {
if (debug.val)
LOG.debug(String.format("%s - Marking new done partitions %s", ts, notify.donePartitions));
ts.getDonePartitions().addAll(notify.donePartitions);
}
// IMPORTANT: Check whether the fragments failed somewhere and we got a response with an error
// We will rethrow this so that it pops the stack all the way back to VoltProcedure.call()
// where we can generate a message to the client
if (ts.hasPendingError()) {
if (debug.val) LOG.warn(String.format("%s was hit with a %s",
ts, ts.getPendingError().getClass().getSimpleName()));
throw ts.getPendingError();
}
// IMPORTANT: Don't try to check whether we got back the right number of tables because the batch
// may have hit an error and we didn't execute all of them.
VoltTable results[] = null;
try {
results = this.depTracker.getResults(ts);
} catch (AssertionError ex) {
LOG.error("Failed to get final results for batch\n" + ts.debug());
throw ex;
}
ts.finishRound(this.partitionId);
if (debug.val) {
if (trace.val) LOG.trace(ts + " is now running and looking for love in all the wrong places...");
LOG.debug(String.format("%s - Returning back %d tables to VoltProcedure", ts, results.length));
}
return (results);
}
// ---------------------------------------------------------------
// COMMIT + ABORT METHODS
// ---------------------------------------------------------------
/**
* Queue a speculatively executed transaction to send its ClientResponseImpl message
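* The blocked response stays queued in specExecBlocked until the distributed
* txn that this one was speculatively executed behind commits or aborts.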
*/
private void blockClientResponse(LocalTransaction ts, ClientResponseImpl cresponse) {
assert(ts.isPredictSinglePartition() == true) :
String.format("Specutatively executed multi-partition %s [mode=%s, status=%s]",
ts, this.currentExecMode, cresponse.getStatus());
assert(ts.isSpeculative() == true) :
String.format("Blocking ClientResponse for non-specutative %s [mode=%s, status=%s]",
ts, this.currentExecMode, cresponse.getStatus());
assert(ts.getClientResponse() != null) :
String.format("Missing ClientResponse for %s [mode=%s, status=%s]",
ts, this.currentExecMode, cresponse.getStatus());
assert(cresponse.getStatus() != Status.ABORT_MISPREDICT) :
String.format("Trying to block ClientResponse for mispredicted %s [mode=%s, status=%s]",
ts, this.currentExecMode, cresponse.getStatus());
assert(this.currentExecMode != ExecutionMode.COMMIT_ALL) :
String.format("Blocking ClientResponse for %s when in non-specutative mode [mode=%s, status=%s]",
ts, this.currentExecMode, cresponse.getStatus());
this.specExecBlocked.push(ts);
this.specExecModified = (this.specExecModified && ts.isExecReadOnly(this.partitionId));
if (debug.val)
LOG.debug(String.format("%s - Blocking %s ClientResponse [partitions=%s, blockQueue=%d]",
ts, cresponse.getStatus(),
ts.getTouchedPartitions().values(), this.specExecBlocked.size()));
}
/**
* For the given transaction's ClientResponse, figure out whether we can send it back to the client
* right now or whether we need to initiate two-phase commit.
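* <p>Dispatch summary: mispredicted/speculative/evicted-access aborts are
* requeued for restart; single-partition txns commit or abort locally and
* respond immediately; distributed commits kick off two-phase commit via
* transactionPrepare(); distributed aborts respond to the client first and
* then broadcast transactionFinish().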
* @param ts
* @param cresponse
*/
protected void processClientResponse(LocalTransaction ts, ClientResponseImpl cresponse) {
// IMPORTANT: If we executed this locally and only touched our partition, then we need to commit/abort right here
// 2010-11-14: The reason why we can do this is because we will just ignore the commit
// message when it shows up from the Dtxn.Coordinator. We should probably double check with Evan on this...
Status status = cresponse.getStatus();
if (debug.val) {
LOG.debug(String.format("%s - Processing ClientResponse at partition %d " +
"[status=%s, singlePartition=%s, local=%s, clientHandle=%d]",
ts, this.partitionId, status, ts.isPredictSinglePartition(),
ts.isExecLocal(this.partitionId), cresponse.getClientHandle()));
if (trace.val) {
LOG.trace(ts + " Touched Partitions: " + ts.getTouchedPartitions().values());
if (ts.isPredictSinglePartition() == false)
LOG.trace(ts + " Done Partitions: " + ts.getDonePartitions());
}
}
// -------------------------------
// ALL: Transactions that need to be internally restarted
// -------------------------------
if (status == Status.ABORT_MISPREDICT ||
status == Status.ABORT_SPECULATIVE ||
status == Status.ABORT_EVICTEDACCESS) {
// If the txn was mispredicted, then we will pass the information over to the
// HStoreSite so that it can re-execute the transaction. We want to do this
// first so that the txn gets re-executed as soon as possible...
if (debug.val)
LOG.debug(String.format("%s - Restarting because transaction was hit with %s",
ts, (ts.getPendingError() != null ? ts.getPendingError().getClass().getSimpleName() : "")));
// We don't want to delete the transaction here because whoever is going to requeue it for
// us will need to know what partitions that the transaction touched when it executed before
if (ts.isPredictSinglePartition()) {
if (ts.isMarkedFinished(this.partitionId) == false)
this.finishTransaction(ts, status);
this.hstore_site.transactionRequeue(ts, status);
}
// Send a message all the partitions involved that the party is over
// and that they need to abort the transaction. We don't actually care when we get the
// results back because we'll start working on new txns right away.
// Note that when we call transactionFinish() right here this thread will then go on
// to invoke HStoreSite.transactionFinish() for us. That means when it returns we will
// have successfully aborted the txn at least at all of the local partitions at this site.
else {
if (hstore_conf.site.txn_profiling && ts.profiler != null) ts.profiler.startPostFinish();
LocalFinishCallback finish_callback = ts.getFinishCallback();
finish_callback.init(ts, status);
finish_callback.markForRequeue();
if (hstore_conf.site.exec_profiling) this.profiler.network_time.start();
this.hstore_coordinator.transactionFinish(ts, status, finish_callback);
if (hstore_conf.site.exec_profiling) this.profiler.network_time.stopIfStarted();
}
}
// -------------------------------
// ALL: Single-Partition Transactions
// -------------------------------
else if (ts.isPredictSinglePartition()) {
// Commit or abort the transaction only if we haven't done it already
// This can happen when we commit speculative txns out of order
if (ts.isMarkedFinished(this.partitionId) == false) {
this.finishTransaction(ts, status);
}
// We have to mark it as loggable so that the response does not get sent back
// to the client before the command log has recorded this txn
if (hstore_conf.site.commandlog_enable) ts.markLogEnabled();
if (hstore_conf.site.exec_profiling) this.profiler.network_time.start();
this.hstore_site.responseSend(ts, cresponse);
if (hstore_conf.site.exec_profiling) this.profiler.network_time.stopIfStarted();
this.hstore_site.queueDeleteTransaction(ts.getTransactionId(), status);
}
// -------------------------------
// COMMIT: Distributed Transaction
// -------------------------------
else if (status == Status.OK) {
// We need to set the new ExecutionMode before we invoke transactionPrepare
// because the LocalTransaction handle might get cleaned up immediately
ExecutionMode newMode = null;
if (hstore_conf.site.specexec_enable) {
newMode = (ts.isExecReadOnly(this.partitionId) ? ExecutionMode.COMMIT_READONLY :
ExecutionMode.COMMIT_NONE);
} else {
newMode = ExecutionMode.DISABLED;
}
this.setExecutionMode(ts, newMode);
if (hstore_conf.site.txn_profiling && ts.profiler != null) ts.profiler.startPostPrepare();
if (hstore_conf.site.exec_profiling) {
this.profiler.network_time.start();
this.profiler.sp3_local_time.start();
}
// We will send a prepare message to all of our remote HStoreSites
// The coordinator needs to be smart enough to know whether a txn
// has already been marked as prepared at a partition (i.e., it's gotten
// responses).
PartitionSet partitions = ts.getPredictTouchedPartitions();
LocalPrepareCallback callback = ts.getPrepareCallback();
this.hstore_coordinator.transactionPrepare(ts, callback, partitions);
if (hstore_conf.site.exec_profiling) this.profiler.network_time.stopIfStarted();
ts.getDonePartitions().addAll(partitions);
}
// -------------------------------
// ABORT: Distributed Transaction
// -------------------------------
else {
// Send back the result to the client right now, since there's no way
// that we're magically going to be able to recover this and get them a result
// This has to come before the network messages above because this will clean-up the
// LocalTransaction state information
this.hstore_site.responseSend(ts, cresponse);
// Send a message all the partitions involved that the party is over
// and that they need to abort the transaction. We don't actually care when we get the
// results back because we'll start working on new txns right away.
// Note that when we call transactionFinish() right here this thread will then go on
// to invoke HStoreSite.transactionFinish() for us. That means when it returns we will
// have successfully aborted the txn at least at all of the local partitions at this site.
if (hstore_conf.site.txn_profiling && ts.profiler != null) ts.profiler.startPostFinish();
LocalFinishCallback callback = ts.getFinishCallback();
callback.init(ts, status);
if (hstore_conf.site.exec_profiling) this.profiler.network_time.start();
try {
this.hstore_coordinator.transactionFinish(ts, status, callback);
} finally {
if (hstore_conf.site.exec_profiling) this.profiler.network_time.stopIfStarted();
}
}
}
/**
* Prepare the given transaction for two-phase commit at this partition.
* If speculative execution is enabled, we first check whether any of the
* speculatively executed transactions blocked at this partition conflict with
* the distributed transaction; if one does, the prepare is rejected so that
* the txn can be restarted.
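* <p>Outcome sketch: on Status.OK we invoke callback.run(partitionId) (when the
* callback is initialized) and mark the txn as prepared here; if a conflicting
* speculative txn is found, we invoke callback.abort(partitionId, Status.ABORT_RESTART).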
* @param ts
* @param callback The callback to notify with the outcome for this partition
* @return The prepare Status for this partition (Status.OK if prepared)
*/
private Status prepareTransaction(AbstractTransaction ts,
PartitionCountingCallback<? extends AbstractTransaction> callback) {
assert(ts != null) :
"Unexpected null transaction handle at partition " + this.partitionId;
assert(ts.isInitialized()) :
String.format("Trying to prepare uninitialized transaction %s at partition %d", ts, this.partitionId);
assert(ts.isMarkedFinished(this.partitionId) == false) :
String.format("Trying to prepare %s again after it was already finished at partition %d", ts, this.partitionId);
Status status = Status.OK;
// Skip if we've already invoked prepare for this txn at this partition
if (ts.isMarkedPrepared(this.partitionId) == false) {
if (debug.val)
LOG.debug(String.format("%s - Preparing to commit txn at partition %d [specBlocked=%d]",
ts, this.partitionId, this.specExecBlocked.size()));
ExecutionMode newMode = ExecutionMode.COMMIT_NONE;
if (hstore_conf.site.exec_profiling &&
this.partitionId != ts.getBasePartition() &&
ts.needsFinish(this.partitionId)) {
profiler.sp3_remote_time.start();
}
if (hstore_conf.site.specexec_enable) {
// Check to see if there were any conflicts with the dtxn and any of its speculative
// txns at this partition. If there were, then we know that we can't commit the txn here.
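// Illustrative case (hypothetical workload): if the dtxn wrote a tuple that a
// speculative txn later read at this partition, hasConflictAfter() will report
// a conflict below and the dtxn gets aborted with ABORT_RESTART.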
if (this.specExecSkipAfter == false) {
for (LocalTransaction spec_ts : this.specExecBlocked) {
// Check whether we can quickly ignore this speculative txn because
// it was executed at a stall point where conflicts don't matter.
SpeculationType specType = spec_ts.getSpeculationType();
if (specType != SpeculationType.SP2_REMOTE_AFTER && specType != SpeculationType.SP1_LOCAL) {
continue;
}
if (debug.val)
LOG.debug(String.format("%s - Checking for conflicts with speculative %s at partition %d [%s]",
ts, spec_ts, this.partitionId,
this.specExecChecker.getClass().getSimpleName()));
if (this.specExecChecker.hasConflictAfter(ts, spec_ts, this.partitionId)) {
if (debug.val)
LOG.debug(String.format("%s - Conflict found with speculative txn %s at partition %d",
ts, spec_ts, this.partitionId));
status = Status.ABORT_RESTART;
break;
}
} // FOR
}
// Check whether the dtxn has been read-only at this partition.
// If it has, then read-only speculative txns can commit right away
if (status == Status.OK && ts.isExecReadOnly(this.partitionId)) {
if (debug.val)
LOG.debug(String.format("%s - Txn is read-only at partition %d [readOnly=%s]",
ts, this.partitionId, ts.isExecReadOnly(this.partitionId)));
newMode = ExecutionMode.COMMIT_READONLY;
}
}
if (this.currentDtxn != null) this.setExecutionMode(ts, newMode);
}
// It's ok if they try to prepare the txn twice. That might just mean that they never
// got the acknowledgement back in time if they tried to send an early commit message.
else if (debug.val) {
LOG.debug(String.format("%s - Already marked 2PC:PREPARE at partition %d", ts, this.partitionId));
}
// IMPORTANT
// When we do an early 2PC-PREPARE, we won't have this callback ready
// because we don't know what callback to use to send the acknowledgements
// back over the network
if (status == Status.OK) {
if (callback.isInitialized()) {
try {
callback.run(this.partitionId);
} catch (Throwable ex) {
LOG.warn("Unexpected error for " + ts, ex);
}
}
// But we will always mark ourselves as prepared at this partition
ts.markPrepared(this.partitionId);
} else {
if (debug.val)
LOG.debug(String.format("%s - Aborting txn from partition %d [%s]",
ts, this.partitionId, status));
callback.abort(this.partitionId, status);
}
return (status);
}
/**
* Internal call to abort/commit the transaction down in the execution engine
* @param ts
* @param commit
*/
private void finishTransaction(AbstractTransaction ts, Status status) {
assert(ts != null) :
"Unexpected null transaction handle at partition " + this.partitionId;
assert(ts.isInitialized()) :
String.format("Trying to commit uninitialized transaction %s at partition %d", ts, this.partitionId);
assert(ts.isMarkedFinished(this.partitionId) == false) :
String.format("Trying to commit %s twice at partition %d", ts, this.partitionId);
// Figure out what undoToken we need to process. This can be null if they haven't
// submitted any work to the EE at this partition.
// The logic for what we're doing is as follows:
// (1) If we are committing, then we want the *last* undoToken because that
// will automatically commit everything up to that token (i.e., all the earlier
// tokens used by the txn).
// (2) If we are aborting, then we want the *first* undo token
// because that will automatically rollback all of the tokens used by the txn
// that came after it.
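// Worked example (hypothetical token values): if the txn acquired undo tokens
// 100, 101, and 102 at this partition, a commit passes token 102 to the EE
// (releasing 100-102 in one call), while an abort passes token 100 (rolling
// back 102, then 101, then 100).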
boolean commit = (status == Status.OK);
long undoToken = (commit ? ts.getLastUndoToken(this.partitionId) :
ts.getFirstUndoToken(this.partitionId));
// Only commit/abort this transaction if:
// (1) We have the last undo token used by this transaction
// (2) The transaction was executed with undo buffers
// (3) The transaction actually submitted work to the EE
// (4) The transaction modified data at this partition
if (ts.needsFinish(this.partitionId) && undoToken != HStoreConstants.NULL_UNDO_LOGGING_TOKEN) {
if (trace.val)
LOG.trace(String.format("%s - Invoking EE to finish work for txn [%s / speculative=%s]",
ts, status, ts.isSpeculative()));
this.finishWorkEE(ts, undoToken, commit);
}
// We always need to do the following things regardless if we hit up the EE or not
if (commit) this.lastCommittedTxnId = ts.getTransactionId();
if (trace.val)
LOG.trace(String.format("%s - Telling queue manager that txn is finished at partition %d",
ts, this.partitionId));
this.queueManager.lockQueueFinished(ts, status, this.partitionId);
if (debug.val)
LOG.debug(String.format("%s - Successfully %sed transaction at partition %d",
ts, (commit ? "committ" : "abort"), this.partitionId));
this.markTransactionFinished(ts);
}
/**
* The real method that actually reaches down into the EE and commits/undoes the changes
* for the given token.
* Unless you know what you're doing, you probably want to be calling finishTransaction()
* instead of calling this directly.
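* <p>Restating the checks below: if the txn ran with undo logging disabled
* (DISABLE_UNDO_LOGGING_TOKEN), then a commit is a no-op here, while an abort
* of non-read-only work is unrecoverable and crashes the site.</p>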
* @param ts
* @param undoToken
* @param commit If true, then this txn will be committed. If false, the txn will be aborted.
*/
private void finishWorkEE(AbstractTransaction ts, long undoToken, boolean commit) {
assert(ts.isMarkedFinished(this.partitionId) == false) :
String.format("Trying to commit %s twice at partition %d", ts, this.partitionId);
// If the txn is completely read-only and they didn't use undo-logging, then
// there is nothing that we need to do, except to check to make sure we aren't
// trying to abort this txn
if (undoToken == HStoreConstants.DISABLE_UNDO_LOGGING_TOKEN) {
// SANITY CHECK: Make sure that they're not trying to undo a transaction that
// modified the database but did not use undo logging
if (ts.isExecReadOnly(this.partitionId) == false && commit == false) {
String msg = String.format("TRYING TO ABORT TRANSACTION ON PARTITION %d WITHOUT UNDO LOGGING [undoToken=%d]",
this.partitionId, undoToken);
LOG.fatal(msg + "\n" + ts.debug());
this.crash(new ServerFaultException(msg, ts.getTransactionId()));
}
if (debug.val) LOG.debug(String.format("%s - undoToken == DISABLE_UNDO_LOGGING_TOKEN", ts));
}
// COMMIT / ABORT
else {
boolean needs_profiling = false;
if (hstore_conf.site.txn_profiling && ts.isExecLocal(this.partitionId) && ((LocalTransaction)ts).profiler != null) {
needs_profiling = true;
((LocalTransaction)ts).profiler.startPostEE();
}
assert(this.lastCommittedUndoToken != undoToken) :
String.format("Trying to %s undoToken %d for %s twice at partition %d",
(commit ? "COMMIT" : "ABORT"), undoToken, ts, this.partitionId);
// COMMIT!
if (commit) {
if (debug.val) {
LOG.debug(String.format("%s - COMMITING txn on partition %d with undoToken %d " +
"[lastTxnId=%d, lastUndoToken=%d, dtxn=%s]%s",
ts, this.partitionId, undoToken,
this.lastCommittedTxnId, this.lastCommittedUndoToken, this.currentDtxn,
(ts instanceof LocalTransaction ? " - " + ((LocalTransaction)ts).getSpeculationType() : "")));
if (this.specExecBlocked.isEmpty() == false && ts.isPredictSinglePartition() == false) {
LOG.debug(String.format("%s - # of Speculatively Executed Txns: %d ", ts, this.specExecBlocked.size()));
}
}
assert(this.lastCommittedUndoToken < undoToken) :
String.format("Trying to commit undoToken %d for %s but it is less than the " +
"last committed undoToken %d at partition %d\n" +
"Last Committed Txn: %d",
undoToken, ts, this.lastCommittedUndoToken, this.partitionId,
this.lastCommittedTxnId);
this.ee.releaseUndoToken(undoToken);
this.lastCommittedUndoToken = undoToken;
}
// ABORT!
else {
// Evan says that txns will be aborted LIFO. This means the first txn that
// we get in abortWork() will have the greatest undoToken, which means that
// it will automagically rollback all other outstanding txns.
// I'm lazy/tired, so for now I'll just rollback everything I get, but in theory
// we should be able to check whether our undoToken has already been rolled back
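// Example (hypothetical tokens): with outstanding undo tokens 40, 50, and 60,
// undoing token 40 causes the EE to roll back 60, then 50, and then 40.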
if (debug.val) {
LOG.debug(String.format("%s - ABORTING txn on partition %d with undoToken %d " +
"[lastTxnId=%d, lastUndoToken=%d, dtxn=%s]%s",
ts, this.partitionId, undoToken,
this.lastCommittedTxnId, this.lastCommittedUndoToken, this.currentDtxn,
(ts instanceof LocalTransaction ? " - " + ((LocalTransaction)ts).getSpeculationType() : "")));
if (this.specExecBlocked.isEmpty() == false && ts.isPredictSinglePartition() == false) {
LOG.debug(String.format("%s - # of Speculatively Executed Txns: %d ", ts, this.specExecBlocked.size()));
}
}
assert(this.lastCommittedUndoToken < undoToken) :
String.format("Trying to abort undoToken %d for %s but it is less than the " +
"last committed undoToken %d at partition %d " +
"[lastTxnId=%d, lastUndoToken=%d, dtxn=%s]%s",
undoToken, ts, this.lastCommittedUndoToken, this.partitionId,
this.lastCommittedTxnId, this.lastCommittedUndoToken, this.currentDtxn,
(ts instanceof LocalTransaction ? " - " + ((LocalTransaction)ts).getSpeculationType() : ""));
this.ee.undoUndoToken(undoToken);
}
if (needs_profiling) ((LocalTransaction)ts).profiler.stopPostEE();
}
}
/**
* Somebody told us that our partition needs to abort/commit the given transaction id.
* This method should only be used for distributed transactions, because
* it will do some extra work for speculative execution
* @param ts - The transaction to finish up.
* @param status - The final status of the transaction
*/
private void finishDistributedTransaction(final AbstractTransaction ts, final Status status) {
if (debug.val)
LOG.debug(String.format("%s - Processing finish request at partition %d " +
"[status=%s, readOnly=%s]",
ts, this.partitionId,
status, ts.isExecReadOnly(this.partitionId)));
if (this.currentDtxn == ts) {
// 2012-11-22 -- Yes, today is Thanksgiving and I'm working on my database.
// That's just grad student life I guess. Anyway, if you're reading this then
// you know that this is an important part of the system. We have a dtxn that
// we have been told is completely finished and now we need to either commit
// or abort any changes that it may have made at this partition. The tricky thing
// is that if we have speculative execution enabled, then we need to make sure
// that we process any transactions that were executed while the dtxn was running
// in the right order to ensure that we maintain serializability.
// Here is the basic logic of what's about to happen:
//
// (1) If the dtxn is committing, then we just need to commit the last txn that
// was executed (since this will have the largest undo token).
// The EE will automatically commit all undo tokens less than that.
// (2) If the dtxn is aborting, then we can commit any speculative txn that was
// executed before the dtxn's first non-readonly undo token.
//
// Note that none of the speculative txns in the blocked queue will need to be
// aborted at this point, because we will have rolled back their changes immediately
// when they aborted, so that our dtxn doesn't read dirty data.
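// Illustrative timeline (hypothetical tokens): the dtxn's first write here used
// undo token 50; speculative txn S1 ran earlier at token 40 and S2 later at
// token 60. If the dtxn aborts, S1 can still commit (40 < 50), while S2 may
// only commit if the conflict checker clears it against the dtxn.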
if (this.specExecBlocked.isEmpty() == false) {
// First thing we need to do is get the latch that will be set by any transaction
// that was in the middle of being executed when we were called
if (debug.val)
LOG.debug(String.format("%s - Checking %d blocked speculative transactions at " +
"partition %d [currentMode=%s]",
ts, this.specExecBlocked.size(), this.partitionId, this.currentExecMode));
// -------------------------------
// DTXN NON-READ-ONLY ABORT
// If the dtxn did not modify this partition, then everything can commit
// Otherwise, we want to commit anything that was executed before the dtxn started
// -------------------------------
if (status != Status.OK && ts.isExecReadOnly(this.partitionId) == false) {
// We need to get the first undo tokens for our distributed transaction
long dtxnUndoToken = ts.getFirstUndoToken(this.partitionId);
if (debug.val)
LOG.debug(String.format("%s - Looking for speculative txns to commit before we rollback undoToken %d",
ts, dtxnUndoToken));
// Queue of speculative txns that should be committed/aborted either
// *before* or *after* we abort the distributed transaction
Collection<AbstractTransaction> allTxns = new TreeSet<AbstractTransaction>(this.specExecComparator);
allTxns.addAll(this.specExecBlocked);
allTxns.add(ts);
final List<LocalTransaction> toCommit = new ArrayList<LocalTransaction>();
final List<LocalTransaction> toAbortBefore = new ArrayList<LocalTransaction>();
final List<LocalTransaction> toAbortAfter = new ArrayList<LocalTransaction>();
// Go through once and figure out which txns we need to abort
// We have to do this first because if we abort our dtxn then we
// could lose its read/write tracking set if we're using OCC
boolean useAfterQueue = true;
for (AbstractTransaction next : allTxns) {
if (ts == next) {
useAfterQueue = false;
continue;
}
// Otherwise it's a speculative txn.
// Let's figure out what we need to do with it.
LocalTransaction spec_ts = (LocalTransaction)next;
boolean shouldCommit = false;
long spec_token = spec_ts.getFirstUndoToken(this.partitionId);
if (debug.val)
LOG.debug(String.format("Speculative Txn %s [undoToken=%d, %s]",
spec_ts, spec_token, spec_ts.getSpeculationType()));
// Speculative txns should never be executed without an undo token
assert(spec_token != HStoreConstants.DISABLE_UNDO_LOGGING_TOKEN);
assert(spec_ts.isSpeculative()) : spec_ts + " is not marked as speculative!";
// If the speculative undoToken is null, then this txn didn't execute
// any queries. That means we can always commit it
if (spec_token == HStoreConstants.NULL_UNDO_LOGGING_TOKEN) {
if (debug.val)
LOG.debug(String.format("Speculative Txn %s has a null undoToken at partition %d",
spec_ts, this.partitionId));
toCommit.add(spec_ts);
continue;
}
// Otherwise, look to see if this txn was speculatively executed before the
// first undo token of the distributed txn. That means we know that this guy
// didn't read any modifications made by the dtxn.
if (spec_token < dtxnUndoToken) {
if (debug.val)
LOG.debug(String.format("Speculative Txn %s has an undoToken less than the dtxn %s " +
"at partition %d [%d < %d]",
spec_ts, ts, this.partitionId, spec_token, dtxnUndoToken));
shouldCommit = true;
}
// Ok so at this point we know that our spec txn came *after* the distributed txn
// started. So we need to use our checker to see whether there is a conflict
else if (this.specExecSkipAfter || this.specExecChecker.hasConflictAfter(ts, spec_ts, this.partitionId) == false) {
if (debug.val)
LOG.debug(String.format("Speculative Txn %s does not conflict with dtxn %s at partition %d",
spec_ts, ts, this.partitionId));
shouldCommit = true;
}
if (useAfterQueue == false || shouldCommit == false) {
ClientResponseImpl spec_cr = spec_ts.getClientResponse();
MispredictionException error = new MispredictionException(spec_ts.getTransactionId(),
spec_ts.getTouchedPartitions());
spec_ts.setPendingError(error, false);
spec_cr.setStatus(Status.ABORT_SPECULATIVE);
(useAfterQueue ? toAbortAfter : toAbortBefore).add(spec_ts);
} else {
toCommit.add(spec_ts);
}
} // FOR
// (1) Process all of the aborting txns that need to come *before*
// we abort the dtxn
if (toAbortBefore.isEmpty() == false)
this.processClientResponseBatch(toAbortBefore, Status.ABORT_SPECULATIVE);
// (2) Now abort the dtxn
this.finishTransaction(ts, status);
// (3) Then abort all of the txns that need to come *after* we abort the dtxn
if (toAbortAfter.isEmpty() == false)
this.processClientResponseBatch(toAbortAfter, Status.ABORT_SPECULATIVE);
// (4) Then blast out all of the txns that we want to commit
if (toCommit.isEmpty() == false)
this.processClientResponseBatch(toCommit, Status.OK);
}
// -------------------------------
// DTXN READ-ONLY ABORT or DTXN COMMIT
// -------------------------------
else {
// **IMPORTANT**
// If the dtxn needs to commit, then all we need to do is get the
// last undoToken that we've generated (since we know that it had to
// have been used either by our distributed txn or for one of our
// speculative txns).
//
// If the read-only dtxn needs to abort, then there's nothing we need to
// do, because it didn't make any changes. That means we can just
// commit the last speculatively executed transaction
//
// Once we have this token, we can just make a direct call to the EE
// to commit any changes that came before it. Note that we are using our
// special 'finishWorkEE' method that does not require us to provide
// the transaction that we're committing.
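// Example (hypothetical tokens): if the dtxn's last change here used token 70
// and speculative txns ran up through token 75, then releasing token 75 in a
// single finishWorkEE() call commits the dtxn and every speculative txn at once.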
long undoToken = this.lastUndoToken;
if (debug.val)
LOG.debug(String.format("%s - Last undoToken at partition %d => %d",
ts, this.partitionId, undoToken));
// Bombs away!
if (undoToken != this.lastCommittedUndoToken) {
this.finishWorkEE(ts, undoToken, true);
// IMPORTANT: Make sure that we remove the dtxn from the lock queue!
// This is normally done in finishTransaction() but because we're trying
// to be clever and invoke the EE directly, we have to make sure that
// we call it ourselves.
this.queueManager.lockQueueFinished(ts, status, this.partitionId);
}
// Make sure that we mark the dtxn as finished so that we don't
// try to do anything with it later on.
if (hstore_conf.site.exec_readwrite_tracking)
this.markTransactionFinished(ts);
else
ts.markFinished(this.partitionId);
// Now make sure that all of the speculative txns are processed without
// committing (since we just committed any change that they could have made
// up above).
LocalTransaction spec_ts = null;
while ((spec_ts = this.specExecBlocked.pollFirst()) != null) {
ClientResponseImpl spec_cr = spec_ts.getClientResponse();
assert(spec_cr != null);
if (hstore_conf.site.exec_readwrite_tracking)
this.markTransactionFinished(spec_ts);
else
spec_ts.markFinished(this.partitionId);
try {
if (trace.val)
LOG.trace(String.format("%s - Releasing blocked ClientResponse for %s [status=%s]",
ts, spec_ts, spec_cr.getStatus()));
this.processClientResponse(spec_ts, spec_cr);
} catch (Throwable ex) {
String msg = "Failed to complete queued response for " + spec_ts;
throw new ServerFaultException(msg, ex, ts.getTransactionId());
}
} // WHILE
}
this.specExecBlocked.clear();
this.specExecModified = false;
if (trace.val)
LOG.trace(String.format("Finished processing all queued speculative txns for dtxn %s", ts));
}
// -------------------------------
// NO SPECULATIVE TXNS
// -------------------------------
else {
// There are no speculative txns waiting for this dtxn,
// so we can just commit it right away
if (debug.val)
LOG.debug(String.format("%s - No speculative txns at partition %d. Just %s txn by itself",
ts, this.partitionId, (status == Status.OK ? "committing" : "aborting")));
this.finishTransaction(ts, status);
}
// Clear our cached query results that are specific for this transaction
// this.queryCache.purgeTransaction(ts.getTransactionId());
// TODO: Remove anything in our queue for this txn
// if (ts.hasQueuedWork(this.partitionId)) {
// }
// Check whether this is the response that the speculatively executed txns have been waiting for
// We could have turned off speculative execution mode beforehand
if (debug.val)
LOG.debug(String.format("%s - Attempting to unmark as the current DTXN at partition %d and " +
"setting execution mode to %s",
ts, this.partitionId, ExecutionMode.COMMIT_ALL));
try {
// Resetting the current_dtxn variable has to come *before* we change the execution mode
this.resetCurrentDtxn();
this.setExecutionMode(ts, ExecutionMode.COMMIT_ALL);
// Release blocked transactions
this.releaseBlockedTransactions(ts);
} catch (Throwable ex) {
String msg = String.format("Failed to finish %s at partition %d", ts, this.partitionId);
throw new ServerFaultException(msg, ex, ts.getTransactionId());
}
if (hstore_conf.site.exec_profiling) {
this.profiler.sp3_local_time.stopIfStarted();
this.profiler.sp3_remote_time.stopIfStarted();
}
}
// We were told to finish a dtxn that is not the current one
// at this partition. That's ok as long as it's aborting and not trying
// to commit.
else {
assert(status != Status.OK) :
String.format("Trying to commit %s at partition %d but the current dtxn is %s",
ts, this.partitionId, this.currentDtxn);
this.queueManager.lockQueueFinished(ts, status, this.partitionId);
}
// -------------------------------
// FINISH CALLBACKS
// -------------------------------
// MapReduceTransaction
if (ts instanceof MapReduceTransaction) {
PartitionCountingCallback<AbstractTransaction> callback = ((MapReduceTransaction)ts).getCleanupCallback();
// We don't want to invoke this callback at the basePartition's site
// because we don't want the parent txn to actually get deleted.
if (this.partitionId != ts.getBasePartition()) {
if (debug.val)
LOG.debug(String.format("%s - Notifying %s that the txn is finished at partition %d",
ts, callback.getClass().getSimpleName(), this.partitionId));
callback.run(this.partitionId);
}
}
else {
PartitionCountingCallback<AbstractTransaction> callback = ts.getFinishCallback();
if (debug.val)
LOG.debug(String.format("%s - Notifying %s that the txn is finished at partition %d",
ts, callback.getClass().getSimpleName(), this.partitionId));
callback.run(this.partitionId);
}
}
/**
* Mark a transaction as being finished at this partition and clear out
* any internal tracking stuff that we may have down in the EE.
* @param ts
*/
private void markTransactionFinished(AbstractTransaction ts) {
if (hstore_conf.site.exec_readwrite_tracking && ts.hasExecutedWork(this.partitionId)) {
this.ee.trackingFinish(ts.getTransactionId());
}
ts.markFinished(this.partitionId);
}
/**
* Process a batch of completed txns. Only one representative txn is
* committed/aborted in the EE (the last txn in the batch for commits, the
* first for aborts); the rest are simply marked as finished.
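* For example (hypothetical batch): for a commit batch {T1, T2, T3} ordered by
* undo token, only T3 triggers an EE call; T1 and T2 are committed implicitly
* by that call and are simply marked as finished before their ClientResponses
* are released.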
* @param batch
* @param status
*/
private void processClientResponseBatch(Collection<LocalTransaction> batch, Status status) {
// Commit/abort only one txn in the EE: the last txn in the batch for
// commits (it has the greatest undo token value) or the first for aborts.
LocalTransaction targetTxn = null;
if (status == Status.OK) {
targetTxn = CollectionUtil.last(batch);
} else {
targetTxn = CollectionUtil.first(batch);
}
assert(targetTxn != null);
long undoToken = targetTxn.getFirstUndoToken(this.partitionId);
this.finishWorkEE(targetTxn, undoToken, (status == Status.OK));
for (LocalTransaction ts : batch) {
// Marking the txn as finished will prevent us from going down
// into the EE to finish up the transaction.
if (hstore_conf.site.exec_readwrite_tracking)
this.markTransactionFinished(ts);
else
ts.markFinished(this.partitionId);
// Send out the ClientResponse to whomever wants it!
if (debug.val)
LOG.debug(String.format("%s - Releasing blocked ClientResponse for %s [status=%s]",
ts, ts, ts.getClientResponse().getStatus()));
try {
this.processClientResponse(ts, ts.getClientResponse());
} catch (Throwable ex) {
String msg = "Failed to complete queued response for " + ts;
throw new ServerFaultException(msg, ex, ts.getTransactionId());
}
} // FOR
}
private void blockTransaction(InternalTxnMessage work) {
if (debug.val)
LOG.debug(String.format("%s - Adding %s work to blocked queue",
work.getTransaction(), work.getClass().getSimpleName()));
assert(this.currentDtxn != null) :
String.format("Trying to block %s for %s at partition %d but the current dtxn is null",
work, work.getTransaction(), this.partitionId);
assert(this.currentDtxn != work.getTransaction()) :
String.format("Trying to block %s for %s at partition %d but it is the current dtxn",
work, work.getTransaction(), this.partitionId);
this.currentBlockedTxns.add(work);
}
private void blockTransaction(LocalTransaction ts) {
this.blockTransaction(new StartTxnMessage(ts));
}
/**
* Release all the transactions that are currently in this partition's blocked queue
* into the work queue.
* @param ts
*/
private void releaseBlockedTransactions(AbstractTransaction ts) {
if (this.currentBlockedTxns.isEmpty() == false) {
if (debug.val)
LOG.debug(String.format("Attempting to release %d blocked transactions at partition %d because of %s",
this.currentBlockedTxns.size(), this.partitionId, ts));
this.work_queue.addAll(this.currentBlockedTxns);
int released = this.currentBlockedTxns.size();
this.currentBlockedTxns.clear();
if (debug.val) LOG.debug(String.format("Released %d blocked transactions at partition %d because of %s",
released, this.partitionId, ts));
}
assert(this.currentBlockedTxns.isEmpty());
}
// ---------------------------------------------------------------
// SNAPSHOT METHODS
// ---------------------------------------------------------------
/**
* Do snapshot work exclusively until there is no more. Also blocks
* until the syncing and closing of snapshot data targets has completed.
*/
public void initiateSnapshots(Deque<SnapshotTableTask> tasks) {
m_snapshotter.initiateSnapshots(ee, tasks);
}
public Collection<Exception> completeSnapshotWork() throws InterruptedException {
LOG.warn("completeSnapshotWork at partition :"+this.getPartitionId());
return m_snapshotter.completeSnapshotWork(ee);
}
// ---------------------------------------------------------------
// SHUTDOWN METHODS
// ---------------------------------------------------------------
/**
* Cause this PartitionExecutor to shut down the entire HStore cluster.
* This won't return!
*/
public synchronized void crash(Throwable ex) {
String msg = String.format("PartitionExecutor for Partition #%d is crashing", this.partitionId);
if (ex == null) LOG.warn(msg);
else LOG.warn(msg, ex);
assert(this.hstore_coordinator != null);
this.hstore_coordinator.shutdownClusterBlocking(ex);
}
@Override
public boolean isShuttingDown() {
return (this.hstore_site.isShuttingDown());
}
@Override
public void prepareShutdown(boolean error) {
this.shutdown_state = ShutdownState.PREPARE_SHUTDOWN;
}
/**
* Somebody from the outside wants us to shut down
*/
public synchronized void shutdown() {
if (this.shutdown_state == ShutdownState.SHUTDOWN) {
if (debug.val) LOG.debug(String.format("Partition #%d told to shutdown again. Ignoring...", this.partitionId));
return;
}
this.shutdown_state = ShutdownState.SHUTDOWN;
if (debug.val) LOG.debug(String.format("Shutting down PartitionExecutor for Partition #%d", this.partitionId));
// Clear the queue
this.work_queue.clear();
// Shut down the snapshotter if we have one
if (this.m_snapshotter != null) this.m_snapshotter.shutdown();
// Make sure we shutdown our threadpool
// this.thread_pool.shutdownNow();
if (this.self != null) this.self.interrupt();
if (this.shutdown_latch != null) {
try {
this.shutdown_latch.acquire();
} catch (InterruptedException ex) {
// Ignore
} catch (Exception ex) {
LOG.fatal("Unexpected error while shutting down", ex);
}
}
}
// ----------------------------------------------------------------------------
// DEBUG METHODS
// ----------------------------------------------------------------------------
@Override
public String toString() {
return String.format("%s{%s}", this.getClass().getSimpleName(),
HStoreThreadManager.formatPartitionName(siteId, partitionId));
}
public class Debug implements DebugContext {
public VoltProcedure getVoltProcedure(String procName) {
Procedure proc = catalogContext.procedures.getIgnoreCase(procName);
return (PartitionExecutor.this.getVoltProcedure(proc.getId()));
}
public SpecExecScheduler getSpecExecScheduler() {
return (PartitionExecutor.this.specExecScheduler);
}
public AbstractConflictChecker getSpecExecConflictChecker() {
return (PartitionExecutor.this.specExecChecker);
}
public Collection<BatchPlanner> getBatchPlanners() {
return (PartitionExecutor.this.batchPlanners.values());
}
public PartitionExecutorProfiler getProfiler() {
return (PartitionExecutor.this.profiler);
}
public Thread getExecutionThread() {
return (PartitionExecutor.this.self);
}
public Queue<InternalMessage> getWorkQueue() {
return (PartitionExecutor.this.work_queue);
}
public void setExecutionMode(AbstractTransaction ts, ExecutionMode newMode) {
PartitionExecutor.this.setExecutionMode(ts, newMode);
}
public ExecutionMode getExecutionMode() {
return (PartitionExecutor.this.currentExecMode);
}
public Long getLastExecutedTxnId() {
return (PartitionExecutor.this.lastExecutedTxnId);
}
public Long getLastCommittedTxnId() {
return (PartitionExecutor.this.lastCommittedTxnId);
}
public long getLastCommittedUndoToken() {
return (PartitionExecutor.this.lastCommittedUndoToken);
}
/**
* Get the VoltProcedure handle of the current running txn. This could be null.
* <B>FOR TESTING ONLY</B>
*/
public VoltProcedure getCurrentVoltProcedure() {
return (PartitionExecutor.this.currentVoltProc);
}
/**
* Get the handle of the current distributed transaction at this partition
* <B>FOR TESTING ONLY</B>
*/
public AbstractTransaction getCurrentDtxn() {
return (PartitionExecutor.this.currentDtxn);
}
/**
* Get the txnId of the current distributed transaction at this partition
* <B>FOR TESTING ONLY</B>
*/
public Long getCurrentDtxnId() {
Long ret = null;
// This is a race condition, so we'll just ignore any errors
if (PartitionExecutor.this.currentDtxn != null) {
try {
ret = PartitionExecutor.this.currentDtxn.getTransactionId();
} catch (NullPointerException ex) {
// IGNORE
}
}
return (ret);
}
public Long getCurrentTxnId() {
return (PartitionExecutor.this.currentTxnId);
}
public int getBlockedWorkCount() {
return (PartitionExecutor.this.currentBlockedTxns.size());
}
/**
* Return the number of spec exec txns that have completed but are waiting
* for the distributed txn to finish at this partition
*/
public int getBlockedSpecExecCount() {
return (PartitionExecutor.this.specExecBlocked.size());
}
public int getWorkQueueSize() {
return (PartitionExecutor.this.work_queue.size());
}
public void updateMemory() {
PartitionExecutor.this.updateMemoryStats(EstTime.currentTimeMillis());
}
/**
* Replace the ConflictChecker. This should only be used for testing
* @param checker
*/
protected void setConflictChecker(AbstractConflictChecker checker) {
LOG.warn(String.format("Replacing original checker %s with %s at partition %d",
specExecChecker.getClass().getSimpleName(),
checker.getClass().getSimpleName(),
partitionId));
setSpecExecChecker(checker);
}
}
private Debug cachedDebugContext;
public Debug getDebugContext() {
if (this.cachedDebugContext == null) {
// We don't care if we're thread-safe here...
this.cachedDebugContext = new Debug();
}
return this.cachedDebugContext;
}
}