package edu.brown.hstore;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.Logger;
import org.voltdb.CatalogContext;
import org.voltdb.ParameterSet;
import org.voltdb.VoltTable;
import org.voltdb.catalog.Database;
import org.voltdb.catalog.Host;
import org.voltdb.catalog.Partition;
import org.voltdb.catalog.Procedure;
import org.voltdb.catalog.Site;
import org.voltdb.catalog.Table;
import org.voltdb.exceptions.SerializableException;
import org.voltdb.exceptions.ServerFaultException;
import org.voltdb.messaging.FastSerializer;
import org.voltdb.utils.EstTime;
import org.voltdb.utils.Pair;
import com.google.protobuf.ByteString;
import com.google.protobuf.RpcCallback;
import com.google.protobuf.RpcController;
import edu.brown.catalog.CatalogUtil;
import edu.brown.hstore.Hstoreservice.HStoreService;
import edu.brown.hstore.Hstoreservice.HeartbeatRequest;
import edu.brown.hstore.Hstoreservice.HeartbeatResponse;
import edu.brown.hstore.Hstoreservice.InitializeRequest;
import edu.brown.hstore.Hstoreservice.InitializeResponse;
import edu.brown.hstore.Hstoreservice.SendDataRequest;
import edu.brown.hstore.Hstoreservice.SendDataResponse;
import edu.brown.hstore.Hstoreservice.ShutdownPrepareRequest;
import edu.brown.hstore.Hstoreservice.ShutdownPrepareResponse;
import edu.brown.hstore.Hstoreservice.ShutdownRequest;
import edu.brown.hstore.Hstoreservice.ShutdownResponse;
import edu.brown.hstore.Hstoreservice.Status;
import edu.brown.hstore.Hstoreservice.TimeSyncRequest;
import edu.brown.hstore.Hstoreservice.TimeSyncResponse;
import edu.brown.hstore.Hstoreservice.TransactionDebugRequest;
import edu.brown.hstore.Hstoreservice.TransactionDebugResponse;
import edu.brown.hstore.Hstoreservice.TransactionFinishRequest;
import edu.brown.hstore.Hstoreservice.TransactionFinishResponse;
import edu.brown.hstore.Hstoreservice.TransactionInitRequest;
import edu.brown.hstore.Hstoreservice.TransactionInitResponse;
import edu.brown.hstore.Hstoreservice.TransactionMapRequest;
import edu.brown.hstore.Hstoreservice.TransactionMapResponse;
import edu.brown.hstore.Hstoreservice.TransactionPrefetchAcknowledgement;
import edu.brown.hstore.Hstoreservice.TransactionPrefetchResult;
import edu.brown.hstore.Hstoreservice.TransactionPrepareRequest;
import edu.brown.hstore.Hstoreservice.TransactionPrepareResponse;
import edu.brown.hstore.Hstoreservice.TransactionRedirectRequest;
import edu.brown.hstore.Hstoreservice.TransactionRedirectResponse;
import edu.brown.hstore.Hstoreservice.TransactionReduceRequest;
import edu.brown.hstore.Hstoreservice.TransactionReduceResponse;
import edu.brown.hstore.Hstoreservice.TransactionWorkRequest;
import edu.brown.hstore.Hstoreservice.TransactionWorkResponse;
import edu.brown.hstore.Hstoreservice.UnevictDataRequest;
import edu.brown.hstore.Hstoreservice.UnevictDataRequest.Builder;
import edu.brown.hstore.Hstoreservice.UnevictDataResponse;
import edu.brown.hstore.Hstoreservice.WorkFragment;
import edu.brown.hstore.callbacks.LocalInitQueueCallback;
import edu.brown.hstore.callbacks.ShutdownPrepareCallback;
import edu.brown.hstore.callbacks.LocalFinishCallback;
import edu.brown.hstore.callbacks.TransactionPrefetchCallback;
import edu.brown.hstore.callbacks.LocalPrepareCallback;
import edu.brown.hstore.callbacks.TransactionRedirectResponseCallback;
import edu.brown.hstore.conf.HStoreConf;
import edu.brown.hstore.dispatchers.TransactionFinishDispatcher;
import edu.brown.hstore.dispatchers.TransactionInitDispatcher;
import edu.brown.hstore.dispatchers.TransactionRedirectDispatcher;
import edu.brown.hstore.handlers.SendDataHandler;
import edu.brown.hstore.handlers.TransactionFinishHandler;
import edu.brown.hstore.handlers.TransactionInitHandler;
import edu.brown.hstore.handlers.TransactionMapHandler;
import edu.brown.hstore.handlers.TransactionPrefetchHandler;
import edu.brown.hstore.handlers.TransactionPrepareHandler;
import edu.brown.hstore.handlers.TransactionReduceHandler;
import edu.brown.hstore.handlers.TransactionWorkHandler;
import edu.brown.hstore.specexec.PrefetchQueryPlanner;
import edu.brown.hstore.txns.AbstractTransaction;
import edu.brown.hstore.txns.DependencyTracker;
import edu.brown.hstore.txns.LocalTransaction;
import edu.brown.hstore.txns.RemoteTransaction;
import edu.brown.hstore.txns.TransactionUtil;
import edu.brown.hstore.util.TransactionCounter;
import edu.brown.interfaces.Shutdownable;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.protorpc.NIOEventLoop;
import edu.brown.protorpc.ProtoRpcChannel;
import edu.brown.protorpc.ProtoRpcController;
import edu.brown.protorpc.ProtoServer;
import edu.brown.utils.EventObservable;
import edu.brown.utils.PartitionSet;
import edu.brown.utils.StringUtil;
import edu.brown.utils.ThreadUtil;
/**
*
* @author pavlo
*/
public class HStoreCoordinator implements Shutdownable {
private static final Logger LOG = Logger.getLogger(HStoreCoordinator.class);
private static final LoggerBoolean debug = new LoggerBoolean();
private static final LoggerBoolean trace = new LoggerBoolean();
static {
LoggerUtil.attachObserver(LOG, debug, trace);
}
// ----------------------------------------------------------------------------
// INTERNAL STATE
// ----------------------------------------------------------------------------
private final HStoreSite hstore_site;
private final HStoreConf hstore_conf;
private final CatalogContext catalogContext;
private final Site catalog_site;
private final int num_sites;
private final int local_site_id;
/** SiteId -> HStoreService */
private final HStoreService channels[];
private final Thread listener_thread;
private final ProtoServer listener;
private final HStoreService remoteService;
private final NIOEventLoop eventLoop = new NIOEventLoop();
private Shutdownable.ShutdownState state = ShutdownState.INITIALIZED;
private final ThreadLocal<FastSerializer> serializers = new ThreadLocal<FastSerializer>() {
protected FastSerializer initialValue() {
return new FastSerializer(); // TODO: Use pooled memory
};
};
/**
* Special observable that is invoked when this HStoreCoordinator is on-line
* and ready to communicating with other nodes in the cluster.
*/
private final EventObservable<HStoreCoordinator> ready_observable = new EventObservable<HStoreCoordinator>();
// ----------------------------------------------------------------------------
// HANDLERS
// ----------------------------------------------------------------------------
private final TransactionInitHandler transactionInit_handler;
private final TransactionWorkHandler transactionWork_handler;
private final TransactionPrefetchHandler transactionPrefetch_handler;
private final TransactionMapHandler transactionMap_handler;
private final TransactionReduceHandler transactionReduce_handler;
private final TransactionPrepareHandler transactionPrepare_handler;
private final TransactionFinishHandler transactionFinish_handler;
private final SendDataHandler sendData_handler;
// ----------------------------------------------------------------------------
// DISPATCHERS
// ----------------------------------------------------------------------------
private final TransactionInitDispatcher transactionInit_dispatcher;
private final TransactionFinishDispatcher transactionFinish_dispatcher;
private final TransactionRedirectDispatcher transactionRedirect_dispatcher;
private final List<Thread> dispatcherThreads = new ArrayList<Thread>();
// ----------------------------------------------------------------------------
// QUERY PREFETCHING
// ----------------------------------------------------------------------------
private final TransactionPrefetchCallback transactionPrefetch_callback;
private final PrefetchQueryPlanner prefetchPlanner;
// ----------------------------------------------------------------------------
// MESSENGER LISTENER THREAD
// ----------------------------------------------------------------------------
/**
*
*/
private class MessengerListener implements Runnable {
@Override
public void run() {
Thread self = Thread.currentThread();
self.setName(HStoreThreadManager.getThreadName(hstore_site, HStoreConstants.THREAD_NAME_COORDINATOR));
hstore_site.getThreadManager().registerProcessingThread();
Throwable error = null;
try {
HStoreCoordinator.this.eventLoop.run();
} catch (Throwable ex) {
error = ex;
}
if (error != null) {
if (hstore_site.isShuttingDown() == false) {
LOG.error(this.getClass().getSimpleName() + " has stopped!", error);
}
Throwable cause = null;
if (error instanceof ServerFaultException && error.getCause() != null) {
if (error.getCause().getMessage() != null && error.getCause().getMessage().isEmpty() == false) {
cause = error.getCause();
}
}
if (cause == null) cause = error;
// These errors are ok if we're actually stopping...
if (HStoreCoordinator.this.state == ShutdownState.SHUTDOWN ||
HStoreCoordinator.this.state == ShutdownState.PREPARE_SHUTDOWN ||
HStoreCoordinator.this.hstore_site.isShuttingDown()) {
// IGNORE
} else {
LOG.fatal("Unexpected error in messenger listener thread", cause);
HStoreCoordinator.this.shutdownCluster(error);
}
}
if (trace.val)
LOG.trace("Messenger Thread for Site #" + catalog_site.getId() + " has stopped!");
}
}
// ----------------------------------------------------------------------------
// HEARTBEAT CALLBACK
// ----------------------------------------------------------------------------
private final RpcCallback<HeartbeatResponse> heartbeatCallback = new RpcCallback<HeartbeatResponse>() {
@Override
public void run(HeartbeatResponse response) {
if (response.getStatus() == Status.OK) {
if (trace.val)
LOG.trace(String.format("%s %s -> %s [%s]",
response.getClass().getSimpleName(),
HStoreThreadManager.formatSiteName(response.getSenderSite()),
HStoreThreadManager.formatSiteName(local_site_id),
response.getStatus()));
// FIXME: We need to actually store the heartbeat updates somewhere...
assert(response.getSenderSite() != local_site_id);
}
}
};
// ----------------------------------------------------------------------------
// UNEVICT CALLBACK
// ----------------------------------------------------------------------------
private RpcCallback<UnevictDataResponse> unevictCallback = new RpcCallback<UnevictDataResponse>() {
@Override
public void run(UnevictDataResponse response) {
if (response.getStatus() == Status.OK) {
if (trace.val)
LOG.trace(String.format("%s %s -> %s [%s]",
response.getClass().getSimpleName(),
HStoreThreadManager.formatSiteName(response.getSenderSite()),
HStoreThreadManager.formatSiteName(local_site_id),
response.getStatus()));
long oldTxnId = response.getTransactionId();
// int partition = response.getPartitionId();
LocalTransaction ts = hstore_site.getTransaction(oldTxnId);
assert(response.getSenderSite() != local_site_id);
hstore_site.getTransactionInitializer().resetTransactionId(ts, ts.getBasePartition());
if (debug.val)
LOG.debug(String.format("transaction %d is being restarted", ts.getTransactionId()));
LocalInitQueueCallback initCallback = (LocalInitQueueCallback)ts.getInitCallback();
hstore_site.getCoordinator().transactionInit(ts, initCallback);
}
}
};
// ----------------------------------------------------------------------------
// INITIALIZATION
// ----------------------------------------------------------------------------
/**
* Constructor
* @param hstore_site
*/
public HStoreCoordinator(HStoreSite hstore_site) {
this.hstore_site = hstore_site;
this.hstore_conf = this.hstore_site.getHStoreConf();
this.catalogContext = this.hstore_site.getCatalogContext();
this.catalog_site = this.hstore_site.getSite();
this.local_site_id = this.catalog_site.getId();
this.num_sites = this.hstore_site.getCatalogContext().numberOfSites;
this.channels = new HStoreService[this.num_sites];
if (debug.val)
LOG.debug(String.format("Local Partitions for Site #%d: %s",
hstore_site.getSiteId(), hstore_site.getLocalPartitionIds()));
// Incoming RPC Handler
this.remoteService = this.initHStoreService();
// This listener thread will process incoming messages
this.listener = new ProtoServer(this.eventLoop);
// Special dispatcher threads to handle incoming requests
// These are used so that we can process messages in a different thread than the main HStoreCoordinator thread
// TransactionInitDispatcher
if (hstore_conf.site.coordinator_init_thread) {
this.transactionInit_dispatcher = new TransactionInitDispatcher(this.hstore_site, this);
String name = HStoreThreadManager.getThreadName(this.hstore_site, "coord", "init");
Thread t = new Thread(this.transactionInit_dispatcher, name);
this.dispatcherThreads.add(t);
} else {
this.transactionInit_dispatcher = null;
}
// TransactionFinishDispatcher
if (hstore_conf.site.coordinator_finish_thread) {
this.transactionFinish_dispatcher = new TransactionFinishDispatcher(this.hstore_site, this);
String name = HStoreThreadManager.getThreadName(this.hstore_site, "coord", "finish");
Thread t = new Thread(this.transactionInit_dispatcher, name);
this.dispatcherThreads.add(t);
} else {
this.transactionFinish_dispatcher = null;
}
// TransactionRedirectDispatcher
if (hstore_conf.site.coordinator_redirect_thread) {
this.transactionRedirect_dispatcher = new TransactionRedirectDispatcher(this.hstore_site, this);
String name = HStoreThreadManager.getThreadName(this.hstore_site, "coord", "redirect");
Thread t = new Thread(this.transactionInit_dispatcher, name);
this.dispatcherThreads.add(t);
} else {
this.transactionRedirect_dispatcher = null;
}
this.transactionInit_handler = new TransactionInitHandler(hstore_site, this, this.transactionInit_dispatcher);
this.transactionWork_handler = new TransactionWorkHandler(hstore_site, this);
this.transactionPrefetch_handler = new TransactionPrefetchHandler(hstore_site, this);
this.transactionMap_handler = new TransactionMapHandler(hstore_site, this);
this.transactionReduce_handler = new TransactionReduceHandler(hstore_site,this);
this.transactionPrepare_handler = new TransactionPrepareHandler(hstore_site, this);
this.transactionFinish_handler = new TransactionFinishHandler(hstore_site, this, this.transactionFinish_dispatcher);
this.sendData_handler = new SendDataHandler(hstore_site, this);
// Wrap the listener in a daemon thread
this.listener_thread = new Thread(new MessengerListener());
this.listener_thread.setDaemon(true);
this.eventLoop.setExitOnSigInt(true);
// Initialize the PrefetchQueryPlanner if we're allowed to execute
// speculative queries and we actually have some in the catalog
PrefetchQueryPlanner tmpPlanner = null;
if (hstore_conf.site.exec_prefetch_queries) {
boolean has_prefetch = false;
for (Procedure catalog_proc : this.catalogContext.procedures.values()) {
if (catalog_proc.getPrefetchable()) {
has_prefetch = true;
break;
}
}
if (has_prefetch) {
tmpPlanner = new PrefetchQueryPlanner(this.catalogContext,
hstore_site.getPartitionEstimator());
}
}
this.prefetchPlanner = tmpPlanner;
this.transactionPrefetch_callback = (this.prefetchPlanner != null ? new TransactionPrefetchCallback() : null);
}
protected HStoreService initHStoreService() {
return (new RemoteServiceHandler());
}
/**
* Start the messenger. This is a blocking call that will initialize the connections
* and start the listener thread!
*/
public synchronized void start() {
assert(this.state == ShutdownState.INITIALIZED) : "Invalid MessengerState " + this.state;
this.state = ShutdownState.STARTED;
if (debug.val) LOG.debug("Initializing connections");
this.initConnections();
for (Thread t : this.dispatcherThreads) {
if (debug.val) LOG.debug("Starting dispatcher thread: " + t.getName());
t.setDaemon(true);
t.start();
} // FOR
if (debug.val) LOG.debug("Starting listener thread");
this.listener_thread.start();
// If we're at site zero, then we'll announce our instanceId
// to everyone in the cluster
if (this.local_site_id == 0) {
this.initCluster();
}
if (hstore_conf.site.coordinator_sync_time) {
syncClusterTimes();
}
this.ready_observable.notifyObservers(this);
}
/**
* Returns true if the messenger has started
* @return
*/
public boolean isStarted() {
return (this.state == ShutdownState.STARTED);
}
/**
* Internal call for testing to hide errors
*/
@Override
public void prepareShutdown(boolean error) {
if (this.state != ShutdownState.PREPARE_SHUTDOWN) {
assert(this.state == ShutdownState.STARTED) : "Invalid HStoreCoordinator State " + this.state;
this.state = ShutdownState.PREPARE_SHUTDOWN;
}
}
/**
* Stop this HStoreCoordinator. This kills the ProtoRPC messenger event loop
*/
@Override
public synchronized void shutdown() {
assert(this.state == ShutdownState.STARTED || this.state == ShutdownState.PREPARE_SHUTDOWN) :
"Invalid MessengerState " + this.state;
this.state = ShutdownState.SHUTDOWN;
try {
// Kill all of our dispatchers
for (Thread thread : this.dispatcherThreads) {
if (trace.val) LOG.trace("Stopping dispatcher thread " + thread.getName());
thread.interrupt();
} // FOR
if (trace.val) LOG.trace("Stopping eventLoop for Site #" + this.getLocalSiteId());
this.eventLoop.exitLoop();
if (trace.val) LOG.trace("Stopping listener thread for Site #" + this.getLocalSiteId());
this.listener_thread.interrupt();
if (trace.val) LOG.trace("Joining on listener thread for Site #" + this.getLocalSiteId());
this.listener_thread.join();
} catch (InterruptedException ex) {
// IGNORE
} catch (Throwable ex) {
LOG.error("Unexpected error when trying to stop messenger for Site #" + this.getLocalSiteId(), ex);
} finally {
if (trace.val) LOG.trace("Closing listener socket for Site #" + this.getLocalSiteId());
this.listener.close();
}
}
/**
* Returns true if the messenger has stopped
* @return
*/
@Override
public boolean isShuttingDown() {
return (this.state == ShutdownState.PREPARE_SHUTDOWN);
}
public boolean isShutdownOrPrepareShutDown() {
return (this.state == ShutdownState.PREPARE_SHUTDOWN || this.state == ShutdownState.SHUTDOWN);
}
protected int getLocalSiteId() {
return (this.local_site_id);
}
protected int getLocalMessengerPort() {
return (this.hstore_site.getSite().getMessenger_port());
}
protected final Thread getListenerThread() {
return (this.listener_thread);
}
public HStoreService getChannel(int site_id) {
return (this.channels[site_id]);
}
public HStoreService getHandler() {
return (this.remoteService);
}
public EventObservable<HStoreCoordinator> getReadyObservable() {
return (this.ready_observable);
}
public TransactionInitHandler getTransactionInitHandler() {
return (this.transactionInit_handler);
}
public TransactionFinishHandler getTransactionFinishHandler() {
return (this.transactionFinish_handler);
}
public void setUnevictCallback(RpcCallback<UnevictDataResponse> callback){
this.unevictCallback = callback;
}
/**
* Initialize all the network connections to remote
*
*/
private void initConnections() {
if (debug.val) LOG.debug("Configuring outbound network connections for Site #" + this.catalog_site.getId());
// Initialize inbound channel
Integer local_port = this.catalog_site.getMessenger_port();
assert(local_port != null);
if (debug.val) LOG.debug("Binding listener to port " + local_port + " for Site #" + this.catalog_site.getId());
this.listener.register(this.remoteService);
this.listener.bind(local_port);
// Find all the destinations we need to connect to
// Make the outbound connections
List<Pair<Integer, InetSocketAddress>> destinations = HStoreCoordinator.getRemoteCoordinators(this.catalog_site);
if (destinations.isEmpty()) {
if (debug.val) LOG.debug("There are no remote sites so we are skipping creating connections");
}
else {
if (debug.val) LOG.debug("Connecting to " + destinations.size() + " remote site messengers");
ProtoRpcChannel[] channels = null;
InetSocketAddress arr[] = new InetSocketAddress[destinations.size()];
for (int i = 0; i < arr.length; i++) {
arr[i] = destinations.get(i).getSecond();
if (debug.val) LOG.debug("Attemping to connect to " + arr[i]);
} // FOR
int tries = hstore_conf.site.network_startup_retries;
boolean success = false;
Throwable error = null;
while (tries-- > 0 && success == false) {
try {
channels = ProtoRpcChannel.connectParallel(this.eventLoop,
arr,
hstore_conf.site.network_startup_wait);
success = true;
} catch (Throwable ex) {
if (tries > 0) {
LOG.warn("Failed to connect to remote sites. Going to try again...");
continue;
}
}
} // WHILE
if (success == false) {
LOG.fatal("Site #" + this.getLocalSiteId() + " failed to connect to remote sites");
this.listener.close();
throw new RuntimeException(error);
}
assert channels.length == destinations.size();
for (int i = 0; i < channels.length; i++) {
Pair<Integer, InetSocketAddress> p = destinations.get(i);
this.channels[p.getFirst()] = HStoreService.newStub(channels[i]);
} // FOR
if (debug.val) LOG.debug("Site #" + this.getLocalSiteId() + " is fully connected to all sites");
}
}
protected void initCluster() {
long instanceId = EstTime.currentTimeMillis();
hstore_site.setInstanceId(instanceId);
InitializeRequest request = InitializeRequest.newBuilder()
.setSenderSite(0)
.setInstanceId(instanceId)
.build();
final CountDownLatch latch = new CountDownLatch(this.num_sites-1);
RpcCallback<InitializeResponse> callback = new RpcCallback<InitializeResponse>() {
@Override
public void run(InitializeResponse parameter) {
if (debug.val)
LOG.debug(String.format("Initialization Response: %s / %s",
HStoreThreadManager.formatSiteName(parameter.getSenderSite()),
parameter.getStatus()));
latch.countDown();
}
};
for (int site_id = 0; site_id < this.num_sites; site_id++) {
if (site_id == this.local_site_id) continue;
ProtoRpcController controller = new ProtoRpcController();
this.channels[site_id].initialize(controller, request, callback);
} // FOR
if (latch.getCount() > 0) {
if (debug.val)
LOG.debug(String.format("Waiting for %s initialization responses", latch.getCount()));
boolean finished = false;
try {
finished = latch.await(10, TimeUnit.SECONDS);
} catch (InterruptedException ex) {
throw new ServerFaultException("Unexpected interruption", ex);
}
assert(finished);
}
}
// ----------------------------------------------------------------------------
// HSTORE RPC SERVICE METHODS
// ----------------------------------------------------------------------------
/**
* We want to make this a private inner class so that we do not expose
* the RPC methods to other parts of the code.
*/
private class RemoteServiceHandler extends HStoreService {
@Override
public void transactionInit(RpcController controller, TransactionInitRequest request, RpcCallback<TransactionInitResponse> callback) {
try {
transactionInit_handler.remoteQueue(controller, request, callback);
} catch (Throwable ex) {
shutdownCluster(ex);
}
}
@Override
public void transactionWork(RpcController controller, TransactionWorkRequest request, RpcCallback<TransactionWorkResponse> callback) {
try {
transactionWork_handler.remoteHandler(controller, request, callback);
} catch (Throwable ex) {
shutdownCluster(ex);
}
}
@Override
public void transactionPrefetch(RpcController controller, TransactionPrefetchResult request, RpcCallback<TransactionPrefetchAcknowledgement> callback) {
try {
transactionPrefetch_handler.remoteHandler(controller, request, callback);
} catch (Throwable ex) {
shutdownCluster(ex);
}
}
@Override
public void transactionMap(RpcController controller, TransactionMapRequest request, RpcCallback<TransactionMapResponse> callback) {
try {
transactionMap_handler.remoteQueue(controller, request, callback);
} catch (Throwable ex) {
shutdownCluster(ex);
}
}
@Override
public void transactionReduce(RpcController controller, TransactionReduceRequest request, RpcCallback<TransactionReduceResponse> callback) {
try {
transactionReduce_handler.remoteQueue(controller, request, callback);
} catch (Throwable ex) {
shutdownCluster(ex);
}
}
@Override
public void transactionPrepare(RpcController controller, TransactionPrepareRequest request, RpcCallback<TransactionPrepareResponse> callback) {
try {
transactionPrepare_handler.remoteQueue(controller, request, callback);
} catch (Throwable ex) {
shutdownCluster(ex);
}
}
@Override
public void transactionFinish(RpcController controller, TransactionFinishRequest request, RpcCallback<TransactionFinishResponse> callback) {
try {
transactionFinish_handler.remoteQueue(controller, request, callback);
} catch (Throwable ex) {
shutdownCluster(ex);
}
}
@Override
public void transactionRedirect(RpcController controller, TransactionRedirectRequest request, RpcCallback<TransactionRedirectResponse> done) {
// We need to create a wrapper callback so that we can get the output that
// HStoreSite wants to send to the client and forward
// it back to whomever told us about this txn
if (debug.val)
LOG.debug(String.format("Received redirected transaction request from HStoreSite %s",
HStoreThreadManager.formatSiteName(request.getSenderSite())));
ByteBuffer serializedRequest = request.getWork().asReadOnlyByteBuffer();
TransactionRedirectResponseCallback callback = null;
try {
// callback = hstore_site.getObjectPools().CALLBACKS_TXN_REDIRECT_RESPONSE.borrowObject();
callback = new TransactionRedirectResponseCallback(hstore_site);
callback.init(local_site_id, request.getSenderSite(), done);
} catch (Exception ex) {
String msg = "Failed to get " + TransactionRedirectResponseCallback.class.getSimpleName();
throw new RuntimeException(msg, ex);
}
try {
if (transactionRedirect_dispatcher != null) {
transactionRedirect_dispatcher.queue(Pair.of(serializedRequest, callback));
} else {
hstore_site.invocationProcess(serializedRequest, callback);
}
} catch (Throwable ex) {
shutdownCluster(ex);
}
}
@Override
public void sendData(RpcController controller, SendDataRequest request, RpcCallback<SendDataResponse> done) {
// Take the SendDataRequest and pass it to the sendData_handler, which
// will deserialize the embedded VoltTable and wrap it in something that we can
// then pass down into the underlying ExecutionEngine
try {
sendData_handler.remoteQueue(controller, request, done);
} catch (Throwable ex) {
shutdownCluster(ex);
}
}
@Override
public void initialize(RpcController controller, InitializeRequest request, RpcCallback<InitializeResponse> done) {
if (debug.val)
LOG.debug(String.format("Received %s from HStoreSite %s [instanceId=%d]",
request.getClass().getSimpleName(),
HStoreThreadManager.formatSiteName(request.getSenderSite()),
request.getInstanceId()));
hstore_site.setInstanceId(request.getInstanceId());
InitializeResponse response = InitializeResponse.newBuilder()
.setSenderSite(local_site_id)
.setStatus(Status.OK)
.build();
done.run(response);
}
@Override
public void shutdownPrepare(RpcController controller, ShutdownPrepareRequest request, RpcCallback<ShutdownPrepareResponse> done) {
String originName = HStoreThreadManager.formatSiteName(request.getSenderSite());
// See if they gave us the original error. If they did, then we'll
// try to be helpful and print it out here
SerializableException error = null;
if (request.hasError() && request.getError().isEmpty() == false) {
error = SerializableException.deserializeFromBuffer(request.getError().asReadOnlyByteBuffer());
}
LOG.warn(String.format("Got %s from %s [hasError=%s]%s",
request.getClass().getSimpleName(), originName, (error != null),
(error != null ? "\n" + error : "")));
// Tell the HStoreSite to prepare to shutdown
HStoreCoordinator.this.hstore_site.prepareShutdown(request.hasError());
ThreadUtil.sleep(5000);
// Then send back the acknowledgment that we're good to go
ShutdownPrepareResponse response = ShutdownPrepareResponse.newBuilder()
.setSenderSite(HStoreCoordinator.this.local_site_id)
.build();
done.run(response);
LOG.warn(String.format("Sent %s back to %s",
response.getClass().getSimpleName(), originName));
}
@Override
public void shutdown(RpcController controller, ShutdownRequest request, RpcCallback<ShutdownResponse> done) {
String originName = HStoreThreadManager.formatSiteName(request.getSenderSite());
if (debug.val)
LOG.warn(String.format("Got %s from %s", request.getClass().getSimpleName(), originName));
LOG.warn(String.format("Shutting down %s [status=%d]",
hstore_site.getSiteName(), request.getExitStatus()));
// Then send back the acknowledgment right away
ShutdownResponse response = ShutdownResponse.newBuilder()
.setSenderSite(HStoreCoordinator.this.local_site_id)
.build();
done.run(response);
HStoreCoordinator.this.hstore_site.shutdown();
if (debug.val) LOG.debug(String.format("ForwardDispatcher Queue Idle Time: %.2fms",
transactionRedirect_dispatcher.getIdleTime().getTotalThinkTimeMS()));
}
@Override
public void heartbeat(RpcController controller, HeartbeatRequest request, RpcCallback<HeartbeatResponse> done) {
if (debug.val)
LOG.debug(String.format("heartbeat from %d at %d^^^^^^^^^^",
request.getSenderSite(), local_site_id));
HeartbeatResponse.Builder builder = HeartbeatResponse.newBuilder()
.setSenderSite(local_site_id)
.setStatus(Status.OK);
done.run(builder.build());
}
@Override
public void timeSync(RpcController controller, TimeSyncRequest request, RpcCallback<TimeSyncResponse> done) {
if (debug.val)
LOG.debug(String.format("Received %s from HStoreSite %s",
request.getClass().getSimpleName(),
HStoreThreadManager.formatSiteName(request.getSenderSite())));
TimeSyncResponse.Builder builder = TimeSyncResponse.newBuilder()
.setT0R(System.currentTimeMillis())
.setT0S(request.getT0S())
.setSenderSite(local_site_id);
ThreadUtil.sleep(10);
done.run(builder.setT1S(System.currentTimeMillis()).build());
}
@Override
public void transactionDebug(RpcController controller, TransactionDebugRequest request, RpcCallback<TransactionDebugResponse> done) {
if (debug.val)
LOG.debug(String.format("Received %s from HStoreSite %s",
request.getClass().getSimpleName(),
HStoreThreadManager.formatSiteName(request.getSenderSite())));
Long txnId = request.getTransactionId();
AbstractTransaction ts = hstore_site.getTransaction(txnId);
String debug;
Status status;
if (ts != null) {
debug = ts.debug();
status = Status.OK;
} else {
debug = "";
LOG.info("Found the abort!!!");
status = Status.ABORT_UNEXPECTED;
}
TransactionDebugResponse response = TransactionDebugResponse.newBuilder()
.setSenderSite(local_site_id)
.setStatus(status)
.setDebug(debug)
.build();
done.run(response);
}
@Override
public void unevictData(RpcController controller,
UnevictDataRequest request,
RpcCallback<UnevictDataResponse> done) {
LOG.info(String.format("Received %s from HStoreSite %s at HStoreSite %s",
request.getClass().getSimpleName(),
HStoreThreadManager.formatSiteName(request.getSenderSite()),
HStoreThreadManager.formatSiteName(local_site_id)));
AbstractTransaction ts = hstore_site.getTransaction(request.getTransactionId());
System.out.println(hstore_site.getInflightTxns().size());
System.out.println(request.getTransactionId());
assert(ts!=null);
ts.setUnevictCallback(done);
ts.setNewTransactionId(request.getNewTransactionId());
int partition = request.getPartitionId();
Table catalog_tbl = hstore_site.getCatalogContext().getTableById(request.getTableId());
short[] block_ids = new short[request.getBlockIdsList().size()];
for(int i = 0; i < request.getBlockIdsList().size(); i++) block_ids[i] = (short) request.getBlockIds(i);
int [] tuple_offsets = new int[request.getTupleOffsetsList().size()];
for(int i = 0; i < request.getTupleOffsetsList().size(); i++) tuple_offsets[i] = request.getTupleOffsets(i);
hstore_site.getAntiCacheManager().queue(ts, partition, catalog_tbl, block_ids, tuple_offsets);
}
} // END CLASS
// ----------------------------------------------------------------------------
// TRANSACTION METHODS
// ----------------------------------------------------------------------------
/**
* Send a TransactionInitRequest message to all of the sites that have
* the partitions that this transaction will need during its execution
* This must be guaranteed to only be invoked by one thread at a time
* @param ts
* @param callback
*/
public void transactionInit(LocalTransaction ts, RpcCallback<TransactionInitResponse> callback) {
if (debug.val)
LOG.debug(String.format("%s - Sending %s to %d partitions %s",
ts, TransactionInitRequest.class.getSimpleName(),
ts.getPredictTouchedPartitions().size(), ts.getPredictTouchedPartitions()));
assert(callback != null) :
String.format("Trying to initialize %s with a null TransactionInitCallback", ts);
ParameterSet procParams = ts.getProcedureParameters();
FastSerializer fs = this.serializers.get();
// Look at the Procedure to see whether it has prefetchable queries. If it does,
// then embed them in the TransactionInitRequest. We will need to generate a separate
// request for each site that we want to execute different queries on.
// TODO: We probably don't want to bother prefetching for txns that only touch
// partitions that are in its same local HStoreSite
if (hstore_conf.site.exec_prefetch_queries && ts.getProcedure().getPrefetchable() && ts.getEstimatorState() != null) {
if (debug.val)
LOG.debug(String.format("%s - Generating %s with prefetchable queries",
ts, TransactionInitRequest.class.getSimpleName()));
// We also need to add our boy to its base partition's DependencyTracker
// This is so that we can store the prefetch results when they come back
DependencyTracker depTracker = hstore_site.getDependencyTracker(ts.getBasePartition());
TransactionInitRequest.Builder[] builders = this.prefetchPlanner.plan(ts, procParams,
depTracker, fs);
// If the PrefetchQueryPlanner returns a null array, then there is nothing
// that we can actually prefetch, so we'll just send the normal txn init requests
if (builders == null) {
TransactionInitRequest.Builder builder = TransactionUtil.createTransactionInitBuilder(ts, fs);
this.transactionInit_handler.sendMessages(ts, builder.build(), callback, ts.getPredictTouchedPartitions());
return;
}
TransactionCounter.PREFETCH.inc(ts.getProcedure());
int sent_ctr = 0;
int prefetch_ctr = 0;
assert(builders.length == this.num_sites) :
String.format("Expected %d %s but we got %d",
this.num_sites, TransactionInitRequest.class.getSimpleName(), builders.length);
// Send out all of the prefetch requests first
for (int site_id = 0; site_id < this.num_sites; site_id++) {
// Blast out this mofo. Tell them that Rico sent you...
if (builders[site_id] != null && builders[site_id].getPrefetchFragmentsCount() > 0) {
TransactionInitRequest request = builders[site_id].build();
if (site_id == this.local_site_id) {
this.transactionInit_handler.remoteHandler(null, request, null);
} else {
ProtoRpcController controller = ts.getTransactionInitController(site_id);
this.channels[site_id].transactionInit(controller, request, callback);
}
prefetch_ctr += request.getPrefetchFragmentsCount();
sent_ctr++;
builders[site_id] = null;
}
} // FOR
// Then send out the ones without prefetching. These should all be the same
// builder so we have to make sure that we only build it once.
TransactionInitRequest request = null;
for (int site_id = 0; site_id < this.num_sites; site_id++) {
if (builders[site_id] != null) {
if (request == null) request = builders[site_id].build();
if (site_id == this.local_site_id) {
this.transactionInit_handler.remoteHandler(null, request, null);
} else {
ProtoRpcController controller = ts.getTransactionInitController(site_id);
this.channels[site_id].transactionInit(controller, request, callback);
}
sent_ctr++;
}
} // FOR
assert(sent_ctr > 0) :
String.format("No %s available for %s", TransactionInitRequest.class.getSimpleName(), ts);
if (debug.val)
LOG.debug(String.format("%s - Sent %d %s with %d prefetch %s",
ts, sent_ctr, TransactionInitRequest.class.getSimpleName(),
prefetch_ctr, WorkFragment.class.getSimpleName()));
}
// Otherwise we will send the same TransactionInitRequest to all of the remote sites
else {
TransactionInitRequest.Builder builder = TransactionUtil.createTransactionInitBuilder(ts, fs);
this.transactionInit_handler.sendMessages(ts, builder.build(), callback, ts.getPredictTouchedPartitions());
}
// TODO(pavlo): Add the ability to allow a partition that rejects a InitRequest to send notifications
// about the rejection to the other partitions that are included in the InitRequest.
}
/**
* Send the TransactionWorkRequest to the target remote site
* @param builders
* @param callback
*/
public void transactionWork(LocalTransaction ts, int site_id, TransactionWorkRequest request, RpcCallback<TransactionWorkResponse> callback) {
if (debug.val)
LOG.debug(String.format("%s - Sending TransactionWorkRequest to remote site %d " +
"[numFragments=%d, txnId=%d]",
ts, site_id, request.getFragmentsCount(), request.getTransactionId()));
assert(request.getFragmentsCount() > 0) :
String.format("No WorkFragments for Site %d in %s", site_id, ts);
assert(site_id != this.local_site_id) :
String.format("Trying to send %s for %s to local site %d",
request.getClass().getSimpleName(), ts, site_id);
assert(ts.getTransactionId().longValue() == request.getTransactionId()) :
String.format("%s is for txn #%d but the %s has txn #%d",
ts.getClass().getSimpleName(), ts.getTransactionId(),
request.getClass().getSimpleName(), request.getTransactionId());
this.channels[site_id].transactionWork(ts.getTransactionWorkController(site_id), request, callback);
}
/**
* Send the result of a prefetched query back to the txn's base partition.
* @param ts
* @param request
*/
public void transactionPrefetchResult(RemoteTransaction ts, TransactionPrefetchResult request) {
if (debug.val)
LOG.debug(String.format("%s - Sending %s back to base partition %d",
ts, request.getClass().getSimpleName(),
ts.getBasePartition()));
assert(request.hasResult()) :
String.format("No WorkResults in %s for %s", request.getClass().getSimpleName(), ts);
int site_id = catalogContext.getSiteIdForPartitionId(ts.getBasePartition());
assert(site_id != this.local_site_id);
ProtoRpcController controller = ts.getTransactionPrefetchController(request.getSourcePartition());
this.channels[site_id].transactionPrefetch(controller,
request,
this.transactionPrefetch_callback);
}
/**
* Notify the given partitions that this transaction is finished with them
* <B>Note:</B> This can also be used for the "early prepare" optimization.
* @param ts
* @param callback
* @param partitions
*/
public void transactionPrepare(LocalTransaction ts, LocalPrepareCallback callback, PartitionSet partitions) {
if (debug.val)
LOG.debug(String.format("Notifying partitions %s that %s is preparing to commit",
partitions, ts));
// Remove any partitions that we have notified previously *and* we have
// already gotten a response from.
PartitionSet receivedPartitions = callback.getReceivedPartitions();
if (receivedPartitions.isEmpty() == false) {
if (debug.val)
LOG.debug(String.format("Removed partitions %s from %s for %s [origPartitions=%s]",
receivedPartitions, TransactionPrepareRequest.class.getSimpleName(),
ts, partitions));
partitions = new PartitionSet(partitions);
partitions.removeAll(receivedPartitions);
}
// FAST PATH: If all of the partitions that this txn needs are on this
// HStoreSite, then we don't need to bother with making this request
if (hstore_site.allLocalPartitions(partitions)) {
hstore_site.transactionPrepare(ts, partitions, callback);
}
// SLOW PATH: Since we have to go over the network, we have to use our trusty ol'
// TransactionPrepareHandler to route the request to proper sites.
else {
TransactionPrepareRequest request = TransactionPrepareRequest.newBuilder()
.setTransactionId(ts.getTransactionId())
.addAllPartitions(partitions)
.build();
this.transactionPrepare_handler.sendMessages(ts, request, callback, partitions);
}
}
/**
* Notify all remote HStoreSites that the distributed transaction is done with data
* at the given partitions and that they need to commit/abort the results.
* IMPORTANT: Any data that you need from the LocalTransaction handle should be taken
* care of before this is invoked, because it may clean-up that object before it returns
* @param ts
* @param status
* @param callback
*/
public void transactionFinish(LocalTransaction ts, Status status, LocalFinishCallback callback) {
// Check whether we have already begun the finish process for this txn
if (ts.shouldInvokeFinish() == false) {
return;
}
PartitionSet partitions = ts.getPredictTouchedPartitions();
if (debug.val)
LOG.debug(String.format("Notifying partitions %s that %s is finished [status=%s]",
partitions, ts, status));
// FAST PATH: If all of the partitions that this txn needs are on this
// HStoreSite, then we don't need to bother with making this request
if (ts.isPredictAllLocal()) {
hstore_site.transactionFinish(ts.getTransactionId(), status, partitions);
}
// SLOW PATH: Since we have to go over the network, we have to use our trusty ol'
// TransactionFinishHandler to route the request to proper sites.
else {
TransactionFinishRequest request = TransactionFinishRequest.newBuilder()
.setTransactionId(ts.getTransactionId())
.setStatus(status)
.addAllPartitions(partitions)
.build();
this.transactionFinish_handler.sendMessages(ts, request, callback, partitions);
}
}
/**
* Forward a StoredProcedureInvocation request to a remote site for execution
* @param serializedRequest
* @param callback
* @param partition
*/
public void transactionRedirect(byte[] serializedRequest, RpcCallback<TransactionRedirectResponse> callback, int partition) {
int dest_site_id = catalogContext.getSiteIdForPartitionId(partition);
if (debug.val)
LOG.debug(String.format("Redirecting transaction request to partition #%d on %s",
partition, HStoreThreadManager.formatSiteName(dest_site_id)));
ByteString bs = ByteString.copyFrom(serializedRequest);
TransactionRedirectRequest mr = TransactionRedirectRequest.newBuilder()
.setSenderSite(this.local_site_id)
.setWork(bs)
.build();
this.channels[dest_site_id].transactionRedirect(new ProtoRpcController(), mr, callback);
}
// ----------------------------------------------------------------------------
// MapReduce METHODS
// ----------------------------------------------------------------------------
/**
* Tell all remote partitions to start the map phase for this txn
* @param ts
*/
public void transactionMap(LocalTransaction ts, RpcCallback<TransactionMapResponse> callback) {
ByteString paramBytes = null;
try {
ByteBuffer b = ByteBuffer.wrap(FastSerializer.serialize(ts.getProcedureParameters()));
paramBytes = ByteString.copyFrom(b.array());
} catch (Exception ex) {
throw new RuntimeException("Unexpected error when serializing StoredProcedureInvocation", ex);
}
TransactionMapRequest request = TransactionMapRequest.newBuilder()
.setTransactionId(ts.getTransactionId())
.setClientHandle(ts.getClientHandle())
.setBasePartition(ts.getBasePartition())
.setProcedureId(ts.getProcedure().getId())
.setParams(paramBytes)
.build();
PartitionSet partitions = ts.getPredictTouchedPartitions();
if (debug.val){
LOG.debug(String.format("Notifying partitions %s that %s is in Map Phase", partitions, ts));
if (trace.val) LOG.trace("<HStoreCoordinator.TransactionMap> is executing to sendMessages to all partitions");
}
this.transactionMap_handler.sendMessages(ts, request, callback, partitions);
}
/**
* Tell all remote partitions to start the reduce phase for this txn
* @param ts
*/
public void transactionReduce(LocalTransaction ts, RpcCallback<TransactionReduceResponse> callback) {
// We only need to send over the transaction. The remote side should
// already have all the information that it needs about this txn
TransactionReduceRequest request = TransactionReduceRequest.newBuilder()
.setTransactionId(ts.getTransactionId())
.build();
PartitionSet partitions = ts.getPredictTouchedPartitions();
if (debug.val) {
LOG.debug(String.format("Notifying partitions %s that %s is in Reduce Phase", partitions, ts));
if (trace.val) LOG.trace("<HStoreCoordinator.TransactionReduce> is executing to sendMessages to all partitions");
}
this.transactionReduce_handler.sendMessages(ts, request, callback, partitions);
}
// ----------------------------------------------------------------------------
// SEND DATA METHODS
// ----------------------------------------------------------------------------
/**
* This is will be the main method used to send data from one partition to another.
* We will probably to dispatch these messages and handle then on the remote
* side in a separate thread so that we don't block the ExecutionSite threads
* or any networking thread. We also need to make sure that if have to send
* data to a partition that's on our same machine, then we don't want to
* waste time serializing + deserializing the data when didn't have to.
* @param ts
*/
public void sendData(LocalTransaction ts, Map<Integer, VoltTable> data, RpcCallback<SendDataResponse> callback) {
// TODO(xin): Loop through all of the remote HStoreSites and grab their partition data
// out of the map given as input. Create a single SendDataRequest for that
// HStoreSite and then use the direct channel to send the data. Be sure to skip
// the partitions at the local site
//
// this.channels.get(dest_site_id).sendData(new ProtoRpcController(), request, callback);
//
// Then go back and grab the local partition data and invoke sendData_handler.sendLocal
Long txn_id = ts.getTransactionId();
Set<Integer> fake_responses = null;
for (Site remote_site : this.catalogContext.sites.values()) {
int dest_site_id = remote_site.getId();
if (debug.val)
LOG.debug("Dest_site_id: " + dest_site_id + " Local_site_id: " + this.local_site_id);
if (dest_site_id == this.local_site_id) {
// If there is no data for any partition at this remote HStoreSite, then we will fake a response
// message to the callback and tell them that everything is ok
if (fake_responses == null) fake_responses = new HashSet<Integer>();
fake_responses.add(dest_site_id);
if (debug.val)
LOG.debug("Did not send data to " + remote_site + ". Will send a fake response instead");
continue;
}
SendDataRequest.Builder builder = SendDataRequest.newBuilder()
.setTransactionId(txn_id.longValue())
.setSenderSite(local_site_id);
// Loop through and get all the data for this site
for (Partition catalog_part : remote_site.getPartitions()) {
VoltTable vt = data.get(catalog_part.getId());
if (vt == null) {
LOG.warn("No data in " + ts + " for partition " + catalog_part.getId());
continue;
}
ByteString bs = null;
byte bytes[] = null;
try {
bytes = ByteBuffer.wrap(FastSerializer.serialize(vt)).array();
bs = ByteString.copyFrom(bytes);
if (debug.val)
LOG.debug(String.format("%s - Outbound data for partition #%d " +
"[RowCount=%d / MD5=%s / Length=%d]",
ts, catalog_part.getId(),
vt.getRowCount(), StringUtil.md5sum(bytes), bytes.length));
} catch (Exception ex) {
String msg = String.format("Unexpected error when serializing %s data for partition %d",
ts, catalog_part.getId());
throw new ServerFaultException(msg, ex, ts.getTransactionId());
}
if (trace.val)
LOG.trace("Constructing Dependency for " + catalog_part);
builder.addDepId(catalog_part.getId())
.addData(bs);
} // FOR n partitions in remote_site
if (builder.getDataCount() > 0) {
if (debug.val)
LOG.debug(String.format("%s - Sending data to %d partitions at %s for %s",
ts, builder.getDataCount(), remote_site, ts));
this.channels[dest_site_id].sendData(new ProtoRpcController(), builder.build(), callback);
}
} // FOR n sites in this catalog
for (int partition : hstore_site.getLocalPartitionIds().values()) {
VoltTable vt = data.get(Integer.valueOf(partition));
if (vt == null) {
LOG.warn("No data in " + ts + " for partition " + partition);
continue;
}
if (debug.val) LOG.debug(String.format("Storing VoltTable directly at local partition %d for %s", partition, ts));
ts.storeData(partition, vt);
} // FOR
if (fake_responses != null) {
if (debug.val) LOG.debug(String.format("Sending fake responses for %s for partitions %s", ts, fake_responses));
for (int dest_site_id : fake_responses) {
SendDataResponse.Builder builder = SendDataResponse.newBuilder()
.setTransactionId(txn_id.longValue())
.setStatus(Hstoreservice.Status.OK)
.setSenderSite(dest_site_id);
callback.run(builder.build());
} // FOR
}
}
public Map<Integer, String> transactionDebug(Long txn_id) {
assert(txn_id != null);
final CountDownLatch latch = new CountDownLatch(this.num_sites-1);
final Map<Integer, String> responses = new TreeMap<Integer, String>();
RpcCallback<TransactionDebugResponse> callback = new RpcCallback<TransactionDebugResponse>() {
@Override
public void run(TransactionDebugResponse response) {
if (response.getStatus() == Status.OK) {
int site_id = response.getSenderSite();
assert(responses.containsKey(site_id) == false);
responses.put(site_id, response.getDebug());
}
latch.countDown();
}
};
TransactionDebugRequest request = TransactionDebugRequest.newBuilder()
.setSenderSite(this.local_site_id)
.setTransactionId(txn_id)
.build();
for (int site_id = 0; site_id < this.num_sites; site_id++) {
if (site_id == this.local_site_id) continue;
this.channels[site_id].transactionDebug(new ProtoRpcController(), request, callback);
if (trace.val)
LOG.trace(String.format("Sent %s to %s",
request.getClass().getSimpleName(),
HStoreThreadManager.formatSiteName(site_id)));
} // FOR
// Added our own debug info
AbstractTransaction ts = this.hstore_site.getTransaction(txn_id);
if (ts != null) {
responses.put(this.local_site_id, ts.debug());
}
// Then wait for all of our responses
boolean success = false;
try {
success = latch.await(5, TimeUnit.SECONDS);
} catch (InterruptedException ex) {
// nothing
}
if (success == false) {
LOG.warn(String.format("Failed to recieve debug responses from %d remote HStoreSites",
this.num_sites-1));
}
return (responses);
}
// ----------------------------------------------------------------------------
// HEARTBEAT METHODS
// ----------------------------------------------------------------------------
/**
* Send a heartbeat notification message to all the other sites in the cluster.
*/
public void sendHeartbeat() {
HeartbeatRequest request = HeartbeatRequest.newBuilder()
.setSenderSite(this.local_site_id)
.setLastTransactionId(-1) // FIXME
.build();
for (int site_id = 0; site_id < this.num_sites; site_id++) {
if (site_id == this.local_site_id) continue;
if (this.isShuttingDown()) break;
try {
this.channels[site_id].heartbeat(new ProtoRpcController(), request, this.heartbeatCallback);
if (trace.val)
LOG.trace(String.format("Sent %s to %s",
request.getClass().getSimpleName(),
HStoreThreadManager.formatSiteName(site_id)));
} catch (RuntimeException ex) {
// Silently ignore these errors...
}
} // FOR
}
// ----------------------------------------------------------------------------
// UNEVICT DATA
// ----------------------------------------------------------------------------
/**
* Send a message to a remote site to unevict data
* @param tuple_offsets
* @param block_ids
* @param catalog_tbl
* @param partition_id
* @param txn
* @return
*/
public void sendUnevictDataMessage(int remote_site_id, LocalTransaction txn, int partition_id, Table catalog_tbl, short[] block_ids, int[] tuple_offsets) {
Builder builder = UnevictDataRequest.newBuilder()
.setSenderSite(this.local_site_id)
.setTransactionId(txn.getOldTransactionId())
.setNewTransactionId(txn.getTransactionId())
.setPartitionId(partition_id)
.setTableId(catalog_tbl.getRelativeIndex());
for (int i = 0; i< block_ids.length; i++){
builder = builder.addBlockIds(block_ids[i]);
}
for (int i=0; i< tuple_offsets.length; i++){
builder = builder.addTupleOffsets(tuple_offsets[i]);
}
UnevictDataRequest request = builder.build();
try {
this.channels[remote_site_id].unevictData(new ProtoRpcController(), request, this.unevictCallback);
if (trace.val) {
LOG.trace(String.format("Sent unevict message request to remote hstore site %d from base site %d",
remote_site_id, this.hstore_site.getSiteId()));
LOG.trace(String.format("Sent %s to %s",
request.getClass().getSimpleName(),
HStoreThreadManager.formatSiteName(remote_site_id)));
}
} catch (RuntimeException ex) {
// Silently ignore these errors...
ex.printStackTrace();
}
}
// ----------------------------------------------------------------------------
// TIME SYNCHRONZIATION
// ----------------------------------------------------------------------------
/**
* Approximate the time offsets of all the sites in the cluster so that we can offset
* our TransactionIdManager's timestamps by the site with the clock the furthest ahead.
* This is a blocking call and only really needs to be performed once at start-up
*/
public void syncClusterTimes() {
// We don't need to do this if there is only one site
if (this.num_sites == 1) return;
final CountDownLatch latch = new CountDownLatch(this.num_sites-1);
final Map<Integer, Integer> time_deltas = new HashMap<Integer, Integer>();
RpcCallback<TimeSyncResponse> callback = new RpcCallback<TimeSyncResponse>() {
@Override
public void run(TimeSyncResponse request) {
long t1_r = System.currentTimeMillis();
int dt = (int)((request.getT1S() + request.getT0R()) - (t1_r + request.getT0S())) / 2;
time_deltas.put(request.getSenderSite(), dt);
latch.countDown();
}
};
// Send out TimeSync request
for (int site_id = 0; site_id < this.num_sites; site_id++) {
if (site_id == this.local_site_id) continue;
ProtoRpcController controller = new ProtoRpcController();
TimeSyncRequest request = TimeSyncRequest.newBuilder()
.setSenderSite(this.local_site_id)
.setT0S(System.currentTimeMillis())
.build();
this.channels[site_id].timeSync(controller, request, callback);
if (trace.val) LOG.trace("Sent TIMESYNC to " + HStoreThreadManager.formatSiteName(site_id));
} // FOR
if (trace.val) LOG.trace("Sent out all TIMESYNC requests!");
boolean success = false;
try {
success = latch.await(10, TimeUnit.SECONDS);
} catch (InterruptedException ex) {
// nothing
}
if (success == false) {
LOG.warn(String.format("Failed to recieve time synchronization responses " +
"from %d remote sites", this.num_sites-1));
} else if (trace.val) LOG.trace("Received all TIMESYNC responses!");
// Then do the time calculation
long max_dt = 0L;
int culprit = this.local_site_id;
for (Entry<Integer, Integer> e : time_deltas.entrySet()) {
if (debug.val)
LOG.debug(String.format("Time delta to site %s is %d ms",
HStoreThreadManager.formatSiteName(e.getKey()), e.getValue()));
if (e.getValue() > max_dt) {
max_dt = e.getValue();
culprit = e.getKey();
}
} // FOR
this.hstore_site.setTransactionIdManagerTimeDelta(max_dt);
if (debug.val)
LOG.debug(String.format("Setting time delta to %d ms [culprit=%s]",
max_dt, HStoreThreadManager.formatSiteName(culprit)));
}
// ----------------------------------------------------------------------------
// SHUTDOWN METHODS
// ----------------------------------------------------------------------------
/**
* Take down the cluster. This is a non-blocking call. It will return right away
* @param error
*/
public void shutdownCluster(final Throwable error) {
if (debug.val)
LOG.debug(String.format("Invoking non-blocking shutdown protocol [hasError=%s]",
error!=null), error);
// Make this a thread so that we don't block and can continue cleaning up other things
Runnable shutdownRunnable = new Runnable() {
@Override
public void run() {
LOG.debug("Shutting down cluster " + (error != null ? " - " + error : ""));
try {
HStoreCoordinator.this.shutdownClusterBlocking(error); // Never returns!
} catch (Throwable ex) {
ex.printStackTrace();
}
}
};
hstore_site.getThreadManager().scheduleWork(shutdownRunnable, 2500, TimeUnit.MILLISECONDS);
return;
}
/**
* Tell all of the other sites to shutdown and then knock ourselves out...
* This is a non-blocking call.
*/
public void shutdownCluster() {
this.shutdownCluster(null);
}
protected void prepareShutdownCluster(final Throwable error) throws Exception {
final CountDownLatch latch = new CountDownLatch(this.num_sites-1);
if (this.num_sites > 1) {
RpcCallback<ShutdownPrepareResponse> callback = new ShutdownPrepareCallback(this.num_sites, latch);
ShutdownPrepareRequest.Builder builder = ShutdownPrepareRequest.newBuilder()
.setSenderSite(this.catalog_site.getId());
// Pack the error into a SerializableException
if (error != null) {
SerializableException sError = new SerializableException(error);
ByteBuffer buffer = sError.serializeToBuffer();
buffer.rewind();
builder.setError(ByteString.copyFrom(buffer));
if (debug.val)
LOG.debug("Serializing error message in shutdown request");
}
ShutdownPrepareRequest request = builder.build();
if (debug.val)
LOG.debug(String.format("Sending %s to %d remote sites",
request.getClass().getSimpleName(), this.num_sites-1));
for (int site_id = 0; site_id < this.num_sites; site_id++) {
if (site_id == this.local_site_id) continue;
if (this.channels[site_id] == null) {
LOG.error(String.format("Trying to send %s to %s before the connection was established",
request.getClass().getSimpleName(),
HStoreThreadManager.formatSiteName(site_id)));
} else {
this.channels[site_id].shutdownPrepare(new ProtoRpcController(), request, callback);
if (trace.val)
LOG.trace(String.format("Sent %s to %s",
request.getClass().getSimpleName(),
HStoreThreadManager.formatSiteName(site_id)));
}
} // FOR
}
// Tell ourselves to get ready
this.hstore_site.prepareShutdown(error != null);
// Block until the latch releases us
if (this.num_sites > 1) {
LOG.info(String.format("Waiting for %d sites to finish shutting down", latch.getCount()));
boolean result = latch.await(10, TimeUnit.SECONDS);
if (result == false) {
LOG.warn("Failed to recieve all shutdown responses");
}
}
}
/**
* Shutdown the cluster. If the given Exception is not null, then all the nodes will
* exit with a non-zero status. This is will never return
* TODO: Move into HStoreSite
* @param error
*/
protected synchronized void shutdownClusterBlocking(final Throwable error) {
if (this.state == ShutdownState.SHUTDOWN) return;
this.hstore_site.prepareShutdown(error != null);
if (error != null) {
LOG.warn("Shutting down cluster with " + error.getClass().getSimpleName(), error);
} else {
LOG.warn("Shutting down cluster");
}
final int exit_status = (error == null ? 0 : 1);
try {
// Tell everyone that we're getting ready to stop the party
this.prepareShutdownCluster(error);
// Now send the final shutdown request
if (this.num_sites > 1) {
ThreadUtil.sleep(5000); // XXX
LOG.info(String.format("Sending final shutdown message to %d remote sites", this.num_sites-1));
RpcCallback<ShutdownResponse> callback = new RpcCallback<ShutdownResponse>() {
@Override
public void run(ShutdownResponse parameter) {
// Nothing to do...
}
};
ShutdownRequest request = ShutdownRequest.newBuilder()
.setSenderSite(this.catalog_site.getId())
.setExitStatus(exit_status)
.build();
if (debug.val)
LOG.debug(String.format("Sending %s to %d remote sites",
request.getClass().getSimpleName(), this.num_sites));
for (int site_id = 0; site_id < this.num_sites; site_id++) {
if (site_id == this.local_site_id) continue;
this.channels[site_id].shutdown(new ProtoRpcController(), request, callback);
if (debug.val)
LOG.debug(String.format("Sent %s to %s",
request.getClass().getSimpleName(),
HStoreThreadManager.formatSiteName(site_id)));
} // FOR
ThreadUtil.sleep(2000);
}
} catch (Throwable ex) {
ex.printStackTrace();
// IGNORE
} finally {
LOG.info(String.format("Shutting down [site=%d / exitCode=%d]",
this.catalog_site.getId(), exit_status));
if (error != null) {
LOG.fatal("A fatal error caused this shutdown", error);
}
this.hstore_site.shutdown();
}
}
// ----------------------------------------------------------------------------
// UTILITY METHODS
// ----------------------------------------------------------------------------
public static List<Pair<Integer, InetSocketAddress>> getRemoteCoordinators(Site catalog_site) {
List<Pair<Integer, InetSocketAddress>> m = new ArrayList<Pair<Integer,InetSocketAddress>>();
Database catalog_db = CatalogUtil.getDatabase(catalog_site);
Map<Host, Set<Site>> host_partitions = CatalogUtil.getSitesPerHost(catalog_db);
for (Entry<Host, Set<Site>> e : host_partitions.entrySet()) {
String host = e.getKey().getIpaddr();
for (Site remote_site : e.getValue()) {
if (remote_site.getId() != catalog_site.getId()) {
InetSocketAddress address = new InetSocketAddress(host, remote_site.getMessenger_port());
m.add(Pair.of(remote_site.getId(), address));
if (debug.val)
LOG.debug(String.format("Creating RpcChannel to %s for site %s",
address, HStoreThreadManager.formatSiteName(remote_site.getId())));
} // FOR
} // FOR
} // FOR
return (m);
}
/**
* Returns an HStoreService handle that is connected to the given site
* This should not be called directly.
* @param catalog_site
* @return
*/
protected static HStoreService getHStoreService(Site catalog_site) {
NIOEventLoop eventLoop = new NIOEventLoop();
InetSocketAddress addresses[] = new InetSocketAddress[] {
new InetSocketAddress(catalog_site.getHost().getIpaddr(), catalog_site.getMessenger_port())
};
ProtoRpcChannel[] channels = null;
try {
channels = ProtoRpcChannel.connectParallel(eventLoop, addresses);
} catch (Exception ex) {
}
HStoreService channel = HStoreService.newStub(channels[0]);
return (channel);
}
}