package com.linkedin.databus.client;
/*
*
* Copyright 2013 LinkedIn Corp. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.locks.ReentrantLock;
import javax.management.MBeanServer;
import org.apache.log4j.Logger;
import com.linkedin.databus.client.ConnectionState.StateId;
import com.linkedin.databus.client.netty.RemoteExceptionHandler;
import com.linkedin.databus.client.pub.ServerInfo;
import com.linkedin.databus.client.pub.mbean.UnifiedClientStats;
import com.linkedin.databus.core.BootstrapCheckpointHandler;
import com.linkedin.databus.core.Checkpoint;
import com.linkedin.databus.core.DatabusComponentStatus;
import com.linkedin.databus.core.DbusClientMode;
import com.linkedin.databus.core.DbusConstants;
import com.linkedin.databus.core.DbusEventBuffer;
import com.linkedin.databus.core.DbusEventFactory;
import com.linkedin.databus.core.DbusEventInternalReadable;
import com.linkedin.databus.core.DbusEventInternalWritable;
import com.linkedin.databus.core.InvalidCheckpointException;
import com.linkedin.databus.core.InvalidEventException;
import com.linkedin.databus.core.PendingEventTooLargeException;
import com.linkedin.databus.core.async.LifecycleMessage;
import com.linkedin.databus.core.util.IdNamePair;
import com.linkedin.databus2.core.BackoffTimer;
import com.linkedin.databus2.core.DatabusException;
import com.linkedin.databus2.core.container.request.BootstrapDatabaseTooOldException;
import com.linkedin.databus2.core.filter.DbusKeyCompositeFilter;
import com.linkedin.databus2.core.filter.DbusKeyCompositeFilterConfig;
import com.linkedin.databus2.core.filter.DbusKeyFilter;
import com.linkedin.databus2.core.filter.KeyFilterConfigHolder;
public class BootstrapPullThread extends BasePullThread
{
public static final Short START_OF_SNAPSHOT_SRCID = (short)(DbusEventInternalWritable.PRIVATE_RANGE_MAX_SRCID - 1);
public static final Short START_OF_CATCHUP_SRCID = (short)(DbusEventInternalWritable.PRIVATE_RANGE_MAX_SRCID - 2);
public static final Short END_OF_BOOTSTRAP_SRCID = (short)(DbusEventInternalWritable.PRIVATE_RANGE_MAX_SRCID - 3);
private static final EnumSet<ConnectionState.StateId> SHOULD_TEAR_CONNECTION =
EnumSet.of(ConnectionState.StateId.START_SCN_REQUEST_SENT,
ConnectionState.StateId.START_SCN_RESPONSE_SUCCESS,
ConnectionState.StateId.START_SCN_REQUEST_ERROR,
ConnectionState.StateId.START_SCN_RESPONSE_ERROR,
ConnectionState.StateId.TARGET_SCN_REQUEST_SENT,
ConnectionState.StateId.TARGET_SCN_RESPONSE_SUCCESS,
ConnectionState.StateId.TARGET_SCN_REQUEST_ERROR,
ConnectionState.StateId.TARGET_SCN_RESPONSE_ERROR,
ConnectionState.StateId.STREAM_REQUEST_SENT,
ConnectionState.StateId.STREAM_REQUEST_SUCCESS,
ConnectionState.StateId.STREAM_REQUEST_ERROR,
ConnectionState.StateId.STREAM_RESPONSE_ERROR,
ConnectionState.StateId.BOOTSTRAP_DONE
);
private Checkpoint _resumeCkpt;
//private long _errorSleepMs = 0;
private final RemoteExceptionHandler _remoteExceptionHandler;
private DbusKeyCompositeFilter _bootstrapFilter;
private final List<DbusKeyCompositeFilterConfig> _bootstrapFilterConfigs;
// track number of events read during the current bootstrap phase
private long numEventsInCurrentState = 0;
private final double _pullerBufferUtilizationPct;
// keep track of the last open bootstrap connection so we can close it on shutdown
private DatabusBootstrapConnection _lastOpenConnection;
private final BackoffTimer _retriesBeforeCkptCleanup;
private ReentrantLock _v3BootstrapLock = null;
public BootstrapPullThread(String name,
DatabusSourcesConnection sourcesConn,
DbusEventBuffer dbusEventBuffer,
ConnectionStateFactory connStateFactory,
Set<ServerInfo> bootstrapServers,
List<DbusKeyCompositeFilterConfig> bootstrapFilterConfigs,
double pullerBufferUtilPct,
MBeanServer mbeanServer,
DbusEventFactory eventFactory
)
{
this(name, sourcesConn, dbusEventBuffer, connStateFactory, bootstrapServers, bootstrapFilterConfigs,
pullerBufferUtilPct, mbeanServer, eventFactory, null, null);
}
public BootstrapPullThread(String name,
DatabusSourcesConnection sourcesConn,
DbusEventBuffer dbusEventBuffer,
ConnectionStateFactory connStateFactory,
Set<ServerInfo> bootstrapServers,
List<DbusKeyCompositeFilterConfig> bootstrapFilterConfigs,
double pullerBufferUtilPct,
MBeanServer mbeanServer,
DbusEventFactory eventFactory,
ReentrantLock v3BootstrapLock,
Logger log)
{
super(name, sourcesConn.getConnectionConfig().getBstPullerRetries(), sourcesConn, dbusEventBuffer,
connStateFactory, bootstrapServers, mbeanServer, eventFactory, log);
_retriesBeforeCkptCleanup = new BackoffTimer("BSPullerRetriesBeforeCkptCleanup",
sourcesConn.getConnectionConfig().getBsPullerRetriesBeforeCkptCleanup());
_bootstrapFilterConfigs = bootstrapFilterConfigs;
_remoteExceptionHandler = new RemoteExceptionHandler(sourcesConn, dbusEventBuffer, eventFactory);
_pullerBufferUtilizationPct = pullerBufferUtilPct;
_v3BootstrapLock = v3BootstrapLock;
// TODO (DDSDBUS-84): if resumeCkpt is not empty, i.e. we are starting fresh, make sure the
// sources passed in are exactly the same as what's stored in the checkpoint -
// the order has to be the same as well. If not the same, we have to start fresh.
// if (!matchSources(resumeCkpt.getAllBootstrapSources(), sources))
//{
// _resumeCkpt = new Checkpoint();
// _snapshotSource = ...
// _catchupSource = ...
// ...
// }
}
@Override
protected boolean shouldDelayTearConnection(StateId stateId)
{
boolean delayTear = SHOULD_TEAR_CONNECTION.contains(stateId);
return delayTear;
}
@Override
protected boolean executeAndChangeState(Object message)
{
boolean success = true;
if (message instanceof ConnectionStateMessage)
{
if (_componentStatus.getStatus() != DatabusComponentStatus.Status.RUNNING)
{
_log.warn("not running: " + message.toString());
}
else
{
ConnectionStateMessage stateMsg = (ConnectionStateMessage)message;
ConnectionState currentState = stateMsg.getConnState();
switch (stateMsg.getStateId())
{
case INITIAL: break;
case BOOTSTRAP_DONE: break; //bootstrap is done -- wait for the next message
case CLOSED: shutdown(); break;
case BOOTSTRAP:
case PICK_SERVER: doPickBootstrapServer(currentState); break;
case REQUEST_START_SCN: doRequestStartScn(currentState); break;
case START_SCN_RESPONSE_SUCCESS: doStartScnResponseSuccess(currentState); break;
case REQUEST_TARGET_SCN: doRequestTargetScn(currentState); break;
case TARGET_SCN_RESPONSE_SUCCESS: doTargetScnResponseSuccess(currentState); break;
// no need to distinguish snapshot and catchup because ckpt has it already
case REQUEST_STREAM: doRequestBootstrapStream(currentState); break;
case STREAM_REQUEST_SUCCESS: doReadBootstrapEvents(currentState); break;
case STREAM_RESPONSE_DONE: doStreamResponseDone(currentState); break;
case STREAM_REQUEST_ERROR: processStreamRequestError(currentState); break;
case STREAM_RESPONSE_ERROR: processStreamResponseError(currentState); break;
case START_SCN_REQUEST_ERROR: processStartScnRequestError(currentState); break;
case START_SCN_RESPONSE_ERROR: processStartScnResponseError(currentState); break;
case TARGET_SCN_REQUEST_ERROR: processTargetScnRequestError(currentState); break;
case TARGET_SCN_RESPONSE_ERROR: processTargetScnResponseError(currentState); break;
default:
{
_log.error("Unknown state in BootstrapPullThread: " + currentState.getStateId());
success = false;
break;
}
}
}
}
else if (message instanceof CheckpointMessage)
{
CheckpointMessage cpMessage = (CheckpointMessage)message;
switch (cpMessage.getTypeId())
{
case SET_CHECKPOINT: doSetResumeCheckpoint(cpMessage); break;
default:
{
_log.error("Unkown CheckpointMessage in BootstrapPullThread: " + cpMessage.getTypeId());
success = false;
break;
}
}
}
else if (message instanceof SourcesMessage)
{
SourcesMessage sourcesMessage = (SourcesMessage)message;
switch (sourcesMessage.getTypeId())
{
case SET_SOURCES_IDS: doSetSourcesIds(sourcesMessage); break;
case SET_SOURCES_SCHEMAS: doSetSourcesSchemas(sourcesMessage); break;
default:
{
_log.error("Unkown CheckpointMessage in BootstrapPullThread: " + sourcesMessage.getTypeId());
success = false;
break;
}
}
}
else
{
success = super.executeAndChangeState(message);
}
return success;
}
private void doSetSourcesSchemas(SourcesMessage sourcesMessage)
{
if (null != _currentState.getSourcesSchemas())
{
final Set<Long> newIds = sourcesMessage.getSourcesSchemas().keySet();
final Set<Long> curIds = _currentState.getSourcesSchemas().keySet();
if (! newIds.containsAll(curIds))
{
String msg = "Expected schemas for sources " + curIds + "; got: " + newIds;
_log.error(msg);
_currentState.switchToClosed();
enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(new DatabusException(msg)));
return;
}
}
_currentState.setSourcesSchemas(sourcesMessage.getSourcesSchemas());
_sourcesConn.getBootstrapDispatcher().enqueueMessage(sourcesMessage);
}
private void doSetSourcesIds(SourcesMessage sourcesMessage)
{
// sourcesMessage.getSources() has the sources that the relay returned. The sources call response handler
// in the relay puller has already verified that all the subscriptions are covered by the list of sources
// returned from the relay (see RelayPullThread.buildSubsList()
_currentState.setSourcesIds(sourcesMessage.getSources());
_currentState.setSourcesIdListString(sourcesMessage.getSourcesIdListString());
_sourcesConn.getBootstrapDispatcher().enqueueMessage(sourcesMessage);
}
private void doSetResumeCheckpoint(CheckpointMessage cpMessage)
{
_resumeCkpt = cpMessage.getCheckpoint();
if (null != _resumeCkpt)
{
DbusEventInternalReadable cpEvent = getEventFactory().createCheckpointEvent(_resumeCkpt);
boolean success;
try
{
success = _currentState.getDataEventsBuffer().injectEvent(cpEvent);
}
catch (InvalidEventException e)
{
_log.error("unable to create checkpoint event for checkpoint " + _resumeCkpt + "; error: " + e, e);
success = false;
}
if (!success)
{
_log.error("Unable to write bootstrap phase marker");
}
}
_log.info("resume checkpoint: " + _resumeCkpt);
}
/**
* Invoked when a LifeCycle message of type "START" is received by bootstrap puller thread
* as defined in AbstractActorMessageQueue#executeAndChangeState(Object)
*
* 1. Acquire lock for Databus V3 bootstrap
* 2. Invoke same method on super class
* 3. Clear and switch state-machine to start choosing a server(relay) to connected to
*/
@Override
protected void doStart(LifecycleMessage lcMessage)
{
lockV3Bootstrap();
super.doStart(lcMessage);
_currentState.clearBootstrapState();
_currentState.switchToPickServer();
enqueueMessage(_currentState);
}
/**
* Invoked when a LifeCycle message of type "RESUME" is received by bootstrap puller thread
* as defined in AbstractActorMessageQueue#executeAndChangeState(Object)
*
* 1. Acquire lock for Databus V3 bootstrap
* 2. Invoke same method in super class
*/
@Override
protected void doResume(LifecycleMessage lcMessage)
{
lockV3Bootstrap();
super.doResume(lcMessage);
}
/**
* Invoked when a LifeCycle message of type "SHUTDOWN" is received by bootstrap puller thread
* as defined in AbstractActorMessageQueue#executeAndChangeState(Object)
*
* 1. Release lock for Databus V3 bootstrap
* 2. Invoke same method in super class
* 3. The currently open connection to server (relay) is tracked as we want "sticky" behavior, meaning
* the ability to be able to connect to the previously connected server. Close the connection if open.
*/
// TODO: seems misleading; stickiness not really achieved with _lastOpenConnection (which is good, since
// it's closed and forgotten here) but rather with lastReadBS in doPickBootstrapServer()
@Override
protected void onShutdown()
{
try
{
if (null != _lastOpenConnection)
{
_log.info("closing open connection");
_lastOpenConnection.close();
_lastOpenConnection = null;
}
}
finally
{
unlockV3Bootstrap(true);
}
_log.info("shutdown complete.");
}
/**
* Invoked when a LifeCycle message of type "PAUSE" is received by bootstrap puller thread
* as defined in AbstractActorMessageQueue#executeAndChangeState(Object)
*
* 1. Release lock for Databus V3 bootstrap
* 2. Invoke same method in super class
*/
@Override
protected void doPause(LifecycleMessage lcMessage)
{
try
{
super.doPause(lcMessage);
}
finally
{
unlockV3Bootstrap();
}
}
/**
* Invoked when a LifeCycle message of type "SUSPEND_ON_ERROR" is received by bootstrap puller thread
* as defined in AbstractActorMessageQueue#executeAndChangeState(Object)
*
* 1. Invoke same method in super class
* 2. Send an "I'm dead" heartbeat value as a failsafe
* 3. Release lock for Databus V3 bootstrap
*/
@Override
protected void doSuspendOnError(LifecycleMessage lcMessage)
{
try
{
super.doSuspendOnError(lcMessage);
sendHeartbeat(_sourcesConn.getUnifiedClientStats(), -1);
}
finally
{
unlockV3Bootstrap();
}
}
/**
* This method is not to be confused with the doResume method. The latter is invoked
* on a LifeCycleMessage. This method is invoked when a RESUME message is received in
* one of the inner workflows.
*/
@Override
protected void onResume()
{
_currentState.switchToPickServer();
enqueueMessage(_currentState);
}
protected void doPickBootstrapServer(ConnectionState curState)
{
int serversNum = _servers.size();
if (0 == serversNum)
{
//enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(
// new DatabusException("No bootstrap services specified")));
_sourcesConn.getConnectionStatus().suspendOnError(new DatabusException("No bootstrap services specified"));
return;
}
if (null == _resumeCkpt)
{
_sourcesConn.getConnectionStatus().suspendOnError(new DatabusException("Bootstrapping checkpoint is not set!"));
return;
}
boolean restartBootstrap = false;
String bsServerInfo = _resumeCkpt.getBootstrapServerInfo();
ServerInfo lastReadBS = null;
if ( null != bsServerInfo)
{
try
{
lastReadBS = ServerInfo.buildServerInfoFromHostPort(bsServerInfo, DbusConstants.HOSTPORT_DELIMITER);
} catch(Exception ex) {
_log.error("Unable to fetch bootstrap serverInfo from checkpoint, ServerInfo :" + bsServerInfo, ex);
}
}
if ( null == lastReadBS)
restartBootstrap = true;
int retriesLeft = 0;
DatabusBootstrapConnection bootstrapConn = null;
ServerInfo serverInfo = lastReadBS;
if ( !restartBootstrap )
{
//attempt to reconnect to the last used bootstrap server
while (null == bootstrapConn && (retriesLeft = _retriesBeforeCkptCleanup.getRemainingRetriesNum()) >= 0
&& !checkForShutdownRequest())
{
_log.info("Retry picking last used bootstrap server :" + serverInfo +
"; retries left:" + retriesLeft);
if (lastReadBS.equals(_curServer) ) // if it is new server do not sleep?
_retriesBeforeCkptCleanup.backoffAndSleep();
try
{
bootstrapConn = _sourcesConn.getBootstrapConnFactory().createConnection(serverInfo, this,
_remoteExceptionHandler);
_log.info("picked last used bootstrap server:" + serverInfo);
}
catch (Exception e)
{
_log.error("Unable to get connection to bootstrap server:" + serverInfo, e);
}
}
if ((null == bootstrapConn) && (_retriesBeforeCkptCleanup.getRemainingRetriesNum() < 0))
{
_log.info("Exhausted retrying the same bootstrap server :" + lastReadBS);
}
}
if(checkForShutdownRequest()) {
_log.info("Shutting down bootstrap");
return;
}
Random rng = new Random();
if ( null == bootstrapConn)
{
_log.info("Restarting bootstrap as client might be getting bootstrap data from different server instance !!");
_log.info("Old Checkpoint :" + _resumeCkpt);
curState.getBstCheckpointHandler().resetForServerChange(_resumeCkpt);
_log.info("New Checkpoint :" + _resumeCkpt);
_retriesBeforeCkptCleanup.reset();
} else {
_curServer = serverInfo;
}
while ((null == bootstrapConn) && (retriesLeft = _status.getRetriesLeft()) >= 0 &&
!checkForShutdownRequest())
{
_log.info("picking a bootstrap server; retries left:" + retriesLeft);
backoffOnPullError();
_curServerIdx = (_curServerIdx < 0) ? rng.nextInt(serversNum)
: (_curServerIdx + 1) % serversNum;
Iterator<ServerInfo> setIter = _servers.iterator();
for (int i = 0; i <= _curServerIdx; ++i) serverInfo = setIter.next();
_curServer = serverInfo;
try
{
bootstrapConn = _sourcesConn.getBootstrapConnFactory().createConnection(serverInfo, this, _remoteExceptionHandler);
_log.info("picked a bootstrap server:" + serverInfo.toSimpleString());
}
catch (Exception e)
{
_log.error("Unable to get connection to bootstrap server:" + serverInfo, e);
}
}
/*
* Close the old bootstrap Connection
*/
DatabusBootstrapConnection oldBootstrapConn = curState.getBootstrapConnection();
if ( null != oldBootstrapConn)
resetConnectionAndSetFlag();
_lastOpenConnection = bootstrapConn;
if (checkForShutdownRequest()) return;
if (null == bootstrapConn)
{
_log.error("bootstrap server retries exhausted");
enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(new DatabusException("bootstrap server retries exhausted")));
return;
}
sendHeartbeat(_sourcesConn.getUnifiedClientStats());
curState.bootstrapServerSelected(serverInfo.getAddress(), bootstrapConn, _curServer);
//determine what to do next based on the current checkpoint
_log.info("resuming bootstrap from checkpoint: " + _resumeCkpt);
curState.setCheckpoint(_resumeCkpt);
determineNextStateFromCheckpoint(curState);
enqueueMessage(curState);
}
private void doRequestTargetScn(ConnectionState curState)
{
_log.debug("Sending /targetScn request");
curState.switchToTargetScnRequestSent();
sendHeartbeat(_sourcesConn.getUnifiedClientStats());
curState.getBootstrapConnection().requestTargetScn(curState.getCheckpoint(), curState);
}
protected void doTargetScnResponseSuccess(ConnectionState curState)
{
if (toTearConnAfterHandlingResponse())
{
tearConnectionAndEnqueuePickServer();
}
else
{
final Checkpoint cp = curState.getCheckpoint();
curState.getBstCheckpointHandler().advanceAfterSnapshotPhase(cp);
curState.getBstCheckpointHandler().advanceAfterTargetScn(cp);
curState.switchToRequestStream(curState.getCheckpoint());
enqueueMessage(curState);
}
}
private void doRequestStartScn(ConnectionState curState)
{
_log.debug("Sending /startScn request");
String sourceNames = curState.getSourcesNameList();
curState.switchToStartScnRequestSent();
sendHeartbeat(_sourcesConn.getUnifiedClientStats());
curState.getBootstrapConnection().requestStartScn(curState.getCheckpoint(), curState, sourceNames);
}
private void doStartScnResponseSuccess(ConnectionState curState)
{
if (toTearConnAfterHandlingResponse())
{
tearConnectionAndEnqueuePickServer();
}
else
{
ServerInfo bsServerInfo = curState.getCurrentBSServerInfo();
if ( null == bsServerInfo)
{
String msg = "Bootstrap Server did not provide its server info in StartSCN !! Switching to PICK_SERVER. CurrentServer :" + _curServer;
_log.error(msg);
curState.switchToStartScnResponseError();
}
else if (! bsServerInfo.equals(_curServer)){
// Possible for VIP case
_log.info("Bootstrap server responded and current server does not match. Switching to Pick Server !! curServer: "
+ _curServer + ", Responded Server :" + bsServerInfo);
_log.info("Checkpoint before clearing :" + _resumeCkpt);
String bsServerInfoStr = _resumeCkpt.getBootstrapServerInfo();
final Long startScn = _resumeCkpt.getBootstrapStartScn();
curState.getBstCheckpointHandler().resetForServerChange(_resumeCkpt);
curState.getBstCheckpointHandler().setStartScnAfterServerChange(_resumeCkpt, startScn);
_resumeCkpt.setBootstrapServerInfo(bsServerInfoStr);
_log.info("Checkpoint after clearing :" + _resumeCkpt);
curState.switchToPickServer();
} else {
curState.switchToRequestStream(curState.getCheckpoint());
}
enqueueMessage(curState);
}
}
protected void doRequestBootstrapStream(ConnectionState curState)
{
boolean debugEnabled = _log.isDebugEnabled();
if (debugEnabled) _log.debug("Checking for free space");
//curState.getDataEventsBuffer().waitForFreeSpace(FREE_BUFFER_THRESHOLD);
int freeBufferThreshold=(int)(_sourcesConn.getConnectionConfig().getFreeBufferThreshold() *
100.0 / _pullerBufferUtilizationPct);
int freeSpace = curState.getDataEventsBuffer().getBufferFreeReadSpace();
if (freeSpace >= freeBufferThreshold)
{
Checkpoint cp = curState.getCheckpoint();
if (debugEnabled) _log.debug("Checkpoint at RequestBootstrapData: " + cp.toString());
_log.debug("Sending /bootstrap request");
Map<String, IdNamePair> srcNameMap = curState.getSourcesNameMap();
String curSrcName = null;
if (cp.getConsumptionMode() == DbusClientMode.BOOTSTRAP_SNAPSHOT)
{
curSrcName = cp.getSnapshotSource();
} else {
curSrcName = cp.getCatchupSource();
}
if ( null == _bootstrapFilter)
{
_bootstrapFilter = new DbusKeyCompositeFilter();
Map<String, IdNamePair> srcNameIdMap = curState.getSourcesNameMap();
for (DbusKeyCompositeFilterConfig conf : _bootstrapFilterConfigs)
{
Map<String, KeyFilterConfigHolder> cMap = conf.getConfigMap();
Map<Long, KeyFilterConfigHolder> fConfMap = new HashMap<Long, KeyFilterConfigHolder>();
for ( Entry<String, KeyFilterConfigHolder> e : cMap.entrySet())
{
IdNamePair idName = srcNameIdMap.get(e.getKey());
if ( null != idName)
{
fConfMap.put(idName.getId(),e.getValue());
}
}
_bootstrapFilter.merge(new DbusKeyCompositeFilter(fConfMap));
}
_bootstrapFilter.dedupe();
}
DbusKeyFilter filter = null;
IdNamePair srcEntry = srcNameMap.get(curSrcName);
if ( null != srcEntry)
{
Map<Long, DbusKeyFilter> fMap = _bootstrapFilter.getFilterMap();
if ( null != fMap)
filter = fMap.get(srcEntry.getId());
}
int fetchSize = (int)((curState.getDataEventsBuffer().getBufferFreeReadSpace() / 100.0) *
_pullerBufferUtilizationPct);
fetchSize = Math.max(freeBufferThreshold, fetchSize);
curState.switchToStreamRequestSent();
sendHeartbeat(_sourcesConn.getUnifiedClientStats());
curState.getBootstrapConnection().requestStream(
curState.getSourcesIdListString(),
filter,
fetchSize,
cp, curState);
}
else
{
try
{
Thread.sleep(50);
}
catch (InterruptedException ie) {}
enqueueMessage(curState);
}
}
protected void doReadBootstrapEvents(ConnectionState curState)
{
boolean success = true;
boolean debugEnabled = _log.isDebugEnabled();
boolean enqueueMessage = true;
try
{
Checkpoint cp = curState.getCheckpoint();
DbusEventBuffer eventBuffer = curState.getDataEventsBuffer();
if (debugEnabled) _log.debug("Sending bootstrap events to buffer");
//eventBuffer.startEvents();
DbusEventInternalReadable cpEvent = getEventFactory().createCheckpointEvent(cp);
byte[] cpEventBytes = new byte[cpEvent.size()];
if (debugEnabled)
{
_log.debug("checkpoint event size: " + cpEventBytes.length);
_log.debug("checkpoint event:" + cpEvent.toString());
}
cpEvent.getRawBytes().get(cpEventBytes);
ByteArrayInputStream cpIs = new ByteArrayInputStream(cpEventBytes);
ReadableByteChannel cpRbc = Channels.newChannel(cpIs);
UnifiedClientStats unifiedClientStats = _sourcesConn.getUnifiedClientStats();
sendHeartbeat(unifiedClientStats);
int ecnt = eventBuffer.readEvents(cpRbc);
success = (ecnt > 0);
if (!success)
{
_log.error("Unable to write bootstrap phase marker");
} else {
ChunkedBodyReadableByteChannel readChannel = curState.getReadChannel();
String remoteErrorName = RemoteExceptionHandler.getExceptionName(readChannel);
Throwable remoteError = _remoteExceptionHandler.getException(readChannel);
if (null != remoteError &&
remoteError instanceof BootstrapDatabaseTooOldException)
{
_log.error("Bootstrap database is too old!");
_remoteExceptionHandler.handleException(remoteError);
curState.switchToStreamResponseError();
}
else if (null != remoteErrorName)
{
//remote processing error
_log.error("read events error: " + RemoteExceptionHandler.getExceptionMessage(readChannel));
curState.switchToStreamResponseError();
}
else
{
sendHeartbeat(unifiedClientStats);
int eventsNum = eventBuffer.readEvents(readChannel, curState.getListeners(),
_sourcesConn.getBootstrapEventsStatsCollector());
if (eventsNum == 0 &&
_remoteExceptionHandler.getPendingEventSize(readChannel) > eventBuffer.getMaxReadBufferCapacity())
{
String err = "ReadBuffer max capacity(" + eventBuffer.getMaxReadBufferCapacity() +
") is less than event size(" +
_remoteExceptionHandler.getPendingEventSize(readChannel) +
"). Increase databus.client.connectionDefaults.bstEventBuffer.maxEventSize and restart.";
_log.fatal(err);
enqueueMessage(LifecycleMessage.createSuspendOnErroMessage(new PendingEventTooLargeException(err)));
return;
}
else
{
resetServerRetries();
if (debugEnabled) _log.debug("Sending events to buffer");
numEventsInCurrentState += eventsNum;
_log.info("Bootstrap events read so far: " + numEventsInCurrentState);
String status = readChannel.getMetadata("PhaseCompleted");
final BootstrapCheckpointHandler ckptHandler = curState.getBstCheckpointHandler();
if (status != null)
{ // set status in checkpoint to indicate that we are done with the current source
if (cp.getConsumptionMode() == DbusClientMode.BOOTSTRAP_CATCHUP)
{
ckptHandler.finalizeCatchupPhase(cp);
}
else if (cp.getConsumptionMode() == DbusClientMode.BOOTSTRAP_SNAPSHOT)
{
ckptHandler.finalizeSnapshotPhase(cp);
}
else
{
throw new RuntimeException("Invalid bootstrap phase: " + cp.getConsumptionMode());
}
_log.info("Bootstrap events read :" + numEventsInCurrentState + " during phase:"
+ cp.getConsumptionMode() + " [" + cp.getBootstrapSnapshotSourceIndex()
+ "," + cp.getBootstrapCatchupSourceIndex() + "]");
numEventsInCurrentState = 0;
}
else
{ // keep on reading more for the given snapshot
// question: how is snapshotOffset maintained in ckpt
if (eventsNum > 0)
{
cp.bootstrapCheckPoint();
}
}
curState.switchToStreamResponseDone();
}
}
}
}
catch (InterruptedException ie)
{
_log.error("interupted", ie);
success = false;
}
catch (InvalidEventException e)
{
_log.error("error reading events from server: " + e.getMessage(), e);
success = false;
}
catch (RuntimeException e)
{
_log.error("runtime error reading events from server: " + e.getMessage(), e);
success = false;
}
if (toTearConnAfterHandlingResponse())
{
tearConnectionAndEnqueuePickServer();
enqueueMessage = false;
} else if (!success) {
curState.switchToPickServer();
}
if ( enqueueMessage )
enqueueMessage(curState);
}
protected void doStreamResponseDone(ConnectionState curState)
{
boolean debugEnabled = _log.isDebugEnabled();
Checkpoint cp = curState.getCheckpoint();
if (debugEnabled) _log.debug("Checkpoint at EventsDone: " + cp);
determineNextStateFromCheckpoint(curState);
// if we successfully got some data - reset retries counter.
_retriesBeforeCkptCleanup.reset();
enqueueMessage(curState);
}
/**
* Update and persist checkpoint at the end of bootstrap phase so that
* the online phase can continue from it.
* @param cp
* @throws IOException
*/
protected void processBootstrapComplete(Checkpoint cp, ConnectionState curState)
throws IOException, DatabusException
{
logBootstrapPhase(DbusClientMode.BOOTSTRAP_CATCHUP, cp.getBootstrapSnapshotSourceIndex(), cp.getBootstrapCatchupSourceIndex());
_log.info("Bootstrap got completed !! Checkpoint is :" + cp.toString());
/*
* DDS-989
* WindowSCN need not match the bootstrapTargetSCN always when we are catching up multiple sources.
* So set the windowSCN to be that of targetSCN as we are consistent as of targetSCN
*/
cp.setWindowScn(cp.getBootstrapTargetScn());
cp.setPrevScn(cp.getBootstrapTargetScn());
cp.setConsumptionMode(DbusClientMode.ONLINE_CONSUMPTION);
cp.resetBootstrap(); // clear Bootstrap scns for future bootstraps
DbusEventBuffer eventBuffer = curState.getDataEventsBuffer();
try
{
DbusEventInternalReadable cpEvent = getEventFactory().createCheckpointEvent(cp);
boolean success = eventBuffer.injectEvent(cpEvent);
if (!success)
{
_log.error("Unable to write bootstrap phase marker");
}
else
{
//TODO need real partition for partitioned bootstrap
DbusEventInternalReadable eopEvent = curState.createEopEvent(cp, getEventFactory());
success = eventBuffer.injectEvent(eopEvent);
if (! success)
{
_log.error("Unable to write bootstrap EOP marker");
}
}
}
catch (InvalidEventException iee)
{
_log.error("Unable to write bootstrap phase marker", iee);
}
unlockV3Bootstrap();
}
//TODO REMOVE ME
/*
private Checkpoint initCheckpointForSnapshot(Checkpoint ckpt, Long sinceScn)
{
String source = _currentState.getSourcesNames().get(ckpt.getBootstrapSnapshotSourceIndex());
if (null == source)
{
throw new RuntimeException("no sources available for snapshot");
}
ckpt.setConsumptionMode(DbusClientMode.BOOTSTRAP_SNAPSHOT);
ckpt.setSnapshotSource(source);
ckpt.startSnapShotSource();
// need to reset scn because it means the rid in the snapshot table
ckpt.setSnapshotOffset(0);
// set since scn
ckpt.setBootstrapSinceScn(sinceScn);
return ckpt;
}
*/
/*
private Checkpoint initCheckpointForCatchup(Checkpoint ckpt)
{
String source = _currentState.getSourcesNames().get(ckpt.getBootstrapCatchupSourceIndex());
if (null == source)
{
throw new RuntimeException("no sources available for catchup");
}
ckpt.setConsumptionMode(DbusClientMode.BOOTSTRAP_CATCHUP);
ckpt.setCatchupSource(source);
ckpt.startCatchupSource();
ckpt.setWindowScn(ckpt.getBootstrapStartScn());
return ckpt;
}
*/
protected void sendErrorEventToDispatcher(ConnectionState curState)
{
// TODO: add implementation after CB's branch merges (DDSDBUS-87)
}
private void logBootstrapPhase(DbusClientMode mode, int snapshotSrcId, int catchupSrcId)
{
_log.info("Bootstrap phase completed - " + mode + " [" + snapshotSrcId + ", " + catchupSrcId + "]");
}
private void processStreamRequestError(ConnectionState state)
{
//TODO add statistics (DDSDBUS-88)
if (toTearConnAfterHandlingResponse())
{
tearConnectionAndEnqueuePickServer();
} else {
state.switchToPickServer();
enqueueMessage(state);
}
}
private void processStreamResponseError(ConnectionState state)
{
//TODO add statistics (DDSDBUS-88)
if (toTearConnAfterHandlingResponse())
{
tearConnectionAndEnqueuePickServer();
} else {
state.switchToPickServer();
enqueueMessage(state);
}
}
private void processTargetScnResponseError(ConnectionState currentState)
{
//TODO add statistics (DDSDBUS-88)
if (toTearConnAfterHandlingResponse())
{
tearConnectionAndEnqueuePickServer();
} else {
currentState.switchToPickServer();
enqueueMessage(currentState);
}
}
private void processTargetScnRequestError(ConnectionState currentState)
{
//TODO add statistics (DDSDBUS-88)
if (toTearConnAfterHandlingResponse())
{
tearConnectionAndEnqueuePickServer();
} else {
currentState.switchToPickServer();
enqueueMessage(currentState);
}
}
private void processStartScnResponseError(ConnectionState currentState)
{
//TODO add statistics((DDSDBUS-88)
if (toTearConnAfterHandlingResponse())
{
tearConnectionAndEnqueuePickServer();
} else {
currentState.switchToPickServer();
enqueueMessage(currentState);
}
}
private void processStartScnRequestError(ConnectionState currentState)
{
//TODO add statistics((DDSDBUS-88)
if (toTearConnAfterHandlingResponse())
{
tearConnectionAndEnqueuePickServer();
} else {
currentState.switchToPickServer();
enqueueMessage(currentState);
}
}
@Override
protected void resetConnection()
{
DatabusServerConnection bootstrapConnection = _currentState.getBootstrapConnection();
if ( null != bootstrapConnection)
{
bootstrapConnection.close();
_currentState.setBootstrapConnection(null);
}
}
protected BackoffTimer getRetriesBeforeCkptCleanup()
{
return _retriesBeforeCkptCleanup;
}
/**
* Determines the next state based on the checkpoint. The idea is to determine where we are in the bootstrap flow and
* move to the next state.
*
* <pre>
* 1. Request startSCN (State=REQUEST_START_SCN, SNAPSHOT, !cp.isBootstrapStartScnSet())
* 2. For each snapshot source:
* 2.1. Start snapshot (State=REQUEST_STREAM, SNAPSHOT, cp.isBootstrapStartScnSet() &&
* 0 == cp.getSnapshotOffset())
* 2.2. While (! cp.isSnapShotSourceCompleted())
* 2.2.1. Continue snapshot (State=REQUEST_STREAM, SNAPSHOT, cp.isBootstrapStartScnSet() &&
* 0 < cp.getSnapshotOffset())
* 2.3. Request targetSCN (State=REQUEST_TARGET_SCN, SNAPSHOT, cp.isBootstrapStartScnSet() &&
* cp.isSnapShotSourceCompleted() && 0 == cp.getWindowOffset())
* 2.4. For each catchup source <= the snapshot source:
* 2.4.1. Start catchup (State=REQUEST_STREAM, CATCHUP, 0==cp.getWindowOffset() && handler.needsMoreCatchup())
* 2.4.2. While (! cp.isCatchupSourceCompleted())
* 2.4.2.1. Continue catchup (State=REQUEST_STREAM, CATCHUP, ! cp.isCatchupSourceCompleted())
* </pre>
* @param curState the bootstrap checkpoint
*/
private void determineNextStateFromCheckpoint(ConnectionState curState)
{
try
{
final Checkpoint cp = curState.getCheckpoint();
final BootstrapCheckpointHandler cpHandler = curState.getBstCheckpointHandler();
cpHandler.assertBootstrapCheckpoint(cp);
switch (cp.getConsumptionMode())
{
case BOOTSTRAP_SNAPSHOT:
determineNextStateFromSnapshotCheckpoint(cp, cpHandler, curState);
break;
case BOOTSTRAP_CATCHUP:
determineNextStateFromCatchupCheckpoint(cp, cpHandler, curState);
break;
default:
_log.fatal("unexpected bootstrap checkpoint type: " + cp + "; shutting down");
curState.switchToClosed();
}
}
catch (InvalidCheckpointException e)
{
_log.fatal("invalid bootstrap checkpoint:", e);
curState.switchToClosed();
}
}
/**
* Determines the next state based on snapshot the checkpoint. See comments for
* {@link #determineNextStateFromCatchupCheckpoint(Checkpoint, BootstrapCheckpointHandler, ConnectionState)
*
* @param cp the checkpoint
* @param cpHandler the handler to modify the checkpoint
* @param curState the state to modify
*/
private void determineNextStateFromSnapshotCheckpoint(Checkpoint cp,
BootstrapCheckpointHandler cpHandler,
ConnectionState curState)
{
if (!cp.isBootstrapStartScnSet())
{
//(*, !cp.isBootstrapStartScnSet()) --> (REQUEST_START_SCN)
curState.switchToRequestStartScn(cp);
}
else if (!cp.isSnapShotSourceCompleted())
{
//(*, cp.isBootstrapStartScnSet() && ! cp.isSnapShotSourceCompleted()) --> (REQUEST_STREAM)
curState.switchToRequestStream(cp);
}
else
{
//Snapshot complete -- send /targetSCN
logBootstrapPhase(DbusClientMode.BOOTSTRAP_SNAPSHOT, cp.getBootstrapSnapshotSourceIndex(),
cp.getBootstrapCatchupSourceIndex());
//cpHandler.advanceAfterSnapshotPhase(cp);
curState.switchToRequestTargetScn(cp);
}
}
/**
* Determines the next state based on catchup the checkpoint. See comments for
* {@link #determineNextStateFromCatchupCheckpoint(Checkpoint, BootstrapCheckpointHandler, ConnectionState)
* @param cp the checkpoint
* @param cpHandler the handler to modify the checkpoint
* @param curState the state to modify
*/
private void determineNextStateFromCatchupCheckpoint(Checkpoint cp,
BootstrapCheckpointHandler cpHandler,
ConnectionState curState)
{
if (!cp.isCatchupSourceCompleted())
{
//Finish current catchup source
curState.switchToRequestStream(cp);
}
else
{
logBootstrapPhase(DbusClientMode.BOOTSTRAP_CATCHUP, cp.getBootstrapSnapshotSourceIndex(), cp.getBootstrapCatchupSourceIndex());
cpHandler.advanceAfterCatchupPhase(cp);
if (cpHandler.needsMoreCatchup(cp))
{
//Current catchup source is done but there are more
curState.switchToRequestStream(cp);
}
else if (cpHandler.needsMoreSnapshot(cp))
{
//All catchup sources are done, try next snapshot source
curState.switchToRequestStream(cp);
}
else
{
//no snapshot or catchup source left -- bootstrap complete
// write endOfPeriodMarker to trigger end sequence callback for bootstrap
DbusEventInternalReadable eopEvent =
getEventFactory().createLongKeyEOPEvent(cp.getBootstrapTargetScn(), (short) 0);
try
{
boolean success = curState.getDataEventsBuffer().injectEvent(eopEvent);
if (success)
{
// persist the checkpoint so BootstrapPullThread will get it and continue streaming
try
{
processBootstrapComplete(cp, curState);
curState.switchToBootstrapDone();
}
catch (IOException e)
{
_log.error("Unable to persist checkpoint at the end of bootstrap", e);
curState.switchToPickServer();
}
catch (DatabusException e)
{
_log.error("Unable to complete bootstrap", e);
curState.switchToPickServer();
}
}
else
{
_log.error("Unable to write bootstrap EOP marker");
curState.switchToPickServer();
}
}
catch (InvalidEventException e1)
{
_log.error("Unable to write bootstrap EOP marker", e1);
}
}
}
}
/**
* A method to safely acquire a lock on underlying re-entrant lock to serialize bootstrap
* across partitions
* The method is a no-op is the lock has *already* been acquired by current thread
*/
private void lockV3Bootstrap()
{
if (null != _v3BootstrapLock)
{
if (_v3BootstrapLock.isHeldByCurrentThread())
{
_log.warn("lockV3Bootstrap is a no-op as the thread is already owner of bootstrap lock. Lock state = " +
_v3BootstrapLock.toString());
return;
}
_log.info("Waiting for bootstrap lock " + toString());
_v3BootstrapLock.lock();
_log.info("Obtained the bootstrap lock " + toString());
}
}
/**
* A method to safely release a lock on underlying re-entrant lock to serialize bootstrap
* across partitions.
* The method is a no-op is the lock has *not* been acquired by current thread
*
* The shutdown flag is used to determine if not possessing the lock should be logged as a warning (rather than info)
* This is because in normal processing:
* Lock is acquired in doStart(), and relinquished in processBootstrapComplete(). By the time, shutdown is invoked,
* the lock is not owned by current thread.
*
*/
private void unlockV3Bootstrap(boolean shutdownCase)
{
if (null != _v3BootstrapLock)
{
// If the lock is not held, invoking an unlock throws an IllegalStateMonitorException
// Check for this case
if (! _v3BootstrapLock.isHeldByCurrentThread())
{
String errMsg = "unlockV3Bootstrap is a no-op as current thread is NOT owner of bootstrap lock. Lock state = " +
_v3BootstrapLock.toString();
if (shutdownCase)
{
_log.info(errMsg);
}
else
{
_log.warn(errMsg);
}
return;
}
_v3BootstrapLock.unlock();
_log.info("Unlocked BootstrapPuller " + this.toString());
}
}
/**
* This unlock method is normally invoked, except in case of shutdown when we want some variation in how we log messages
*/
private void unlockV3Bootstrap()
{
unlockV3Bootstrap(false);
}
protected ReentrantLock getV3BootstrapLock()
{
return _v3BootstrapLock;
}
}