package tcg.syscontrol;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Method;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Properties;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.NDC;
import tcg.syscontrol.cos.CosFailedToRegisterException;
import tcg.syscontrol.cos.CosIndexOutOfBoundException;
import tcg.syscontrol.cos.CosLogLevelEnum;
import tcg.syscontrol.cos.CosOperationModeEnum;
import tcg.syscontrol.cos.CosOperationModeEnumHolder;
import tcg.syscontrol.cos.CosProcessDataStruct;
import tcg.syscontrol.cos.CosProcessRunningException;
import tcg.syscontrol.cos.CosProcessRuntimeDataStruct;
import tcg.syscontrol.cos.CosProcessStatusEnum;
import tcg.syscontrol.cos.CosProcessTypeEnum;
import tcg.syscontrol.cos.CosRunParamStruct;
import tcg.syscontrol.cos.CosTerminationCodeEnum;
import tcg.syscontrol.cos.CosUnknownProcessException;
import tcg.syscontrol.cos.ICosManagedProcess;
import tcg.syscontrol.cos.ICosManagedProcessHelper;
import tcg.syscontrol.cos.ICosMonitoredThread;
import tcg.syscontrol.cos.ICosProcessManager;
import tcg.syscontrol.cos.ICosProcessManagerHelper;
import tcg.syscontrol.cos.ICosProcessManagerPOA;
import tcg.syscontrol.cos.DEFAULT_MGR_PORT;
import tcg.syscontrol.cos.LOG_FILE_KEY;
import tcg.syscontrol.cos.LOG_LEVEL_KEY;
import tcg.syscontrol.cos.MANAGER_PORT_KEY;
import tcg.syscontrol.cos.STR_PROCESS_MANAGER;
import tcg.common.CorbaHelper;
import tcg.common.CorbaManager;
import tcg.common.DatabaseManager;
import tcg.common.Utilities;
import tcg.common.LoggerManager;
import tcg.common.DatabaseManager.DatabaseType;
import tcg.common.util.ProcessExecution;
public class ProcessManager extends ICosProcessManagerPOA
{
//program version, printed by printVersion() and printUsage()
private static final String VERSION = "02.00 (20091115)";
private static final int DEF_THREAD_WAIT_MSEC = 1000; //in msec
//also used as the heartbeat rate of peers that register without a pre-configured entry
private static final int DEF_HEARTBEAT_RATE_MSEC = 1000; //in msec
private static final int DEF_MAX_RESTART = 10;
private static final int DEF_POLL_RATE_MSEC = 1000;
private static final int DEF_PEER_SYNC_RATE_MSEC = 1000;
//default timeout values (in seconds); applied to processes that register
//without a pre-existing entry in the managed process list
private static final int DEF_START_TIMEOUT_SEC = 60;
private static final int DEF_CONTROL_TIMEOUT_SEC = 60;
private static final int DEF_MONITOR_TIMEOUT_SEC = 60;
private static final int DEF_TERMINATE_TIMEOUT_SEC = 60;
//default configuration file name
private static final String DEF_CONFIG_FILE = "scada.properties";
//default database table names
private static final String DEF_TABLE_MANAGER = "SC_PROCESS_MANAGER";
private static final String DEF_TABLE_PROCESSLIST = "SC_MANAGED_PROCESS";
private static final String DEF_TABLE_PROCESSCONFIG = "SC_PROCESS_CONFIG";
//general error threshold
private static final int DEF_ERROR_THRESHOLD = 3;
//how many times a failed corba operation is attempted
//NOTE: internally, jacorb has already retried several times before reporting a failure
// on a corba operation, so we do not need to retry multiple times ourselves
private static final int CORBA_ERROR_THRESHOLD = 1;
//class-wide logger
protected static Logger logger_ = LoggerManager.getLogger(ProcessManager.class.toString());
//managed processes, keyed by entity name
//writers in this class hold the map's monitor (synchronized(managedProcesses_))
private HashMap<String, ManagedProcessStruct> managedProcesses_
= new HashMap<String, ManagedProcessStruct>();
//corba server lookup, keyed by the server's unique key
private HashMap<String, CorbaServerStruct> corbaServers_
= new HashMap<String, CorbaServerStruct>();
//currently registered (active) peers, keyed by peer name
//writers in this class hold the map's monitor (synchronized(activePeers_))
private HashMap<String, PeerManagerStruct> activePeers_
= new HashMap<String, PeerManagerStruct>();
//server key to uniquely identify this corba server
private String serverKey_ = STR_PROCESS_MANAGER.value;
//runtime configuration
private String name_ = "";
private int portNo_ = 0;
private int maxRestart_ = DEF_MAX_RESTART;
private int pollRateMillis_ = DEF_POLL_RATE_MSEC;
private String configFile_ = "";
private Properties props_ = new Properties();
private String logdir_ = "";
//how often we synchronize the status with the managed processes.
//this ensures that the internal state stays consistent with the actual process state
//even if a managed process is not fully compliant with the process manager.
private int pollSyncInterval_ = 60;
//runtime parameters that we need to pass to the managed processes
private Properties runtimeProps_ = new Properties();
//peers configured in the database, keyed by peer name; consulted when a peer
//registers and is not yet in the active peer list
private HashMap<String, PeerManagerStruct> configuredPeers_
= new HashMap<String, PeerManagerStruct>();
//state of the process manager itself; peer synchronization requests are only
//acted upon while this is StatRunningControl
private CosProcessStatusEnum state_ = CosProcessStatusEnum.StatUnstarted;
//worker threads
private ProcessPollerThread processPoller_ = new ProcessPollerThread();
private PeerSynchThread peerSynch_ = new PeerSynchThread();
/**
* Main entry.
* @param args - list of command line arguments
*/
public static void main(String[] args)
{
    //first pass over the command line: serve the informational switches
    //(version/help) right away and pick up the log directory, if given
    String logPath = "";
    int pos = 0;
    while (pos < args.length)
    {
        final String arg = args[pos];
        if (arg.equalsIgnoreCase("--version") || arg.equalsIgnoreCase("-V"))
        {
            printVersion();
            return;
        }
        if (arg.equalsIgnoreCase("--help") || arg.equalsIgnoreCase("-h"))
        {
            printUsage();
            return;
        }
        if (arg.equalsIgnoreCase("--logdir") || arg.equalsIgnoreCase("-l"))
        {
            //the directory is the next argument; a trailing switch without a value is ignored
            pos++;
            if (pos < args.length)
            {
                logPath = args[pos] + File.separatorChar + "processmanager.log";
            }
        }
        pos++;
    }

    //create the process manager instance
    ProcessManager manager = new ProcessManager();

    //redirect logging: fall back to the current directory when no log directory was given
    if (logPath.length() == 0)
    {
        logPath = Utilities.getCurrentDir() + File.separatorChar + "processmanager.log";
    }
    LoggerManager.setLogFile(logPath);

    logger_.info("---- Process Manager starting ----");

    //let the instance parse the command line and configure itself
    boolean initialized = manager.initialize(args);
    if (!initialized)
    {
        logger_.error("Failed to initialize. Quitting!");
        logger_.info("---- Process Manager has shut down ----");
        return;
    }

    //dump the effective configuration as visual feedback;
    //values of properties whose name contains "password" are masked
    Enumeration<?> names = manager.props_.propertyNames();
    while (names.hasMoreElements())
    {
        String key = (String) names.nextElement();
        String shown = manager.props_.getProperty(key);
        if (key.toLowerCase().contains("password"))
        {
            shown = shown.replaceAll(".", "*");
        }
        logger_.info("Property " + key + " = " + shown);
    }

    //install a shutdown hook to catch CTRL+C and other abrupt termination
    ShutdownHook hook = new ShutdownHook(manager);
    try
    {
        Runtime.getRuntime().addShutdownHook(hook);
    }
    catch (Exception ex)
    {
        logger_.warn("Can not install shutdown hook. Exception: " + ex.toString());
    }

    //run the process manager; run() returns when the manager is done
    logger_.info("---- Process Manager is running ----");
    manager.run();

    //shutting down; nothing to clean up here
    logger_.info("---- Process Manager is shutting down ----");
    logger_.info("---- Process Manager has shut down ----");
}
protected void shutdown()
{
    //JVM shutdown path: the only work to do is to stop every managed process
    this.stop_managed_processes();
}
private static void printVersion()
{
    //one-line banner: product name followed by the version string
    final String banner = "Process Manager Version " + VERSION;
    System.out.println(banner);
}
private static final void printUsage()
{
    //the help screen, one array element per output line (empty string = blank line)
    final String[] helpLines = {
        "Process Manager Version " + VERSION,
        "",
        "Command Line Parameters: ",
        " -n | --name <hostname> Process Manager hostname",
        " -l | --logdir <directory> Log directory",
        " -f | --config-file <config-file> Configuration file",
        " -cp | --corba-port <port-no> Corba port to bind to",
        " -pn | --peer-name <host-name> Peer hostname (if running as stand-alone)",
        " -pp | --peer-port <port-no> Peer port number (if running as stand-alone)",
        " -h | --help Print out this help",
        " -v | --version Print out program version",
        "",
        "Java System Properties (java -Dname=value option): ",
        "",
        "Other Java Properties (Configuration file or System properties): ",
        " tcg.db.type Database type",
        " tcg.db.name Database TNS name",
        " tcg.db.user Database username",
        " tcg.db.password Database password",
        " tcg.db.encyrpted Whether the password is encrypted",
        " tcg.event.server1 Primary event server",
        " tcg.event.server2 Secondary event server",
        "",
        "Command line parameters will override java properties and configuration file value.",
        "",
        "Component Library Information:",
        "\t - Quartz Scheduler Ver. 1.6.0",
        "\t - Log4J Ver. 1.2.12",
        "\t - Oracle JDBC Ver. 10.2",
        "\t - JacORB Ver. 2.3",
        "\t - Avalon Framework Ver. 4.1.5",
        "",
        "Other Component Information:",
        ""
    };
    for (int i = 0; i < helpLines.length; i++)
    {
        System.out.println(helpLines[i]);
    }
}
/** -------------------------------------------------------------------------------- *
* PROCESS MANAGER OPERATION *
* -------------------------------------------------------------------------------- **/
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosTerminate()
*/
public void cosTerminate()
{
    //remote termination request: hand over to the local stop routine
    stop();
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetProcessManagerName()
*/
public String cosGetProcessManagerName()
{
    //the configured name of this process manager (empty until configured)
    return this.name_;
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosSetLogLevel(tcg.syscontrol.cos.CosLogLevelEnum)
*/
public void cosSetLogLevel(CosLogLevelEnum loglevel)
{
    logger_.info("Setting new global log level to " + CorbaHelper.LogLevelToString(loglevel));

    //map the corba log level onto the log4j level.
    //"normal" is treated as INFO; anything unrecognised falls back to DEBUG
    final Level target;
    switch (loglevel.value())
    {
        case CosLogLevelEnum._LogTrace:
            target = Level.TRACE;
            break;
        case CosLogLevelEnum._LogInfo:
        case CosLogLevelEnum._LogNormal:
            target = Level.INFO;
            break;
        case CosLogLevelEnum._LogWarn:
            target = Level.WARN;
            break;
        case CosLogLevelEnum._LogError:
            target = Level.ERROR;
            break;
        case CosLogLevelEnum._LogFatal:
            target = Level.FATAL;
            break;
        case CosLogLevelEnum._LogDebug:
        default:
            target = Level.DEBUG;
            break;
    }

    //apply it globally
    LoggerManager.setLogLevel(target);
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosSetLogLevelDetail(java.lang.String, java.lang.String)
*/
public void cosSetLogLevelDetail(String logger, CosLogLevelEnum loglevel)
{
    logger_.info("Setting new log level to " + CorbaHelper.LogLevelToString(loglevel)
            + " for Logger " + logger );

    //map the corba log level onto the log4j level.
    //"normal" is treated as INFO; anything unrecognised falls back to DEBUG
    final Level target;
    switch (loglevel.value())
    {
        case CosLogLevelEnum._LogTrace:
            target = Level.TRACE;
            break;
        case CosLogLevelEnum._LogInfo:
        case CosLogLevelEnum._LogNormal:
            target = Level.INFO;
            break;
        case CosLogLevelEnum._LogWarn:
            target = Level.WARN;
            break;
        case CosLogLevelEnum._LogError:
            target = Level.ERROR;
            break;
        case CosLogLevelEnum._LogFatal:
            target = Level.FATAL;
            break;
        case CosLogLevelEnum._LogDebug:
        default:
            target = Level.DEBUG;
            break;
    }

    //apply it to the named logger only
    LoggerManager.setLogLevelDetail(logger, target);
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosPoll()
*/
public void cosPoll()
{
    //liveness probe: completing the call without error is the whole answer
}
/** -------------------------------------------------------------------------------- *
* GENERAL PEER SYNCHRONIZATION *
* -------------------------------------------------------------------------------- **/
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosRegisterPeer(java.lang.String, tcg.syscontrol.cos.ICosProcessManager)
*/
public void cosRegisterPeer(String peerName, ICosProcessManager p_peerManager)
        throws CosFailedToRegisterException
{
    logger_.debug("Peer " + peerName + " is trying to register.");

    //a peer without a name cannot be stored
    if (peerName.length() == 0)
    {
        throw new CosFailedToRegisterException();
    }

    //logging context
    NDC.push(peerName);

    //verbose: log the stringified reference
    final String peerIor = CorbaManager.objectToString(p_peerManager);
    logger_.trace("Peer Manager IOR: " + peerIor );

    //the registering peer must answer a poll before we accept it
    if (!poll_manager(p_peerManager))
    {
        logger_.error("Can not poll the registering peer!!!");
        NDC.pop();
        throw new CosFailedToRegisterException();
    }
    logger_.debug("Registering peer manager is running. I can poll!");

    //critical section
    synchronized (activePeers_)
    {
        PeerManagerStruct entry = activePeers_.get(peerName);
        if (entry != null)
        {
            //already active: just refresh the reference
            entry.reference = p_peerManager;
            entry.ior = peerIor;
        }
        else
        {
            //not active yet: take the pre-configured entry if there is one,
            //otherwise build an adhoc entry with the default heartbeat rate
            String key;
            entry = configuredPeers_.get(peerName);
            if (entry != null)
            {
                key = entry.name;
            }
            else
            {
                entry = new PeerManagerStruct();
                entry.name = peerName;
                entry.adhoc = true;
                entry.heartbeatMillis = DEF_HEARTBEAT_RATE_MSEC;
                key = peerName;
            }
            //store the reference, flag it active and publish it in the active list
            entry.reference = p_peerManager;
            entry.ior = peerIor;
            entry.isActive = true;
            activePeers_.put(key, entry);
        }
    } //end critical section

    //don't synchronize with the new peer. let it synchronize with me
    //notify all managed processes
    //TODO

    //start peer poller if it is not already running
    peerSynch_.start();

    NDC.pop();
    logger_.debug("Peer " + peerName +" has sucessfully registered.");
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosUnregisterPeer(java.lang.String)
*/
public void cosUnregisterPeer(String peerName)
{
    logger_.debug("Peer " + peerName + " is trying to unregister.");

    //logging context
    NDC.push(peerName);

    //critical section
    synchronized (activePeers_)
    {
        //take the peer out of the active list; null means it was never there
        final PeerManagerStruct removed = activePeers_.remove(peerName);
        if (removed == null)
        {
            logger_.warn("Not recognized peer manager name. Ignored.");
            NDC.pop();
            return;
        }

        //a pre-configured (non-adhoc) peer keeps its struct and is merely flagged inactive
        if (!removed.adhoc)
        {
            removed.isActive = false;
        }
    } //end of critical section

    //notify all managed processes
    //TODO

    //no need to stop peer poller if it is running. it will stop itself if there is no more peer.
    NDC.pop();
    logger_.debug("Peer " + peerName + " has successfully unregistered.");
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetPeerManager(java.lang.String)
*/
public ICosProcessManager cosGetPeerManager(String peerName)
{
    //Returns the reference of an active peer manager, or null when no matching
    //peer is registered.
    //  - null/empty peerName: any active peer will do, so return the first one found
    //  - otherwise: return the peer registered under exactly that name
    //FIX: the original tested peerName.length() > 0, which swapped the two cases
    //(a named lookup returned an arbitrary peer, an empty name did a keyed lookup).
    PeerManagerStruct manager = null;
    if (peerName == null || peerName.length() == 0)
    {
        //get the first peer available
        Iterator<PeerManagerStruct> it = activePeers_.values().iterator();
        if (it.hasNext())
        {
            manager = it.next();
        }
    }
    else
    {
        manager = activePeers_.get(peerName);
    }

    //return value
    if (manager != null)
    {
        return manager.reference;
    }
    return null;
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosSynchronizeManagedProcess(java.lang.String, java.lang.String, short, tcg.syscontrol.cos.CosProcessStatusEnum)
*/
public void cosSynchronizeManagedProcess(String peerName, String entity,
short weightage, CosProcessStatusEnum status)
{
//Handles a peer's notification that its instance of managed process "entity"
//(with the given weightage) changed to "status", and arbitrates which side
//should be in CONTROL:
// - peer going to / running CONTROL while the local process is in CONTROL:
//   the side with the lower weightage is asked to go to MONITOR (a tie favours
//   the local process)
// - peer running CONTROL: the peer's process becomes the active reference
// - any other peer status: the local process is asked to take CONTROL
//The request is silently dropped when there is no active peer or this manager
//is not in StatRunningControl, and dropped with a warning when the peer or the
//local process is unknown / has no reference / is not in a running status.
//Failures of remote calls are logged and otherwise ignored.

//if it is not configured with a peer or we are starting/terminating, ignore it
if (activePeers_.size() == 0 || state_ != CosProcessStatusEnum.StatRunningControl)
{
return;
}
logger_.debug("Peer " + peerName + " status change. Entity: " + entity +". Weight: " +weightage +". Status: "
+ CorbaHelper.ProcessStateToString(status) );
//logging context (popped on every exit path below)
NDC.push(peerName + ":" + entity);
//get the peer reference
PeerManagerStruct peer = activePeers_.get(peerName);
if (peer == null || peer.reference == null)
{
//can not find the associated peer
logger_.warn("Invalid peer name. Synchronization ignored!");
NDC.pop();
return;
}
//get the runtime info
ManagedProcessStruct process = managedProcesses_.get(entity);
if (process == null || process.reference == null)
{
//no matching managed process or not running. ignore
logger_.warn("Invalid/unknown managed process. Synchronization ignored!");
NDC.pop();
return;
}
//make sure the current process is running
if (!is_running_status(process.runtime.state))
{
//local process is not in a running status. ignore
logger_.warn("Current managed process is not running. Synchronization ignored!");
NDC.pop();
return;
}
CosOperationModeEnum curmode;
switch(status.value())
{
//peer tries to go into CONTROL
case CosProcessStatusEnum._StatGoingToControl:
case CosProcessStatusEnum._StatRunningControl:
//determine current mode of the local process
curmode = translate_process_status(process.runtime.state);
if (curmode == CosOperationModeEnum.OperControl)
{
//my process is currently in CONTROL
//if I have higher or the same weightage, ask peer to stand down
if (process.runtime.weightage >= weightage)
{
logger_.debug("I have higher weightage. Will ask peer process to go to MONITOR");
try
{
peer.reference.cosSetProcessOperationMode(entity, CosOperationModeEnum.OperMonitor);
//successful remote call: reset the peer's error counter
peer.errorCounter = 0;
}
catch (Exception ex)
{
//ignore other than logging it and counting the failure against the peer
logger_.warn("Can not set peer process to go to MONITOR. Exception: "
+ ex.toString());
peer.errorCounter++;
}
}
else
{
//otherwise, ask my process to stand down
logger_.debug("Peer process has higher weightage. Will ask process to go to MONITOR");
try
{
process.reference.cosSetOperationMode(CosOperationModeEnum.OperMonitor);
}
catch (Exception ex)
{
logger_.warn("Can not get process to go to MONITOR. Exception: " + ex.toString());
}
}
}
else
{
//My process is in MONITOR but my process might have higher priority
//ignore it. otherwise, it might trigger unnecessary switching.
//besides, maybe I manually set the current operation mode to MONITOR
}
//update the active reference, if necessary
//(only once the peer is actually running in CONTROL, not while it is still switching)
if (status == CosProcessStatusEnum.StatRunningControl)
{
logger_.trace("Changing the active reference...");
//then get the associated managed process from peer
ICosManagedProcess processRef = null;
try
{
processRef = peer.reference.cosGetManagedProcess2(entity);
peer.errorCounter = 0;
}
catch (Exception ex)
{
logger_.warn("Can not get reference to peer process. Exception: " + ex.toString());
peer.errorCounter++;
}
//then update the active reference (kept unchanged if the lookup failed)
if (processRef != null)
{
process.activeReference = processRef;
}
} //if (status == StatRunningControl)
break;
//everything else, ask our process to take control
default:
try
{
process.reference.cosSetOperationMode(CosOperationModeEnum.OperControl);
}
catch (Exception ex)
{
logger_.warn("Can not get process to go to CONTROL. Exception: " + ex.toString());
}
}
NDC.pop();
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosSynchronizeCorbaServer(java.lang.String, java.lang.String, tcg.syscontrol.cos.CosProcessStatusEnum)
*/
public void cosSynchronizeCorbaServer(String peerName, String uniqueKey,
        CosOperationModeEnum mode)
{
    //nothing to do without a peer, or while we are starting/terminating
    if (activePeers_.isEmpty() || state_ != CosProcessStatusEnum.StatRunningControl)
    {
        return;
    }

    logger_.debug("Peer " + peerName + " status change. Server name: " + uniqueKey +". Operation mode: "
            + CorbaHelper.OperationModeToString(mode) );

    //we do not really control a named corba server; synchronization only exists to
    //keep activeReference up to date, so only a peer switching to CONTROL matters
    if (mode != CosOperationModeEnum.OperControl)
    {
        return;
    }

    //logging context
    NDC.push(peerName + ":" + uniqueKey);

    //resolve the peer
    final PeerManagerStruct remotePeer = activePeers_.get(peerName);
    if (remotePeer == null || remotePeer.reference == null)
    {
        logger_.warn("Invalid peer name. Synchronization ignored!");
        NDC.pop();
        return;
    }

    //resolve the local corba server entry
    final CorbaServerStruct localServer = corbaServers_.get(uniqueKey);
    if (localServer == null || localServer.reference == null)
    {
        logger_.warn("Invalid/unknown corba server. Synchronization ignored!");
        NDC.pop();
        return;
    }

    //when both sides are in CONTROL the local server wins
    if (localServer.operationMode == CosOperationModeEnum.OperControl)
    {
        logger_.warn("Our corba server " + uniqueKey +" is also in CONTROL. Prefer local server."
                + " Synchronization ignored!");
        NDC.pop();
        return;
    }

    //switch the active reference over to the peer's server
    logger_.trace("Changing the active reference...");
    try
    {
        final ICosMonitoredThread remoteServer = remotePeer.reference.cosGetCorbaServer(uniqueKey);
        remotePeer.errorCounter = 0;
        //a null answer leaves the current active reference untouched
        if (remoteServer != null)
        {
            localServer.activeReference = remoteServer;
        }
    }
    catch (Exception ex)
    {
        logger_.warn("Can not get reference to peer corba server. Exception: " + ex.toString());
        remotePeer.errorCounter++;
    }

    NDC.pop();
}
/** -------------------------------------------------------------------------------- *
* MANAGED PROCESS LOOKUP *
* -------------------------------------------------------------------------------- **/
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosRegisterManagedProcess(java.lang.String, tcg.syscontrol.cos.CosProcessTypeEnum, tcg.syscontrol.cos.ICosManagedProcess, int)
*/
public void cosRegisterManagedProcess(String entity, CosProcessTypeEnum processType,
        ICosManagedProcess managedProcess, long processId)
        throws CosProcessRunningException, CosFailedToRegisterException
{
    //Registers a (re)started managed process with this manager.
    //  entity         - name of the managed process; must not be null/empty
    //  processType    - type reported by the process
    //  managedProcess - corba reference of the process; it must answer a poll
    //  processId      - OS process id reported by the process
    //Throws CosFailedToRegisterException when the name is empty or the process
    //cannot be polled. A process that is not yet in the managed process list is
    //added as an adhoc entry with default settings; a known one only gets its
    //runtime values refreshed.

    //cannot store process with empty process name
    if (entity == null || entity.length() == 0)
    {
        throw new CosFailedToRegisterException();
    }

    logger_.debug("Process " + entity + " is registering (Process ID = " + processId + ")...");

    //logging context
    NDC.push(entity);

    //verbose
    String iorString = CorbaManager.objectToString(managedProcess);
    logger_.trace("Managed Process IOR: " + iorString );

    if (!poll_process(managedProcess))
    {
        logger_.error("Can not poll the registering process!!!");
        NDC.pop();
        throw new CosFailedToRegisterException();
    }
    logger_.debug("Registering process is running. I can poll!");

    //Critical Section
    ManagedProcessStruct process = null;
    synchronized(managedProcesses_)
    {
        process = managedProcesses_.get(entity);
        if (process == null)
        {
            //not exist. create new entry
            process = new ManagedProcessStruct();
            //adhoc process
            process.adhoc = true;
            process.runtime.entity = entity;
            process.runtime.terminationCode = CosTerminationCodeEnum.TermNotKnown;
            process.runtime.nStarts = 1;
            process.runtime.weightage = 1;
            process.runtime.logLevel = CosLogLevelEnum.LogNormal;
            //set current value
            process.runtime.processId = processId;
            process.runtime.processType = processType;
            process.runtime.state = CosProcessStatusEnum.StatStarted;
            process.runtime.startDateTime = (Calendar.getInstance().getTimeInMillis() / 1000);
            process.runtime.requestedState = CosProcessStatusEnum.StatStarted;
            process.runtime.lastRequestDateTime = process.runtime.startDateTime;
            //default timeout value
            process.runtime.startTimeout = DEF_START_TIMEOUT_SEC;
            process.runtime.controlTimeout = DEF_CONTROL_TIMEOUT_SEC;
            process.runtime.monitorTimeout = DEF_MONITOR_TIMEOUT_SEC;
            process.runtime.terminateTimeout = DEF_TERMINATE_TIMEOUT_SEC;
            //store the reference
            process.reference = managedProcess;
            //insert into the list.
            //FIX: key the entry by the registering entity name. the original used
            //process.entity, which is never assigned on the freshly created struct,
            //so later lookups by entity name could not find the adhoc process.
            managedProcesses_.put(entity, process);
        }
        else
        {
            //already exist. update the runtime value
            //do not reset runtime loglevel. this way, any loglevel changes is persisted across restart
            process.runtime.processId = processId;
            process.runtime.processType = processType;
            process.runtime.state = CosProcessStatusEnum.StatStarted;
            //store the reference
            process.reference = managedProcess;
        }

        //send runtime parameters (process is never null at this point)
        on_managed_process_registration(process);
    }

    //perform misc tasks mostly used for error recovery/handling
    //store ior into a file
    CorbaManager.persistIor(entity, process.reference);
    //kill zombie process
    kill_zombie_process(process);

    NDC.pop();
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetParams(java.lang.String)
*/
public CosRunParamStruct[] cosGetParams(String entity)
{
    //a missing name can never match a managed process
    if (entity == null || entity.length() == 0)
    {
        return null;
    }

    //logging context
    NDC.push(entity);

    CosRunParamStruct[] result = null;
    final ManagedProcessStruct target = managedProcesses_.get(entity);
    if (target != null)
    {
        //known process: the helper assembles its runtime parameters
        result = get_process_runtime_params(target);
    }
    else
    {
        //unknown process: report and answer with null
        logger_.warn("Invalid entity name.");
    }

    NDC.pop();
    return result;
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosProcessTerminating(java.lang.String, tcg.syscontrol.cos.CosTerminationCodeEnum)
*/
public void cosProcessTerminating(String entity, CosTerminationCodeEnum p_code)
{
    //ignore notifications that do not name a process
    if (entity == null || entity.length() == 0)
    {
        return;
    }

    logger_.debug("Process " + entity + " is terminating...");

    //logging context
    NDC.push(entity);

    final ManagedProcessStruct terminating;
    //critical section: record the termination code and the new state
    synchronized (managedProcesses_)
    {
        terminating = managedProcesses_.get(entity);
        if (terminating == null)
        {
            //not one of ours
            logger_.warn("Invalid entity name.");
            NDC.pop();
            return;
        }
        terminating.runtime.terminationCode = p_code;
        terminating.runtime.state = CosProcessStatusEnum.StatTerminating;
    } //end of critical section

    //tell the peer about the state change
    synchronize_managed_process(terminating);

    //re-evaluate which instance is the active one
    terminating.activeReference = get_active_managed_process(terminating);

    NDC.pop();
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosProcessStatusChanged(java.lang.String, tcg.syscontrol.cos.CosProcessStatusEnum)
*/
public void cosProcessStatusChanged(String entity, CosProcessStatusEnum status)
{
    //Handles a managed process reporting a new status.
    //  entity - name of the managed process; null/empty is ignored
    //  status - the status the process has changed to
    //Updates the stored state, performs the status-specific bookkeeping below and
    //then synchronizes the change with the peer. Unknown processes and repeated
    //notifications of the current state are ignored.

    //validation
    if (entity == null || entity.length() == 0)
    {
        return;
    }

    logger_.debug("Process " + entity + " change status to " + CorbaHelper.ProcessStateToString(status)
            +"...");

    //logging context
    NDC.push(entity);

    //critical section
    ManagedProcessStruct process = null;
    synchronized(managedProcesses_)
    {
        process = managedProcesses_.get(entity);
        if (process == null)
        {
            //invalid process
            logger_.warn("Invalid entity name.");
            NDC.pop();
            return;
        }
        if (process.runtime.state == status)
        {
            //already in the state. ignore it.
            logger_.debug("Already in the expected state.");
            NDC.pop();
            return;
        }

        //update internal process state
        process.runtime.state = status;

        //other action
        switch (status.value())
        {
            case CosProcessStatusEnum._StatTerminating:
                //update runtime info: a termination is "requested" only if we asked for a stop
                if (process.runtime.requestedState == CosProcessStatusEnum.StatStopped)
                {
                    process.runtime.terminationCode = CosTerminationCodeEnum.TermRequestedTerminate;
                }
                else
                {
                    process.runtime.terminationCode = CosTerminationCodeEnum.TermNotKnown;
                }
                //update the active reference
                process.activeReference = get_active_managed_process(process);
                break;

            case CosProcessStatusEnum._StatStopped:
                //delete the reference in the map
                process.reference = null;
                //if it is adhoc, delete the runtime info
                if (process.adhoc)
                {
                    //FIX: remove by key. the original passed the struct itself to
                    //HashMap.remove(Object key), which never matches a String key,
                    //so stopped adhoc processes were never removed from the list.
                    managedProcesses_.remove(entity);
                }
                else
                {
                    //otherwise, update runtime info
                    process.runtime.processId = 0;
                }
                //remove the respective ior file
                CorbaManager.removeIor(entity);
                //update the active reference
                process.activeReference = get_active_managed_process(process);
                break;

            case CosProcessStatusEnum._StatRunningControl:
                //stable state. reset the requested state
                process.runtime.requestedState = status;
                //the local process is in control, so it is the active one
                process.activeReference = process.reference;
                break;

            case CosProcessStatusEnum._StatRunningMonitor:
                //stable state. reset the requested state
                process.runtime.requestedState = status;
                //update the active reference
                process.activeReference = get_active_managed_process(process);
                break;

            default:
                //nothing
                break;
        }
    } //end of critical section

    //inform/synchronize the peer
    synchronize_managed_process(process);

    NDC.pop();
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosProcessGoingToControl(java.lang.String)
*/
public boolean cosProcessGoingToControl(String entity)
{
    //a request without a process name is always refused
    if (entity == null || entity.length() == 0)
    {
        return false;
    }

    logger_.debug("Process " + entity + " is asking permission to go to CONTROL...");

    //logging context
    NDC.push(entity);

    final ManagedProcessStruct candidate = managedProcesses_.get(entity);

    //a process we do not manage is never held back
    if (candidate == null)
    {
        logger_.warn("Invalid/unknown managed process. Always allow.");
        NDC.pop();
        return true;
    }

    //requested state already CONTROL: the switch was asked for by the manager itself
    if (candidate.runtime.requestedState == CosProcessStatusEnum.StatRunningControl)
    {
        logger_.debug("Switching is requested by the manager. Can go to CONTROL.");
        NDC.pop();
        return true;
    }

    //without a peer there is nobody to compete with
    if (activePeers_.isEmpty())
    {
        logger_.debug("No peer. Can go to CONTROL.");
        NDC.pop();
        return true;
    }

    //otherwise permission depends on the operation mode we expect for this process
    logger_.debug("Determining operation mode for process " + entity);
    final boolean allowed =
            (get_expected_operation_mode(candidate) == CosOperationModeEnum.OperControl);
    NDC.pop();
    return allowed;
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosProcessGoingToMonitor(java.lang.String)
*/
public boolean cosProcessGoingToMonitor(String entity)
{
    //stepping down to MONITOR needs no arbitration: permission is always granted
    final boolean allowed = true;
    return allowed;
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetManagedProcess(short)
*/
public ICosManagedProcess cosGetManagedProcess(short index)
        throws CosIndexOutOfBoundException
{
    //Returns the corba reference of the index-th managed process.
    //Throws CosIndexOutOfBoundException when index is outside [0, number of processes).
    //FIX: the process list is a map keyed by entity name, so the original
    //managedProcesses_.get(index) looked up a boxed Short key, always got null
    //and failed with a NullPointerException. Walk the values instead.
    //NOTE: positions follow the map's iteration order, which only stays the same
    //as long as the map is not modified between calls.
    if (index < 0 || index >= managedProcesses_.size())
    {
        throw new CosIndexOutOfBoundException();
    }
    int position = 0;
    for (ManagedProcessStruct process : managedProcesses_.values())
    {
        if (position == index)
        {
            return process.reference;
        }
        position++;
    }
    //only reachable if the map shrank after the bounds check
    throw new CosIndexOutOfBoundException();
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetManagedProcess2(java.lang.String)
*/
public ICosManagedProcess cosGetManagedProcess2(String entity)
        throws CosUnknownProcessException
{
    //look the process up by entity name; an unknown name is an error
    final ManagedProcessStruct found = managedProcesses_.get(entity);
    if (found != null)
    {
        return found.reference;
    }
    throw new CosUnknownProcessException();
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetActiveManagedProcess(short)
*/
public ICosManagedProcess cosGetActiveManagedProcess(short index)
        throws CosIndexOutOfBoundException
{
    //Returns the active reference of the index-th managed process.
    //Throws CosIndexOutOfBoundException when index is outside [0, number of processes).
    //FIX: the process list is a map keyed by entity name, so the original
    //managedProcesses_.get(index) looked up a boxed Short key, always got null
    //and failed with a NullPointerException. Walk the values instead.
    //NOTE: positions follow the map's iteration order, which only stays the same
    //as long as the map is not modified between calls.
    if (index < 0 || index >= managedProcesses_.size())
    {
        throw new CosIndexOutOfBoundException();
    }
    int position = 0;
    for (ManagedProcessStruct process : managedProcesses_.values())
    {
        if (position == index)
        {
            return process.activeReference;
        }
        position++;
    }
    //only reachable if the map shrank after the bounds check
    throw new CosIndexOutOfBoundException();
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetActiveManagedProcess2(java.lang.String)
*/
public ICosManagedProcess cosGetActiveManagedProcess2(String entity)
        throws CosUnknownProcessException
{
    //look the process up by entity name; an unknown name is an error
    final ManagedProcessStruct found = managedProcesses_.get(entity);
    if (found != null)
    {
        return found.activeReference;
    }
    throw new CosUnknownProcessException();
}
/** -------------------------------------------------------------------------------- *
* MANAGED PROCESS MONITORING *
* -------------------------------------------------------------------------------- **/
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetNumberOfManagedProcesses()
*/
public short cosGetNumberOfManagedProcesses()
{
    // the interface returns a short, so the collection size is narrowed here
    final int count = managedProcesses_.size();
    return (short) count;
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetProcessInfo(short)
*/
public CosProcessRuntimeDataStruct cosGetProcessInfo(short index)
        throws CosIndexOutOfBoundException
{
    // valid positions are 0 .. size-1; anything else is reported to the caller
    final boolean inRange = (index >= 0) && (index < managedProcesses_.size());
    if (!inRange)
    {
        throw new CosIndexOutOfBoundException();
    }
    return managedProcesses_.get(index).runtime;
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetProcessInfo2(java.lang.String)
*/
public CosProcessRuntimeDataStruct cosGetProcessInfo2(String entity)
        throws CosUnknownProcessException
{
    final ManagedProcessStruct found = managedProcesses_.get(entity);
    if (found != null)
    {
        return found.runtime;
    }
    // no managed process is stored under this entity name
    throw new CosUnknownProcessException();
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetProcessInfoAll()
*/
public CosProcessRuntimeDataStruct[] cosGetProcessInfoAll()
{
    // one slot per managed process, filled in index order
    final CosProcessRuntimeDataStruct[] snapshot =
            new CosProcessRuntimeDataStruct[managedProcesses_.size()];
    int position = 0;
    while (position < managedProcesses_.size())
    {
        snapshot[position] = managedProcesses_.get(position).runtime;
        position++;
    }
    return snapshot;
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetProcessConfig(short)
*/
public CosProcessDataStruct cosGetProcessConfig(short index)
        throws CosIndexOutOfBoundException
{
    // valid positions are 0 .. size-1; anything else is reported to the caller
    final boolean inRange = (index >= 0) && (index < managedProcesses_.size());
    if (!inRange)
    {
        throw new CosIndexOutOfBoundException();
    }
    return managedProcesses_.get(index).config;
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetProcessConfig2(java.lang.String)
*/
public CosProcessDataStruct cosGetProcessConfig2(String entity)
        throws CosUnknownProcessException
{
    final ManagedProcessStruct found = managedProcesses_.get(entity);
    if (found != null)
    {
        return found.config;
    }
    // no managed process is stored under this entity name
    throw new CosUnknownProcessException();
}
/*
* (non-Javadoc)
* @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetProcessStatusString(short)
*/
public String cosGetProcessStatusString(short index) throws CosIndexOutOfBoundException
{
    final boolean inRange = (index >= 0) && (index < managedProcesses_.size());
    if (!inRange)
    {
        throw new CosIndexOutOfBoundException();
    }

    final ICosManagedProcess target = managedProcesses_.get(index).reference;
    // without a usable reference there is nobody to ask: report an empty status
    if (!CorbaManager.isValidReference(target))
    {
        return "";
    }

    try
    {
        return target.cosGetStatusString();
    }
    catch (Exception ex)
    {
        // a failed remote call is logged and reported as an empty status
        logger_.error("Can not get status string for "
                + managedProcesses_.get(index).entity
                + ". Exception: " + ex.getMessage());
        return "";
    }
}
/**
 * Asks the managed process stored under the given entity name for its status string.
 *
 * @param entity entity name used as the lookup key
 * @return the string returned by the process, or an empty string when the
 *         reference is not valid or the remote call fails
 * @throws CosUnknownProcessException when no process is stored under that name
 */
public String cosGetProcessStatusString2(String entity) throws CosUnknownProcessException
{
    final ManagedProcessStruct found = managedProcesses_.get(entity);
    if (found == null)
    {
        throw new CosUnknownProcessException();
    }

    final ICosManagedProcess target = found.reference;
    if (!CorbaManager.isValidReference(target))
    {
        return "";
    }

    try
    {
        return target.cosGetStatusString();
    }
    catch (Exception ex)
    {
        // a failed remote call is logged and reported as an empty status
        logger_.error("Can not get status string for " + entity
                + ". Exception: " + ex.getMessage());
        return "";
    }
}
/**
 * Starts the managed process stored under the given entity name.
 * Unknown entity names are silently ignored.
 *
 * @param entity entity name used as the lookup key
 */
public void cosStartProcess(String entity)
{
    final ManagedProcessStruct target = managedProcesses_.get(entity);
    if (target != null)
    {
        start_process(target);
    }
}
/**
 * Asks the managed process stored under the given entity name to terminate.
 * Unknown entity names are silently ignored.
 *
 * @param entity entity name used as the lookup key
 */
public void cosTerminateProcess(String entity)
{
    final ManagedProcessStruct target = managedProcesses_.get(entity);
    if (target != null)
    {
        terminate_process(target);
    }
}
/**
 * Kills the managed process stored under the given entity name.
 * Unknown entity names are silently ignored.
 *
 * @param entity entity name used as the lookup key
 */
public void cosKillProcess(String entity)
{
    final ManagedProcessStruct target = managedProcesses_.get(entity);
    if (target != null)
    {
        kill_process(target);
    }
}
/**
 * Resets the start counter of the managed process stored under the given
 * entity name. Unknown entity names are silently ignored.
 *
 * @param entity entity name used as the lookup key
 */
public void cosResetProcessNumberOfRestart(String entity)
{
    final ManagedProcessStruct target = managedProcesses_.get(entity);
    if (target != null)
    {
        // NOTE(review): the counter is reset to 1 rather than 0 - presumably the
        // current run counts as the first start; confirm against the poller logic.
        target.runtime.nStarts = 1;
    }
}
/**
 * Forwards an operation mode to the managed process stored under the given
 * entity name. Nothing happens when the entity is unknown or has no reference.
 *
 * @param entity entity name used as the lookup key
 * @param mode   operation mode to send
 */
public void cosSetProcessOperationMode(String entity, CosOperationModeEnum mode)
{
    final ManagedProcessStruct target = managedProcesses_.get(entity);
    if (target != null && target.reference != null)
    {
        set_operation_mode(target.reference, mode);
    }
}
/**
 * Forwards runtime parameters to the managed process stored under the given
 * entity name. Nothing happens when the entity is unknown or has no reference.
 *
 * @param entity   entity name used as the lookup key
 * @param paramSeq runtime parameters to send
 */
public void cosSetProcessParams(String entity, CosRunParamStruct[] paramSeq)
{
    final ManagedProcessStruct target = managedProcesses_.get(entity);
    if (target != null && target.reference != null)
    {
        set_runtime_params(target.reference, paramSeq);
    }
}
/**
 * Passes a log level on to the managed process stored under the given entity
 * name. Unknown entities and invalid references are ignored; a failing remote
 * call is only logged.
 *
 * @param entity   entity name used as the lookup key
 * @param loglevel log level to send
 */
public void cosSetProcessLogLevel(String entity, CosLogLevelEnum loglevel)
{
    final ManagedProcessStruct target = managedProcesses_.get(entity);
    if (target == null)
    {
        return;
    }

    final ICosManagedProcess remote = target.reference;
    if (!CorbaManager.isValidReference(remote))
    {
        return;
    }

    //just pass it to the process
    try
    {
        remote.cosSetLogLevel(loglevel);
    }
    catch (Exception ex)
    {
        logger_.error("Can not send log level for " + entity
                + " to " + CorbaHelper.LogLevelToString(loglevel)
                + ". Exception: " + ex.getMessage());
    }
}
/**
 * Passes a log level for one named logger on to the managed process stored
 * under the given entity name. Unknown entities and invalid references are
 * ignored; a failing remote call is only logged.
 *
 * @param entity   entity name used as the lookup key
 * @param logger   name of the logger whose level is sent
 * @param loglevel log level to send
 */
public void cosSetProcessLogLevelDetail(String entity, String logger,
        CosLogLevelEnum loglevel)
{
    final ManagedProcessStruct target = managedProcesses_.get(entity);
    if (target == null)
    {
        return;
    }

    final ICosManagedProcess remote = target.reference;
    if (!CorbaManager.isValidReference(remote))
    {
        return;
    }

    //just pass it to the process
    try
    {
        remote.cosSetLogLevelDetail(logger, loglevel);
    }
    catch (Exception ex)
    {
        logger_.error("Can not send log level for " + logger
                + " @ " + entity + " to " + CorbaHelper.LogLevelToString(loglevel)
                + ". Exception: " + ex.getMessage());
    }
}
/** -------------------------------------------------------------------------------- *
* SIMPLE NAMING LOOKUP/SERVICE INTERFACES *
* -------------------------------------------------------------------------------- **/
/**
 * Stores (or replaces) the reference kept under {@code uniqueKey} in the
 * corba server lookup, after checking that the reference can be polled.
 *
 * @param uniqueKey       lookup key for the server
 * @param monitoredThread reference being registered
 * @throws CosFailedToRegisterException when the reference can not be polled
 */
public void cosRegisterCorbaServer(String uniqueKey, ICosMonitoredThread monitoredThread)
        throws CosFailedToRegisterException
{
    //Debugging
    logger_.debug("IOR: " + CorbaManager.objectToString(monitoredThread) );

    // refuse references that do not answer a poll
    if (!poll_process(monitoredThread))
    {
        logger_.error("Can not poll the registering dpserver!!!");
        throw new CosFailedToRegisterException();
    }
    logger_.debug("Registering dpserver is running. I can poll!");

    // reuse the existing entry when there is one, otherwise build a fresh one
    CorbaServerStruct entry = corbaServers_.get(uniqueKey);
    final boolean firstRegistration = (entry == null);
    if (firstRegistration)
    {
        entry = new CorbaServerStruct();
        entry.key = uniqueKey;
    }
    entry.reference = monitoredThread;
    if (firstRegistration)
    {
        corbaServers_.put(uniqueKey, entry);
    }

    //update the active reference
    entry.activeReference = get_active_corba_server(entry);
}
/**
 * Removes the entry kept under {@code uniqueKey} from the corba server lookup.
 *
 * @param uniqueKey lookup key for the server
 */
public void cosUnregisterCorbaServer(String uniqueKey)
{
    // nothing to do when no entry is stored under this key
    if (corbaServers_.get(uniqueKey) == null)
    {
        return;
    }
    corbaServers_.remove(uniqueKey);
}
/**
 * Records a new operation mode for a registered corba server, refreshes its
 * active reference and passes the entry to the peer synchronization.
 * An unknown key is logged and otherwise ignored.
 *
 * @param uniqueKey     lookup key for the server
 * @param operationMode operation mode to record
 */
public void cosUpdateCorbaServerOperationMode(String uniqueKey,
        CosOperationModeEnum operationMode)
{
    final CorbaServerStruct entry = corbaServers_.get(uniqueKey);
    if (entry == null)
    {
        logger_.error("Can not find corba server " + uniqueKey);
        return;
    }

    //update the internal structure
    entry.operationMode = operationMode;

    // a server in control becomes the active reference itself;
    // otherwise the active reference is looked up again
    if (operationMode != CosOperationModeEnum.OperControl)
    {
        entry.activeReference = get_active_corba_server(entry);
    }
    else
    {
        entry.activeReference = entry.reference;
    }

    //inform/synchronize the peer
    synchronize_corba_server(entry);
}
/**
 * Returns the reference kept under {@code uniqueKey}.
 *
 * @param uniqueKey lookup key for the server
 * @return the stored reference
 * @throws CosUnknownProcessException when no entry is stored under that key
 */
public ICosMonitoredThread cosGetCorbaServer(String uniqueKey)
        throws CosUnknownProcessException
{
    final CorbaServerStruct entry = corbaServers_.get(uniqueKey);
    if (entry != null)
    {
        return entry.reference;
    }
    throw new CosUnknownProcessException();
}
/**
 * Returns the reference kept under {@code uniqueKey} and reports its
 * recorded operation mode through the holder.
 *
 * @param uniqueKey     lookup key for the server
 * @param operationMode out-parameter; set to {@code OperNotApplicable} before
 *                      the lookup and to the recorded mode when the entry exists
 * @return the stored reference
 * @throws CosUnknownProcessException when no entry is stored under that key
 */
public ICosMonitoredThread cosGetCorbaServerOperationMode(String uniqueKey,
        CosOperationModeEnumHolder operationMode) throws CosUnknownProcessException
{
    // the holder always carries a value, even when the lookup fails below
    operationMode.value = CosOperationModeEnum.OperNotApplicable;

    final CorbaServerStruct entry = corbaServers_.get(uniqueKey);
    if (entry == null)
    {
        throw new CosUnknownProcessException();
    }

    operationMode.value = entry.operationMode;
    return entry.reference;
}
/**
 * Returns the active reference for the server kept under {@code uniqueKey}.
 * A currently stored valid active reference is first checked with
 * {@code cosPollControl()}; when that call fails the reference is dropped.
 * A missing active reference is then looked up again.
 *
 * @param uniqueKey lookup key for the server
 * @return the active reference; may be {@code null} when the lookup yields none
 * @throws CosUnknownProcessException when no entry is stored under that key
 */
public ICosMonitoredThread cosGetActiveCorbaServer(String uniqueKey)
        throws CosUnknownProcessException
{
    final CorbaServerStruct entry = corbaServers_.get(uniqueKey);
    if (entry == null)
    {
        throw new CosUnknownProcessException();
    }

    //check the current reference to active process. make sure it is in CONTROL
    final boolean hasUsableActive = CorbaManager.isValidReference(entry.activeReference);
    if (hasUsableActive)
    {
        try
        {
            entry.activeReference.cosPollControl();
        }
        catch (Exception ex)
        {
            logger_.error("Current active reference of process " + entry.key
                    + " is NOT in CONTROL. Resetting it!");
            entry.activeReference = null;
        }
    }

    //if we dont have an active reference, try to get it
    if (null == entry.activeReference)
    {
        entry.activeReference = get_active_corba_server(entry);
    }
    return entry.activeReference;
}
/*
* HELPER METHODS
*/
/**
 * Brings the process manager from startup to the started state: parses the
 * command line, loads the properties, configures the database access, loads
 * the configuration from the database and sets up the CORBA servant.
 *
 * @param args command line arguments
 * @return {@code true} when every step succeeded; on the first failing step
 *         the state is set back to {@code StatUnstarted} and {@code false}
 *         is returned
 */
private boolean initialize(String[] args)
{
    state_ = CosProcessStatusEnum.StatStartup;

    //parse command line arguments
    if (!parse_arguments(args))
    {
        state_ = CosProcessStatusEnum.StatUnstarted;
        return false;
    }

    //load configuration from properties file
    props_ = load_properties(configFile_);

    // map the configured database type name; anything unrecognised means HSQLDB
    final String dbTypeName = props_.getProperty("tcg.db.type", "");
    final DatabaseType dbType;
    if (dbTypeName.equalsIgnoreCase("MYSQL"))
    {
        dbType = DatabaseType.MYSQL;
    }
    else if (dbTypeName.equalsIgnoreCase("ORACLE"))
    {
        dbType = DatabaseType.ORACLE;
    }
    else
    {
        dbType = DatabaseType.HSQLDB;
    }

    //initialize database connection
    DatabaseManager.configure(dbType,
            props_.getProperty("tcg.db.name", ""),
            props_.getProperty("tcg.db.user", ""),
            props_.getProperty("tcg.db.password", ""));

    // The remaining steps run strictly in this order and stop at the first failure:
    //  1. load the configuration from database
    //  2. initialize corba manager. if corbaPort = 0, the port is randomly allocated;
    //     if it is already initialized by the child class, it does nothing
    //  3. activate this servant. if corbaPort != "", the servant is created as
    //     persistent object and can be accessed via corbaloc address:
    //     corbaloc::<ip-addr>:<port>/<corba-name>
    String failure = null;
    if (!load_configuration())
    {
        failure = "Can not load configuration from database.";
    }
    else if (!CorbaManager.initialize(portNo_))
    {
        failure = "Can not initialize CORBA manager.";
    }
    else if (!CorbaManager.activate(this, serverKey_))
    {
        failure = "Can not activate CORBA servant.";
    }

    if (failure != null)
    {
        logger_.error(failure);
        state_ = CosProcessStatusEnum.StatUnstarted;
        return false;
    }

    state_ = CosProcessStatusEnum.StatStarted;
    return true;
}
/*
 * Runs the process manager in control mode: starts the managed processes and
 * the poller thread, connects to the peer managers and starts the peer
 * thread, then hands the calling thread to the ORB until it is shut down.
 * The state field is updated at each stage.
 */
private void run()
{
state_ = CosProcessStatusEnum.StatGoingToControl;
logger_.info("-----------------------------------");
logger_.info("Starting control mode...");
logger_.info("-----------------------------------");
//try to connect to pre-existing process
start_managed_processes();
//start the poller thread
processPoller_.start();
//connect to configured peer. this will attempt to validate all configured peers.
//validated peers are then copied over to the list of active peers
connect_to_peer_managers();
//start the peer thread
peerSynch_.start();
logger_.info("-----------------------------------");
logger_.info("Process Manager is running CONTROL.");
logger_.info("-----------------------------------");
state_ = CosProcessStatusEnum.StatRunningControl;
//start the orb. this will block until stop() is called via cosTerminate()
CorbaManager.run();
//once the orb call above returns, we are terminating
state_ = CosProcessStatusEnum.StatStopped;
}
/*
 * Shuts the process manager down: CORBA first, then the peer and poller
 * threads, then the managed processes, and finally clears the internal
 * lookups. The state goes from StatTerminating to StatStopped.
 */
protected void stop()
{
state_ = CosProcessStatusEnum.StatTerminating;
//first, shut down CORBA. this prevents any corba call that might interrupt
//our shutdown procedures.
CorbaManager.shutdown();
//stop the peer thread
peerSynch_.stop();
//stop the poller thread
processPoller_.stop();
//stop all managed processes
stop_managed_processes();
//clean up
CorbaManager.cleanup();
//other clean up: drop every entry from the internal lookups
managedProcesses_.clear();
corbaServers_.clear();
activePeers_.clear();
state_ = CosProcessStatusEnum.StatStopped;
}
/**
 * Parses the command line and stores the values in the corresponding fields
 * (manager name, log directory, configuration file, corba port and an
 * optional peer).
 *
 * @param args command line arguments
 * @return {@code false} when the arguments can not be parsed (usage help is
 *         printed in that case), {@code true} otherwise
 */
private boolean parse_arguments(String args[])
{
    // declare every supported switch; only the manager name is mandatory
    Options options = new Options();
    options.addOption(make_cli_option("n", "name", "name",
            "Process Manager hostname (required)", true));
    options.addOption(make_cli_option("l", "logdir", "log-dir",
            "Log directory (optional. default: current directory)", false));
    options.addOption(make_cli_option("f", "config-file", "file",
            "Configuration file (optional. default: scada.properties)", false));
    options.addOption(make_cli_option("cp", "corba-port", "port-no",
            "Corba port (optional. default: database config)", false));
    options.addOption(make_cli_option("pn", "peer-name", "host-name",
            "Peer hostname (optional. default: database config)", false));
    options.addOption(make_cli_option("pp", "peer-port", "port-no",
            "Peer port number (optional. default: database config)", false));

    //parse command line arguments
    org.apache.commons.cli.CommandLine cmd;
    try
    {
        org.apache.commons.cli.Parser parser = new org.apache.commons.cli.GnuParser();
        cmd = parser.parse(options, args);
    }
    catch(org.apache.commons.cli.ParseException pe)
    {
        logger_.error("Can not parse arguments: " + pe.toString());
        new HelpFormatter().printHelp( "Command line parameters:", options );
        return false;
    }

    //process manager name (normally hostname)
    name_ = cmd.getOptionValue("n");

    //log directory: fall back to the current directory
    final String logDirArg = cmd.getOptionValue("l");
    if (logDirArg != null && logDirArg.length() != 0)
    {
        logdir_ = logDirArg;
    }
    else
    {
        logdir_ = Utilities.getCurrentDir();
    }

    //config file: fall back to the default file name
    final String configArg = cmd.getOptionValue("f");
    if (configArg != null && configArg.length() != 0)
    {
        configFile_ = configArg;
    }
    else
    {
        configFile_ = DEF_CONFIG_FILE;
    }

    //corba port
    if (cmd.hasOption("cp"))
    {
        portNo_ = __parseInt(cmd.getOptionValue("cp"));
    }

    // a peer is only configured from the command line when its name is given
    if (!cmd.hasOption("pn"))
    {
        return true;
    }

    PeerManagerStruct peer = new PeerManagerStruct();
    peer.name = cmd.getOptionValue("pn");
    if (cmd.hasOption("pp"))
    {
        peer.portNo = __parseInt(cmd.getOptionValue("pp"));
    }
    // port 0 means "not given": use the default manager port
    if (peer.portNo == 0)
    {
        peer.portNo = DEFAULT_MGR_PORT.value;
    }
    peer.heartbeatMillis = DEF_PEER_SYNC_RATE_MSEC;
    //add into the hashmap
    configuredPeers_.put(peer.name, peer);
    return true;
}

/* Builds one command line option that takes exactly one argument value. */
private Option make_cli_option(String shortName, String longName, String argName,
        String description, boolean required)
{
    Option option = new Option(shortName, description);
    option.setRequired(required);
    option.setLongOpt(longName);
    option.setArgs(1);
    option.setArgName(argName);
    return option;
}
/**
 * Builds the effective property set: the system properties whose key starts
 * with "tcg." or "org.omg." first, then every entry of the given properties
 * file, which overrides a system property of the same key.
 *
 * @param filename name of the properties file handed to
 *                 {@code Utilities.getInputStream}; {@code null} or empty
 *                 means "system properties only"
 * @return the merged properties, never {@code null}; when the file can not be
 *         found or read the error is logged and whatever was collected so far
 *         is returned
 */
private Properties load_properties(String filename)
{
    Properties merged = new Properties();

    //Get from the system properties first
    Properties systemProps = System.getProperties();
    for (Enumeration<?> names = systemProps.propertyNames(); names.hasMoreElements();)
    {
        String key = (String) names.nextElement();
        String value = systemProps.getProperty(key);
        // only our own entries and the orb entries are taken over
        boolean wanted = key.startsWith("tcg.") || key.startsWith("org.omg.");
        if (wanted && value != null && merged.getProperty(key) == null)
        {
            merged.setProperty(key, value);
        }
    }

    if (filename == null || filename.length() == 0)
    {
        return merged;
    }

    //open the property file if given
    InputStream stream = Utilities.getInputStream(filename);
    if (stream == null)
    {
        logger_.error("Couldn't find " + filename + " in classpath.");
        return merged;
    }

    //load the properties file
    Properties fromFile = new Properties();
    try
    {
        fromFile.load(stream);
    }
    catch(IOException ioe)
    {
        logger_.error("Fail to load " + filename + ". Exception: " + ioe.getMessage());
    }
    finally
    {
        // always release the stream, also when load() failed half way
        try
        {
            stream.close();
        }
        catch(IOException ioe)
        {
            logger_.warn("Fail to close " + filename + ". Exception: " + ioe.getMessage());
        }
    }

    //override the system properties with properties from file
    for (Enumeration<?> names = fromFile.propertyNames(); names.hasMoreElements();)
    {
        String key = (String) names.nextElement();
        String value = fromFile.getProperty(key);
        if (value != null)
        {
            merged.setProperty(key, value);
        }
    }
    return merged;
}
/**
 * Loads the process manager settings and the list of enabled managed
 * processes for this host ({@code name_}) from the database.
 * <p>
 * The first query reads poll rate, port, maximum restart count and up to two
 * peers; the second fills {@code managedProcesses_}. Statements and result
 * sets are always closed, also when a query fails.
 *
 * @return {@code true} when both queries succeeded; {@code false} when no
 *         connection is available, a query fails, or this manager has no row
 *         in the manager table
 */
private boolean load_configuration()
{
    Connection conn = DatabaseManager.getConnection();
    if (conn == null || !DatabaseManager.isConnected(conn))
    {
        logger_.warn("Can not get connection to database.");
        return false;
    }

    //get other configuration detail
    boolean status = false;
    logger_.debug("Getting configuration from database...");
    // NOTE(review): name_ is concatenated into the SQL text. It comes from the
    // command line; a name containing a quote breaks the query. Consider a
    // bound parameter once the column type of HOSTNAME is confirmed.
    String query = "select POLL_RATE, PORT_NO, MAX_RESTART,"
            + " PEER_NAME1, PEER_PORT1, SYNC_RATE1,"
            + " PEER_NAME2, PEER_PORT2, SYNC_RATE2"
            + " from " + DEF_TABLE_MANAGER
            + " where HOSTNAME='" + name_ + "'";
    logger_.debug("SQL Query: " + query);

    int errorCode = 0;
    Statement stmt = null;
    ResultSet rs = null;
    try
    {
        stmt = conn.createStatement();
        rs = stmt.executeQuery(query);
        if (!rs.next())
        {
            //this manager name is not found in the database
            logger_.error("Process Manager " + name_ + " is not configured in the database.");
            //use default
            // NOTE(review): defaults are applied here but status stays false, so
            // the method still reports failure - confirm this is intended.
            if (portNo_ == 0)
            {
                portNo_ = DEFAULT_MGR_PORT.value;
            }
            pollRateMillis_ = DEF_POLL_RATE_MSEC;
            maxRestart_ = DEF_MAX_RESTART;
        }
        else
        {
            // a port given on the command line wins over the database value
            if (portNo_ == 0)
            {
                portNo_ = rs.getInt("PORT_NO");
            }
            pollRateMillis_ = rs.getInt("POLL_RATE");
            maxRestart_ = rs.getInt("MAX_RESTART");
            //verbose
            logger_.trace("SQL Result: Poll Rate = " + pollRateMillis_);
            logger_.trace("SQL Result: Manager Port = " + portNo_);
            logger_.trace("SQL Result: Max. Restart = " + maxRestart_);
            //first and second peer, if any
            load_configured_peer(rs, 1);
            load_configured_peer(rs, 2);
            //successful
            status = true;
        }
    }
    catch (SQLException sqle)
    {
        logger_.error("Can not get Process Manager configuration. Exception: " + sqle.toString());
        errorCode = sqle.getErrorCode();
    }
    finally
    {
        int closeCode = close_query_resources(rs, stmt);
        if (errorCode == 0)
        {
            errorCode = closeCode;
        }
    }

    //check the status
    if (!status)
    {
        DatabaseManager.returnConnection(errorCode);
        return false;
    }

    //read the list of managed processes from database
    status = false;
    logger_.debug("Getting list of managed processes from database...");
    // NOTE(review): KILL_KEYWORDS is selected but not read below.
    query = "select a.ENTITY, b.COMMAND_LINE, b.IN_PROCESS, b.ARGUMENTS,"
            + " a.EXTRA_ARGUMENTS, a.CONTROL_MODE, a.WEIGHTAGE, "
            + " b.START_TIMEOUT, b.TERMINATE_TIMEOUT, b.CONTROL_TIMEOUT, b.MONITOR_TIMEOUT, "
            + " a.KILL_KEYWORDS"
            + " from " + DEF_TABLE_PROCESSLIST + " a, " + DEF_TABLE_PROCESSCONFIG + " b"
            + " where a.HOSTNAME='" + name_ + "' and a.ENABLED='Y' and a.PROCESS_NAME=b.PROCESS_NAME";
    logger_.trace("SQL Query: " + query);

    errorCode = 0;
    stmt = null;
    rs = null;
    try
    {
        stmt = conn.createStatement();
        rs = stmt.executeQuery(query);
        while(rs.next())
        {
            ManagedProcessStruct process = new ManagedProcessStruct();
            process.entity = rs.getString("ENTITY");

            //configuration data
            process.config.entity = process.entity;
            process.config.commandLine = rs.getString("COMMAND_LINE");
            String inProcessFlag = rs.getString("IN_PROCESS");
            process.inProcess = (inProcessFlag != null && inProcessFlag.equalsIgnoreCase("Y"));
            process.config.arguments = rs.getString("ARGUMENTS");
            if (process.config.arguments == null)
            {
                process.config.arguments = "";
            }
            // per-host extra arguments are appended to the generic ones
            String extraArguments = rs.getString("EXTRA_ARGUMENTS");
            if (extraArguments != null && extraArguments.length() > 0)
            {
                process.config.arguments += " " + extraArguments;
            }

            //default operation mode
            // NOTE(review): a value other than NULL/Y/N leaves operationMode as
            // whatever ManagedProcessStruct initialised it to - confirm.
            String controlMode = rs.getString("CONTROL_MODE");
            if (controlMode == null)
            {
                process.config.operationMode = CosOperationModeEnum.OperNotApplicable;
            }
            else if (controlMode.compareToIgnoreCase("Y") == 0)
            {
                process.config.operationMode = CosOperationModeEnum.OperControl;
            }
            else if (controlMode.compareToIgnoreCase("N") == 0)
            {
                process.config.operationMode = CosOperationModeEnum.OperMonitor;
            }
            process.config.weightage = rs.getShort("WEIGHTAGE");
            process.config.startTimeout = rs.getInt("START_TIMEOUT");
            process.config.terminateTimeout = rs.getInt("TERMINATE_TIMEOUT");
            process.config.controlTimeout = rs.getInt("CONTROL_TIMEOUT");
            process.config.monitorTimeout = rs.getInt("MONITOR_TIMEOUT");

            //runtime info
            process.runtime.entity = process.config.entity;
            process.runtime.processId = 0;
            process.runtime.processType = CosProcessTypeEnum.ProcThread;
            process.runtime.state = CosProcessStatusEnum.StatUnstarted;
            process.runtime.requestedState = CosProcessStatusEnum.StatStarted;
            process.runtime.terminationCode = CosTerminationCodeEnum.TermNotKnown;
            process.runtime.startDateTime = 0;
            process.runtime.lastRequestDateTime = 0;
            process.runtime.nStarts = 0;
            process.runtime.weightage = process.config.weightage;
            process.runtime.logLevel = CosLogLevelEnum.LogNormal;
            process.runtime.startTimeout = process.config.startTimeout;
            process.runtime.terminateTimeout = process.config.terminateTimeout;
            process.runtime.controlTimeout = process.config.controlTimeout;
            process.runtime.monitorTimeout = process.config.monitorTimeout;

            //add into the list
            managedProcesses_.put(process.entity, process);
        } //while(rs.next())

        //successful, even if we somehow fail to insert the structure into the list
        status = true;
    }
    catch (SQLException sqle)
    {
        logger_.error("Can not get Managed Process list. Exception: " + sqle.toString() );
        errorCode = sqle.getErrorCode();
    }
    finally
    {
        int closeCode = close_query_resources(rs, stmt);
        if (errorCode == 0)
        {
            errorCode = closeCode;
        }
    }

    //return the connection, passing on the last error code seen (0 = none)
    DatabaseManager.returnConnection(errorCode);
    return status;
}

/*
 * Reads peer number <slot> (columns PEER_NAME<slot>, PEER_PORT<slot>,
 * SYNC_RATE<slot>) from the current row and adds it to configuredPeers_.
 * A NULL peer name means the slot is unused. A port or sync rate of 0 is
 * replaced by the respective default.
 */
private void load_configured_peer(ResultSet rs, int slot) throws SQLException
{
    String peerName = rs.getString("PEER_NAME" + slot);
    if (peerName == null)
    {
        return;
    }
    PeerManagerStruct peer = new PeerManagerStruct();
    peer.name = peerName;
    peer.portNo = rs.getInt("PEER_PORT" + slot);
    if (peer.portNo == 0)
    {
        peer.portNo = DEFAULT_MGR_PORT.value;
    }
    peer.heartbeatMillis = rs.getInt("SYNC_RATE" + slot);
    if (peer.heartbeatMillis == 0)
    {
        peer.heartbeatMillis = DEF_PEER_SYNC_RATE_MSEC;
    }
    //add into the hashmap
    configuredPeers_.put(peer.name, peer);
}

/*
 * Closes the result set and the statement (either may be null). A failure to
 * close is logged; the error code of the last such failure is returned,
 * 0 when both closed cleanly.
 */
private int close_query_resources(ResultSet rs, Statement stmt)
{
    int closeCode = 0;
    if (rs != null)
    {
        try
        {
            rs.close();
        }
        catch (SQLException sqle)
        {
            logger_.warn("Can not close result set. Exception: " + sqle.toString());
            closeCode = sqle.getErrorCode();
        }
    }
    if (stmt != null)
    {
        try
        {
            stmt.close();
        }
        catch (SQLException sqle)
        {
            logger_.warn("Can not close statement. Exception: " + sqle.toString());
            closeCode = sqle.getErrorCode();
        }
    }
    return closeCode;
}
/**
 * Polls a monitored thread, trying up to {@code CORBA_ERROR_THRESHOLD} times.
 *
 * @param thread reference to poll
 * @return {@code true} as soon as one poll succeeds; {@code false} when the
 *         reference is not valid or every attempt failed
 */
private boolean poll_process(ICosMonitoredThread thread)
{
    if (!CorbaManager.isValidReference(thread))
    {
        logger_.error("Invalid reference to monitored thread.");
        return false;
    }

    int attempt = 0;
    while (attempt < CORBA_ERROR_THRESHOLD)
    {
        try
        {
            thread.cosPoll();
            return true;
        }
        catch (Exception ex)
        {
            // log and fall through to the next attempt
            logger_.error("(" + attempt + ") Can not poll monitored thread! Exception: "
                    + ex.toString());
        }
        attempt++;
    }
    return false;
}
/**
 * Polls a process manager, trying up to {@code CORBA_ERROR_THRESHOLD} times.
 *
 * @param manager reference to poll
 * @return {@code true} as soon as one poll succeeds; {@code false} when the
 *         reference is not valid or every attempt failed
 */
private boolean poll_manager(ICosProcessManager manager)
{
    if (!CorbaManager.isValidReference(manager))
    {
        logger_.error("Invalid reference to process manager.");
        return false;
    }

    int attempt = 0;
    while (attempt < CORBA_ERROR_THRESHOLD)
    {
        try
        {
            manager.cosPoll();
            return true;
        }
        catch (Exception ex)
        {
            // log and fall through to the next attempt
            logger_.error("(" + attempt + ") Can not poll process manager! Exception: "
                    + ex.toString());
        }
        attempt++;
    }
    return false;
}
/**
 * Launches a managed process, either inside this JVM (reflective call of
 * {@code execute(String)} on a fresh instance of the configured class) or
 * through a {@code ProcessExecution}, and records the start in the runtime
 * data. Adhoc processes are not started.
 *
 * @param process managed process to start
 */
private void start_process(ManagedProcessStruct process)
{
    logger_.info("Starting process " + process.entity + "...");

    //if it is an adhoc process we will not know how to start it
    if (process.adhoc)
    {
        logger_.warn("Adhoc process. I do not know how to start it!");
        return;
    }

    // log file named after the entity, inside the log directory when one is set
    final String logfile = (logdir_.length() > 0)
            ? logdir_ + File.separator + process.entity + ".log"
            : process.entity + ".log";

    // switches shared by both launch styles (ends with a blank)
    final String commonSwitches = "--entity " + process.entity + " "
            + "--mgr-port " + portNo_ + " "
            + "--logfile " + logfile + " ";

    if (!process.inProcess)
    {
        // hand the full command line to its own execution context
        final String cmd = process.config.commandLine + " "
                + commonSwitches
                + process.config.arguments;
        ProcessExecution exec = new ProcessExecution(cmd);
        exec.start();
        //store the execution context
        process.instance = exec;
        // NOTE: an earlier draft ran the command directly with Runtime.exec(),
        // waited for it to finish and drained stderr/stdout afterwards. That
        // would force the command to run as a background job, otherwise the
        // wait blocks forever.
    }
    else
    {
        final String cmd = commonSwitches
                + "--inprocess "
                + process.config.arguments;
        //verbose
        logger_.info("Execute: " + cmd);
        //run in the same JVM
        try
        {
            Class<?> cls = Class.forName(process.config.commandLine);
            Object obj = cls.newInstance();
            Method entryPoint = cls.getMethod("execute", String.class);
            entryPoint.invoke(obj, cmd);
            //store the object instance
            process.instance = obj;
        }
        catch(Exception ex)
        {
            logger_.warn("Fail to execute command. Exception: " + ex.toString());
        }
    }

    //update runtime infos
    final CosProcessRuntimeDataStruct runtime = process.runtime;
    runtime.state = CosProcessStatusEnum.StatStartup;
    runtime.startDateTime = Utilities.getTimeInSecs();
    runtime.requestedState = CosProcessStatusEnum.StatStarted;
    runtime.lastRequestDateTime = runtime.startDateTime;
    runtime.nStarts += 1;
}
/**
 * Restarts a managed process: kills it when a process id is known, then
 * starts it again unless the start counter exceeds {@code maxRestart_} or
 * the process is adhoc.
 *
 * @param process managed process to restart
 */
private void restart_process(ManagedProcessStruct process)
{
    logger_.info("Restarting process " + process.entity + "...");

    // only a process with a known id can be killed first
    final boolean hasProcessId = process.runtime.processId > 0;
    if (hasProcessId)
    {
        kill_process(process);
    }

    //if the number of restart has exceeded max restart, disable the process
    //NOTE: this should not prevent the manual start of this process from process monitor
    //      via the cosStartProcess() call.
    final boolean restartBudgetSpent = process.runtime.nStarts > maxRestart_;
    if (restartBudgetSpent)
    {
        logger_.warn("Process has exceeded max restart number. It will NOT be restarted!");
        process.runtime.state = CosProcessStatusEnum.StatUnstarted;
        process.runtime.requestedState = CosProcessStatusEnum.StatUnstarted;
        return;
    }

    //if it is an adhoc process, no need to restart it. we won't know how to anyway
    if (!process.adhoc)
    {
        start_process(process);
    }
    else
    {
        logger_.warn("Adhoc process. It will NOT be restarted!");
    }
}
/**
 * Forcibly stops a managed process and marks it as killed in the runtime data.
 * <p>
 * In-process instances are stopped through a reflective call of their
 * {@code stop()} method. External processes are killed by process id; when
 * the id is not valid or the native kill reports failure, the stored
 * {@code ProcessExecution} (if any) is stopped instead.
 * <p>
 * The native kill is never called with a process id &lt;= 0, and the
 * reflective call is skipped when there is no instance. The runtime data is
 * updated in every case.
 *
 * @param process managed process to kill
 */
private void kill_process(ManagedProcessStruct process)
{
    logger_.info("Killing process " + process.entity + "...");

    if (process.inProcess)
    {
        if (process.instance == null)
        {
            // nothing to call stop() on
            logger_.error("Invalid process instance for " + process.entity + ". Can not kill.");
        }
        else
        {
            //run in the same JVM
            try
            {
                Class<?> cls = Class.forName(process.config.commandLine);
                Method method = cls.getMethod("stop", new Class[] { });
                method.invoke(process.instance, new Object[] { } );
            }
            catch(Exception ex)
            {
                logger_.warn("Fail to kill process " + process.entity
                        + ". Exception: " + ex.toString());
            }
        }
    }
    else
    {
        boolean killed = false;
        if (process.runtime.processId <= 0)
        {
            // do not hand a non-positive id to the native kill
            logger_.error("Invalid process id for " + process.entity + ". Can not kill.");
        }
        else if (0 == Utilities.killProcess(process.runtime.processId))
        {
            //native call succeeded
            killed = true;
        }
        else
        {
            logger_.error("Fail to kill the process " + process.entity
                    + ". Process id: " + process.runtime.processId);
        }

        // fall back to stopping the execution context we started ourselves
        if (!killed && process.instance instanceof ProcessExecution)
        {
            try
            {
                ((ProcessExecution) process.instance).stop(100);
            }
            catch (Exception ex)
            {
                // best effort only: the runtime data below is updated regardless
                logger_.warn("Fail to stop execution of " + process.entity
                        + ". Exception: " + ex.toString());
            }
        }
    }

    //update runtime infos
    process.reference = null;
    process.instance = null;
    process.runtime.processId = 0;
    process.runtime.state = CosProcessStatusEnum.StatStopped;
    process.runtime.terminationCode = CosTerminationCodeEnum.TermKilled;
    process.runtime.requestedState = CosProcessStatusEnum.StatStopped;
    process.runtime.lastRequestDateTime = Utilities.getTimeInSecs();
}
/**
 * Asks a managed process to terminate and records the request in the
 * runtime data (state {@code StatTerminating}, requested state
 * {@code StatStopped}, request time).
 * <p>
 * The request is sent once, without retries. The runtime data is updated
 * whether or not the remote call succeeds; previously a successful call
 * returned early and left the requested state untouched.
 *
 * @param process managed process to terminate; nothing happens when its
 *                reference is not valid
 */
private void terminate_process(ManagedProcessStruct process)
{
    if (!CorbaManager.isValidReference(process.reference))
    {
        logger_.error("Invalid reference to managed process.");
        return;
    }

    //ask process to terminate
    //if we failed to ask the process to terminate, as long as we update the runtime infos
    //properly, then the process will be killed on the next polling cycle.
    //because of this, we do not need to attempt to send the command several times!
    try
    {
        process.reference.cosTerminate();
    }
    catch (Exception ex)
    {
        logger_.error("Can not ask process to terminate! Exception: "
                + ex.toString());
        logger_.error("The process will be killed in the next polling cycle.");
    }

    //update runtime info, on success as well as on failure
    // NOTE(review): presumably the poller acts on requestedState - confirm that
    // recording the request after a successful call matches its expectations.
    process.runtime.state = CosProcessStatusEnum.StatTerminating;
    process.runtime.requestedState = CosProcessStatusEnum.StatStopped;
    process.runtime.lastRequestDateTime = Utilities.getTimeInSecs();
}
/**
 * Asks a managed process for its status, trying up to
 * {@code CORBA_ERROR_THRESHOLD} times.
 *
 * @param process reference to ask
 * @return the status reported by the process, or {@code null} when the
 *         reference is not valid or every attempt failed
 */
private CosProcessStatusEnum get_process_status(ICosManagedProcess process)
{
    if (!CorbaManager.isValidReference(process))
    {
        logger_.error("Invalid reference to managed process.");
        return null;
    }

    // query the status; the first answer wins
    int attempt = 0;
    while (attempt < CORBA_ERROR_THRESHOLD)
    {
        try
        {
            return process.cosGetStatus();
        }
        catch (Exception ex)
        {
            logger_.error("(" + attempt + ") Can not get process' runtime status! Exception: "
                    + ex.toString());
        }
        attempt++;
    }
    return null;
}
/**
 * Sends an operation mode to a managed process, trying up to
 * {@code CORBA_ERROR_THRESHOLD} times.
 *
 * @param process reference to send to
 * @param mode    operation mode to send
 * @return {@code true} as soon as one call succeeds; {@code false} when the
 *         reference is not valid or every attempt failed
 */
private boolean set_operation_mode(ICosManagedProcess process, CosOperationModeEnum mode)
{
    if (!CorbaManager.isValidReference(process))
    {
        logger_.error("Invalid reference to managed process.");
        return false;
    }

    int attempt = 0;
    while (attempt < CORBA_ERROR_THRESHOLD)
    {
        try
        {
            process.cosSetOperationMode(mode);
            return true;
        }
        catch (Exception ex)
        {
            logger_.error("(" + attempt + ") Can not set process' operation mode! Exception: "
                    + ex.toString());
        }
        attempt++;
    }
    return false;
}
/**
 * Sends runtime parameters to a managed process, trying up to
 * {@code CORBA_ERROR_THRESHOLD} times.
 *
 * @param process reference to send to
 * @param params  runtime parameters to send
 * @return {@code true} as soon as one call succeeds; {@code false} when the
 *         reference is not valid or every attempt failed
 */
private boolean set_runtime_params(ICosManagedProcess process, CosRunParamStruct[] params)
{
    if (!CorbaManager.isValidReference(process))
    {
        logger_.error("Invalid reference to managed process.");
        return false;
    }

    int attempt = 0;
    while (attempt < CORBA_ERROR_THRESHOLD)
    {
        try
        {
            process.cosSetParams(params);
            return true;
        }
        catch (Exception ex)
        {
            logger_.error("(" + attempt + ") Can not set process' runtime parameters! Exception: "
                    + ex.toString());
        }
        attempt++;
    }
    return false;
}
/**
 * Registers this manager (by {@code name_} and its own object reference)
 * with a peer manager, trying up to {@code CORBA_ERROR_THRESHOLD} times.
 *
 * @param peer reference to the peer manager
 * @return {@code true} as soon as one call succeeds; {@code false} when the
 *         reference is not valid or every attempt failed
 */
private boolean register_with_peer(ICosProcessManager peer)
{
    if (!CorbaManager.isValidReference(peer))
    {
        logger_.error("Invalid reference to peer manager.");
        return false;
    }

    int attempt = 0;
    while (attempt < CORBA_ERROR_THRESHOLD)
    {
        try
        {
            peer.cosRegisterPeer(name_, this._this());
            return true;
        }
        catch (Exception ex)
        {
            logger_.error("(" + attempt + ") Can not register with peer! Exception: "
                    + ex.toString());
        }
        attempt++;
    }
    return false;
}
/**
 * Sends entity name, weightage and state of one managed process to a single
 * peer manager, trying up to {@code CORBA_ERROR_THRESHOLD} times.
 *
 * @param peer    reference to the peer manager
 * @param process managed process whose data is sent
 * @return {@code true} as soon as one call succeeds; {@code false} when the
 *         reference is not valid or every attempt failed
 */
private boolean synchronize_managed_process(ICosProcessManager peer,
        ManagedProcessStruct process)
{
    if (!CorbaManager.isValidReference(peer))
    {
        logger_.error("Invalid reference to peer manager.");
        return false;
    }

    int attempt = 0;
    while (attempt < CORBA_ERROR_THRESHOLD)
    {
        try
        {
            peer.cosSynchronizeManagedProcess(name_, process.entity,
                    process.runtime.weightage, process.runtime.state);
            return true;
        }
        catch (Exception ex)
        {
            logger_.error("(" + attempt + ") Can not synchronize with peer manager! Exception: "
                    + ex.toString());
        }
        attempt++;
    }
    return false;
}
/**
 * Sends the data of one managed process to every active peer that has a
 * reference. A peer's error counter is incremented when the synchronization
 * fails and reset to zero when it succeeds.
 *
 * @param process managed process whose data is sent
 */
private void synchronize_managed_process(ManagedProcessStruct process)
{
    for (PeerManagerStruct peer : activePeers_.values())
    {
        // skip entries that can not be called
        if (peer == null || peer.reference == null)
        {
            continue;
        }

        logger_.info("Synchronizing managed process " + process.entity + " with peer " + peer.name);
        final boolean delivered = synchronize_managed_process(peer.reference, process);
        if (delivered)
        {
            peer.errorCounter = 0;
        }
        else
        {
            logger_.warn("Fail to synchronize managed process " + process.entity + " with peer " + peer.name);
            peer.errorCounter++;
        }
    }
}
// private boolean synchronize_corba_server(ICosProcessManager peer,
// CorbaServerStruct process)
// {
// if (!CorbaManager.isValidReference(peer))
// {
// logger_.error("Invalid reference to peer manager.");
// return false;
// }
//
// //synchronize the process operation mode to peer process manager
// for (int i=0; i<CORBA_ERROR_THRESHOLD; i++)
// {
// try
// {
// peer.cosSynchronizeCorbaServer(name_, process.key, process.operationMode);
// return true;
// }
// catch (Exception ex)
// {
// logger_.error("(" + i + ") Can not synchronize with peer manager! Exception: "
// + ex.toString());
// }
// }
// return false;
// }
/**
 * Broadcasts the operation mode of one corba server to every active peer.
 *
 * For each peer the remote call is attempted up to CORBA_ERROR_THRESHOLD
 * times. A peer's error counter is reset on success and incremented when all
 * attempts failed. Peers without a reference are skipped.
 *
 * @param server corba server whose key and operation mode are sent
 */
private void synchronize_corba_server(CorbaServerStruct server)
{
    for (PeerManagerStruct target : activePeers_.values())
    {
        //skip entries we can not talk to
        if (target == null || target.reference == null)
        {
            continue;
        }

        logger_.info("Synchronizing corba server " + server.key + " with peer " + target.name);

        boolean delivered = false;
        int attempt = 0;
        while (!delivered && attempt < CORBA_ERROR_THRESHOLD)
        {
            try
            {
                target.reference.cosSynchronizeCorbaServer(name_, server.key, server.operationMode);
                delivered = true;
            }
            catch (Exception failure)
            {
                logger_.error("(" + attempt + ") Can not synchronize corba server with peer manager!"
                        + " Exception: " + failure.toString());
            }
            attempt++;
        }

        //book-keeping of the peer health
        if (delivered)
        {
            target.errorCounter = 0;
        }
        else
        {
            logger_.info("Fail to synchronize corba server " + server.key + " with peer " + target.name);
            target.errorCounter++;
        }
    }
}
/**
 * Finds a usable reference to the instance of a managed process that is
 * currently in CONTROL.
 *
 * The local reference is preferred when the local runtime state is
 * StatRunningControl and the reference answers cosPollControl(). Otherwise the
 * active peers are asked one by one and the first reference that answers
 * cosPollControl() is used.
 *
 * A peer's error counter is reset when the peer answered (including with
 * CosUnknownProcessException) and incremented on any other failure.
 *
 * @param process managed process to look up
 * @return a reference that answered cosPollControl(), or null if none was found
 */
private ICosManagedProcess get_active_managed_process(ManagedProcessStruct process)
{
    //try to use local reference if the local process is in CONTROL
    ICosManagedProcess processRef = null;
    if (process.runtime.state == CosProcessStatusEnum.StatRunningControl
            && process.reference != null)
    {
        processRef = process.reference;
        //validate it
        try
        {
            processRef.cosPollControl();
        }
        catch (Exception ex)
        {
            //ignore. just reset the reference
            logger_.trace("Can not poll control local managed process.");
            processRef = null;
        }
    }

    //if the local process is not suitable, try to get from peers. always use the first one found
    if (processRef == null)
    {
        Iterator<PeerManagerStruct> it = activePeers_.values().iterator();
        while (it.hasNext() && processRef == null)
        {
            //advance the iterator exactly once per pass, so that the check, the
            //remote call and the error counter all refer to the same peer
            PeerManagerStruct peer = it.next();

            //precautions
            if (peer == null || peer.reference == null)
            {
                continue;
            }

            //get from peer and validate it
            try
            {
                processRef = peer.reference.cosGetManagedProcess2(process.entity);
                if (processRef != null)
                {
                    processRef.cosPollControl();
                }
                peer.errorCounter = 0;
            }
            catch (CosUnknownProcessException ue)
            {
                //the peer is alive, it just does not know the process
                processRef = null;
                peer.errorCounter = 0;
            }
            catch (Exception ex)
            {
                //ignore. just reset the reference for precaution
                processRef = null;
                peer.errorCounter++;
            }
        }
    }
    return processRef;
}
/**
 * Finds a usable reference to the instance of a corba server that is
 * currently in CONTROL.
 *
 * The local reference is preferred when the local operation mode is
 * OperControl and the reference answers cosPollControl(). Otherwise the active
 * peers are asked one by one and the first reference that answers
 * cosPollControl() is used.
 *
 * A peer's error counter is reset when the peer answered (including with
 * CosUnknownProcessException) and incremented on any other failure.
 *
 * @param server corba server to look up
 * @return a reference that answered cosPollControl(), or null if none was found
 */
private ICosMonitoredThread get_active_corba_server(CorbaServerStruct server)
{
    //try to use local reference if the local corba server is in CONTROL
    ICosMonitoredThread serverRef = null;
    if (server.operationMode == CosOperationModeEnum.OperControl
            && server.reference != null)
    {
        serverRef = server.reference;
        //validate it
        try
        {
            serverRef.cosPollControl();
        }
        catch (Exception ex)
        {
            //ignore. just reset the reference
            logger_.trace("Can not poll control local corba server.");
            serverRef = null;
        }
    }

    //if the local server is not suitable, try to get from peers. always use the first one found
    if (serverRef == null)
    {
        Iterator<PeerManagerStruct> it = activePeers_.values().iterator();
        while (it.hasNext() && serverRef == null)
        {
            //advance the iterator exactly once per pass, so that the check, the
            //remote call and the error counter all refer to the same peer
            PeerManagerStruct peer = it.next();

            //precautions
            if (peer == null || peer.reference == null)
            {
                continue;
            }

            //get from peer and validate it
            try
            {
                serverRef = peer.reference.cosGetCorbaServer(server.key);
                if (serverRef != null)
                {
                    serverRef.cosPollControl();
                }
                peer.errorCounter = 0;
            }
            catch (CosUnknownProcessException ue)
            {
                //the peer is alive, it just does not know the server
                serverRef = null;
                peer.errorCounter = 0;
            }
            catch (Exception ex)
            {
                //ignore. just reset the reference for precaution
                serverRef = null;
                peer.errorCounter++;
            }
        }
    }
    return serverRef;
}
/**
 * Runs the post-registration handling of a managed process on a new thread.
 *
 * The work itself is done by an OnProcessRegistrationThread created for the
 * given process; this method only creates and starts the thread.
 *
 * @param process managed process that has just registered
 */
private void on_managed_process_registration(ManagedProcessStruct process)
{
    //hand the delayed operation over to its own thread and return at once
    new Thread(new OnProcessRegistrationThread(process)).start();
}
/**
 * Runs the external "zombie_killer" command for a managed process.
 *
 * Nothing is done for adhoc processes, for processes without a process id and
 * for processes without a reference. When the process has no kill keywords
 * configured, they are built from the command line, the entity and the
 * manager port, and stored back into the process configuration.
 *
 * The method waits for the command to finish and then echoes its stderr and
 * stdout to System.err / System.out. If the wait is interrupted, the output is
 * still drained and the interrupt flag of the calling thread is restored
 * before returning.
 *
 * @param process managed process whose zombies should be killed
 */
private void kill_zombie_process(ManagedProcessStruct process)
{
    logger_.info("Killing zombie process for process " + process.entity + "...");

    //adhoc process, I won't know how to kill the zombies
    if (process.adhoc)
    {
        logger_.warn("Adhoc process. I do not know how to kill the zombies!");
        return;
    }

    //not started?
    if (process.runtime.processId == 0 || process.reference == null)
    {
        return;
    }

    //build keywords to kill
    if (process.config.killKeywords.length() == 0)
    {
        process.config.killKeywords = process.config.commandLine + " "
                + "--entity " + process.entity + " "
                + "--mgr-port " + portNo_;
    }

    //build the command
    //NOTE(review): Runtime.exec(String) splits the command on whitespace and does
    // not involve a shell, so the double quotes and the trailing '&' are passed
    // to zombie_killer as literal arguments. Confirm against zombie_killer before
    // switching to an argument array.
    String cmd = "zombie_killer " + process.runtime.processId + " "
            + "\"" + process.config.killKeywords + "\" &";

    //verbose
    logger_.info("Execute: " + cmd);

    //exec the command
    //NOTE: maybe it is cleaner to just use a native call to fork out a process.
    //      that way we do not have to worry about cleaning up stderr and stdout.
    boolean interrupted = false;
    try
    {
        Process p = Runtime.getRuntime().exec(cmd);

        //wait until it finishes. this way we do not need a separate thread for
        //cleaning up the stdout and stderr. the command must not produce more
        //output than the pipe can buffer, otherwise it will block forever!!!
        try
        {
            p.waitFor();
        }
        catch (InterruptedException ie)
        {
            //remember it. the flag is restored once the clean up is done
            interrupted = true;
        }

        //clean up any output in stderr, then in stdout
        drain_zombie_killer_output(p.getErrorStream(), System.err, "[Stderr] ");
        drain_zombie_killer_output(p.getInputStream(), System.out, "[Stdout] ");
    }
    catch (IOException ioe)
    {
        logger_.warn("Fail to execute command. Exception: " + ioe.toString());
    }
    finally
    {
        if (interrupted)
        {
            Thread.currentThread().interrupt();
        }
    }
}

/**
 * Copies every line of a child process stream to the given sink, prefixed
 * with a label, and closes the stream even when reading fails.
 *
 * @param stream stream of the child process to read until end-of-stream
 * @param sink   where the lines are printed
 * @param prefix label put in front of every line
 * @throws IOException if reading or closing the stream fails
 */
private void drain_zombie_killer_output(InputStream stream, java.io.PrintStream sink, String prefix)
        throws IOException
{
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
    try
    {
        String line;
        while ((line = reader.readLine()) != null)
        {
            sink.println(prefix + line);
        }
    }
    finally
    {
        reader.close();
    }
}
/**
 * Tries to reconnect to managed processes that are still running from a
 * previous run of the process manager.
 *
 * For every known managed process the stored IOR is read and turned into a
 * reference. The process is then told the current manager port and asked for
 * its process id and operation mode (up to CORBA_ERROR_THRESHOLD attempts).
 * When a process id was obtained, the runtime data of the process is set to
 * the startup state and the reference is stored; the reference also becomes
 * the active reference when the process reported OperControl.
 *
 * Processes without a stored IOR, with an unusable IOR, or that can not be
 * reached are left untouched.
 */
private void start_managed_processes()
{
    //no need to synchronize with peer. the startup procedure will do it for us
    logger_.info("Reconnecting with running managed processes (if any)...");

    //prepare params for zombie processes: pass in the process manager port
    CosRunParamStruct[] params = new CosRunParamStruct[1];
    params[0] = new CosRunParamStruct();
    params[0].name = MANAGER_PORT_KEY.value;
    params[0].value = Integer.toString(portNo_);

    //try connecting to existing/zombie processes
    Iterator<ManagedProcessStruct> itProcess = managedProcesses_.values().iterator();
    while (itProcess.hasNext())
    {
        ManagedProcessStruct process = itProcess.next();
        if (process == null)
        {
            continue;
        }

        //get the ior string
        String iorString = CorbaManager.readIor(process.entity);
        if (iorString == null || iorString.length() == 0)
        {
            continue;
        }

        //build the managed process reference.
        //the reference is local to this iteration on purpose: a reference that
        //belongs to a previously handled process must never be reused here.
        ICosManagedProcess processRef = null;
        org.omg.CORBA.Object obj = CorbaManager.stringToObject(iorString);
        if (obj != null)
        {
            try
            {
                processRef = ICosManagedProcessHelper.narrow(obj);
            }
            catch (Exception ex)
            {
                //the object is not a managed process. handled below
                logger_.trace("Can not narrow the object. Exception: " + ex.toString());
                processRef = null;
            }
        }
        if (processRef == null)
        {
            logger_.warn("Invalid IOR to process " + process.entity + ". IOR String: "
                    + iorString);
            continue;
        }

        //critical section
        synchronized(managedProcesses_)
        {
            //reconnect to the zombie process
            long processId = 0;
            CosOperationModeEnum operMode = null;
            for (int i=0; i<CORBA_ERROR_THRESHOLD; i++)
            {
                try
                {
                    //notify process about our new port
                    processRef.cosSetParams(params);
                    //get the process id
                    processId = processRef.cosGetProcessId();
                    //get operation mode
                    operMode = processRef.cosGetOperationMode();
                    //notify managed process of a restart
                    //ideally, the managed process should then re-register to the process manager
                    //TODO
                    break;
                }
                catch (Exception ex)
                {
                    logger_.warn("Cannot connect to managed process: " + process.entity);
                    //TODO: maybe we should wait for a while here in case the machine is too loaded.
                }
            } //for (int i=0; i<CORBA_ERROR_THRESHOLD; i++)

            //no process id means we never got through to the process
            if (processId == 0)
            {
                continue;
            }

            //update the runtime value
            process.runtime.processId = processId;
            process.runtime.processType = CosProcessTypeEnum.ProcThread;
            process.runtime.state = CosProcessStatusEnum.StatStartup;
            process.runtime.startDateTime = Utilities.getTimeInSecs();
            process.runtime.requestedState = CosProcessStatusEnum.StatStarted;
            process.runtime.lastRequestDateTime = process.runtime.startDateTime;

            //store the reference. not necessary to since it will eventually re-register. hopefully.
            //but can be used to terminate the process
            process.reference = processRef;

            //initialize the active reference
            if (operMode == CosOperationModeEnum.OperControl)
            {
                process.activeReference = processRef;
            }
        } //end of critical section
    } //while (itProcess.hasNext())
}
/**
 * Tries to establish peering with every configured peer process manager.
 *
 * For each configured peer: the peer is marked as not-active, its corbaloc
 * reference is built, the peer is polled, this manager registers with it and
 * every managed process that has a process id is synchronized to it. The peer
 * is marked active and added to the active peer list unless the number of
 * failed synchronizations reaches DEF_ERROR_THRESHOLD.
 *
 * Each peer is handled while holding the lock on the active peer list, and the
 * peer name is pushed onto the logging context (NDC) for that time. The
 * context is popped again on every exit path.
 */
private void connect_to_peer_managers()
{
    //try to connect to every configured peers
    Iterator<PeerManagerStruct> itPeer = configuredPeers_.values().iterator();
    while (itPeer.hasNext())
    {
        PeerManagerStruct peer = itPeer.next();

        //critical section
        synchronized(activePeers_)
        {
            logger_.info("Establishing peering with peer manager " + peer.name);

            //add context. removed again in the finally block, whichever way we leave
            NDC.push(peer.name);
            try
            {
                //always make sure that by default every peer is not-active
                peer.isActive = false;

                //build the peer reference
                peer.ior = "corbaloc::" + peer.name + ":" + peer.portNo + "/ProcessManager";
                org.omg.CORBA.Object obj = CorbaManager.stringToObject(peer.ior);
                try
                {
                    peer.reference = ICosProcessManagerHelper.narrow(obj);
                }
                catch(Exception ex)
                {
                    logger_.warn("Can not connect to peer. Corbaloc: " + peer.ior);
                    peer.reference = null;
                }

                //make sure we have valid reference
                if (!CorbaManager.isValidReference(peer.reference))
                {
                    continue;
                }

                //poll the peer. poll_manager() returns true when the peer answered
                if (!poll_manager(peer.reference))
                {
                    logger_.info("Can not poll peer. Considered it as not-active");
                    continue;
                }

                //if we successfully poll a peer, that means the peer is alive.
                //we should register with the peer and establish the peering
                if (!register_with_peer(peer.reference))
                {
                    logger_.warn("Can not register with peer. Considered it as not-active");
                    continue;
                }

                //reset the error counter so that we can start using it
                peer.errorCounter = 0;

                //synchronized every managed process
                Iterator<ManagedProcessStruct> itProcess = managedProcesses_.values().iterator();
                while (itProcess.hasNext())
                {
                    ManagedProcessStruct process = itProcess.next();
                    if (process == null || process.runtime.processId == 0)
                    {
                        continue;
                    }
                    if (!synchronize_managed_process(peer.reference, process))
                    {
                        logger_.warn("Fail to synchronize process " + process.entity);
                        peer.errorCounter++;
                    }
                }

                //check if error threshold has been reached
                if (peer.errorCounter >= DEF_ERROR_THRESHOLD)
                {
                    logger_.error("Error threshold has been reached. Fail to establish peering." );
                    continue;
                }
                else if (peer.errorCounter > 0)
                {
                    logger_.warn("Fail to synchronize some processes! But since error threshold is not reached, "
                            + "we consider it to be acceptable.");
                    logger_.warn("I sincerely hope that the best-case scenarios will always prevail. :)");
                }

                //if we get here. that means everything's well. move the peer to active peer list
                logger_.info("Everything is OK (or nearly OK). Peering is established.");
                peer.isActive = true;
                activePeers_.put(peer.name, peer);
            }
            finally
            {
                NDC.pop();
            }
        } //end of critical section
    } //for each configured peer
}
/**
 * Calls kill_process() for every entry of the managed process map.
 *
 * Null entries are skipped. Peers are not informed here.
 */
private void stop_managed_processes()
{
    //no need to synchronize with peer. the startup procedure will do it for us
    logger_.info("Stopping running managed processes (if any)...");

    //walk the whole map and kill whatever is in there
    for (Iterator<ManagedProcessStruct> cursor = managedProcesses_.values().iterator();
            cursor.hasNext(); )
    {
        ManagedProcessStruct candidate = cursor.next();
        if (candidate != null)
        {
            kill_process(candidate);
        }
    }
}
/**
 * Decides which operation mode a managed process should be asked to run in.
 *
 * Without active peers the answer is always OperControl. Otherwise every
 * active peer is asked for its view of the process; as soon as one peer
 * reports the process in (or going to) CONTROL with a weightage that is equal
 * to or higher than ours, the answer is OperMonitor. Peers that do not know
 * the process or that can not be reached do not block going to CONTROL.
 *
 * A peer's error counter is reset when the peer answered (including with
 * CosUnknownProcessException) and incremented on any other failure.
 *
 * @param process managed process to decide for
 * @return OperControl or OperMonitor
 */
private CosOperationModeEnum get_expected_operation_mode(ManagedProcessStruct process)
{
    //if we do not have peers, always try to go to CONTROL
    if (activePeers_.size() == 0)
    {
        logger_.debug("No peers. Always try to go to CONTROL.");
        return CosOperationModeEnum.OperControl;
    }

    //our preference is to go to CONTROL
    boolean canGoToControl = true;

    //otherwise, check with all peers
    Iterator<PeerManagerStruct> it = activePeers_.values().iterator();
    while (it.hasNext() && canGoToControl)
    {
        //advance the iterator exactly once per pass, so that the check, the
        //remote call and the error counter all refer to the same peer
        PeerManagerStruct peer = it.next();

        //precautions
        if (peer == null || peer.reference == null)
        {
            continue;
        }

        //get from peer and validate it
        CosProcessRuntimeDataStruct peerProcess = null;
        try
        {
            peerProcess = peer.reference.cosGetProcessInfo2(process.entity);
            peer.errorCounter = 0;
        }
        catch (CosUnknownProcessException ue)
        {
            //the peer is alive, it just does not know the process
            peer.errorCounter = 0;
            continue;
        }
        catch (Exception ex)
        {
            //ignore. the peer can not veto if it can not be reached
            peer.errorCounter++;
            continue;
        }
        if (peerProcess == null)
        {
            continue;
        }

        //check peer operation mode
        CosOperationModeEnum peerOperMode = translate_process_status(peerProcess.state);

        //compare with our weightage
        if (peerOperMode == CosOperationModeEnum.OperControl
                && peerProcess.weightage >= process.runtime.weightage)
        {
            //one of the peer has equal or higher weightage. do not go to control
            canGoToControl = false;
        }
    }

    //can we go to control?
    if (canGoToControl)
    {
        logger_.debug("Has peers. Can go to CONTROL.");
        return CosOperationModeEnum.OperControl;
    }
    else
    {
        logger_.debug("Has peers. DO NOT go to CONTROL.");
        return CosOperationModeEnum.OperMonitor;
    }
}
/**
 * Builds the list of runtime parameters that is sent to a managed process.
 *
 * The result holds, in this order: one entry per runtime property, the log
 * file entry (LOG_FILE_KEY) and the log level entry (LOG_LEVEL_KEY). The log
 * file is "<entity>.log", placed below the log directory when one is set.
 *
 * @param process managed process the parameters are built for
 * @return the parameters as an array
 */
private CosRunParamStruct[] get_process_runtime_params(ManagedProcessStruct process)
{
    ArrayList<CosRunParamStruct> collected = new ArrayList<CosRunParamStruct>();

    //pass in anything in the runtime properties.
    Enumeration<?> keys = runtimeProps_.propertyNames();
    while (keys.hasMoreElements())
    {
        CosRunParamStruct entry = new CosRunParamStruct();
        entry.name = (String) keys.nextElement();
        entry.value = runtimeProps_.getProperty(entry.name);
        collected.add(entry);
    }

    //log file. this should have been passed in as command line argument!
    String logfile = (logdir_.length() > 0)
            ? logdir_ + "/" + process.entity + ".log"
            : process.entity + ".log";
    CosRunParamStruct logFileParam = new CosRunParamStruct();
    logFileParam.name = LOG_FILE_KEY.value;
    logFileParam.value = logfile;
    collected.add(logFileParam);

    //log level. should use cosSetLogLevel() directly
    CosRunParamStruct logLevelParam = new CosRunParamStruct();
    logLevelParam.name = LOG_LEVEL_KEY.value;
    logLevelParam.value = CorbaHelper.LogLevelToString(process.runtime.logLevel);
    collected.add(logLevelParam);

    return collected.toArray(new CosRunParamStruct[collected.size()]);
}
/**
 * Maps a process status to the operation mode it stands for.
 *
 * @param status process status to translate
 * @return OperControl for StatGoingToControl / StatRunningControl,
 *         OperMonitor for StatGoingToMonitor / StatRunningMonitor,
 *         OperNotApplicable for every other status
 */
private CosOperationModeEnum translate_process_status(CosProcessStatusEnum status)
{
    final int code = status.value();

    if (code == CosProcessStatusEnum._StatGoingToControl
            || code == CosProcessStatusEnum._StatRunningControl)
    {
        return CosOperationModeEnum.OperControl;
    }
    if (code == CosProcessStatusEnum._StatGoingToMonitor
            || code == CosProcessStatusEnum._StatRunningMonitor)
    {
        return CosOperationModeEnum.OperMonitor;
    }
    return CosOperationModeEnum.OperNotApplicable;
}
/**
 * Tells whether a process status is one of the control/monitor states.
 *
 * @param status process status to check
 * @return true for StatGoingToControl, StatRunningControl, StatGoingToMonitor
 *         and StatRunningMonitor; false for every other status
 */
private boolean is_running_status(CosProcessStatusEnum status)
{
    final int code = status.value();
    return code == CosProcessStatusEnum._StatGoingToControl
            || code == CosProcessStatusEnum._StatRunningControl
            || code == CosProcessStatusEnum._StatGoingToMonitor
            || code == CosProcessStatusEnum._StatRunningMonitor;
}
// public String object_to_string(org.omg.CORBA.Object obj)
// {
// return CorbaManager.objectToString(obj);
// }
//
// public org.omg.CORBA.Object string_to_object(String ior)
// {
// return CorbaManager.stringToObject(ior);
// }
//
// public boolean is_valid(org.omg.CORBA.Object obj)
// {
// return CorbaManager.isValidReference(obj);
// }
/**
 * Parses a decimal integer, falling back to 0 when the text is not a number.
 *
 * @param text text to parse
 * @return the parsed value, or 0 when Integer.parseInt() rejects the text
 *         (the reason is logged at trace level)
 */
protected int __parseInt(String text)
{
    try
    {
        return Integer.parseInt(text);
    }
    catch (NumberFormatException rejected)
    {
        logger_.trace("Exception: " + rejected.getMessage());
        return 0;
    }
}
/** -------------------------------------------------------------------------------- *
* WORKER THREADS *
* -------------------------------------------------------------------------------- **/
/**
 * Worker that periodically checks every managed process and every corba
 * server known to the process manager.
 *
 * Managed processes are started, restarted or killed depending on their
 * runtime state and the configured timeouts; corba servers that can not be
 * polled are removed from the map. The loop only runs while the process
 * manager itself is in StatGoingToControl or StatRunningControl when run()
 * is entered, and it ends when the manager goes to StatTerminating or
 * StatStopped, or when stop() is called.
 */
class ProcessPollerThread implements Runnable
{
    //written by stop() from another thread while run() reads it in its loop,
    //hence volatile
    private volatile boolean keepRunning_ = false;
    private Thread thread_ = null;

    /**
     * Starts the poller on a new thread, unless it is already running.
     */
    public void start()
    {
        //avoid running more than once
        if (thread_ != null && keepRunning_ && thread_.isAlive())
        {
            return;
        }

        //create a new poller thread
        thread_ = new Thread(this);

        //finally, start the poller thread
        thread_.start();
    }

    /**
     * Asks the poller to stop and waits up to DEF_THREAD_WAIT_MSEC for its
     * thread to end. Does nothing when the poller was never started.
     */
    public void stop()
    {
        if (thread_ == null)
        {
            return;
        }

        //stop the polling thread
        keepRunning_ = false;
        try
        {
            //wake it up in case it is sleeping between two polls
            thread_.interrupt();
            thread_.join(DEF_THREAD_WAIT_MSEC);
        }
        catch (InterruptedException ie)
        {
            //we stop waiting, but the caller must still see the interruption
            Thread.currentThread().interrupt();
        }
        thread_ = null;
    }

    /**
     * Main polling loop. See the class comment for what is checked.
     */
    public void run()
    {
        //must be initialized first and all process must be started
        if (state_ != CosProcessStatusEnum.StatGoingToControl
                && state_ != CosProcessStatusEnum.StatRunningControl)
        {
            return;
        }

        //time value. used to calculate how long it takes to poll all processes
        long polltime, difftime;
        long elapsetime;
        int nActiveProcesses;
        ArrayList<String> toDeleteList = new ArrayList<String>();
        Iterator<ManagedProcessStruct> itProcess = null;
        Iterator<CorbaServerStruct> itServer = null;
        //position inside the status synch interval. see pollSyncInterval_
        int counter = 0;
        CosProcessStatusEnum processStatus;

        logger_.debug("Going into main loop...");
        NDC.push("ProcessPoller");

        keepRunning_ = true;
        while(keepRunning_)
        { //infinite loop
            //start time
            polltime = Utilities.getTimeInSecs();

            //verbose. print out number of managed processes
            logger_.debug("managedProcesses.size(): " + managedProcesses_.size());

            //checking every process in the runtime process map
            itProcess = managedProcesses_.values().iterator();

            //clear list of tobe deleted process. just in case
            toDeleteList.clear();

            nActiveProcesses = 0;
            while (itProcess.hasNext())
            {
                //if process manager is terminating, stop polling the processes
                if (state_ == CosProcessStatusEnum.StatTerminating
                        || state_ == CosProcessStatusEnum.StatStopped)
                {
                    break;
                }

                //if loop is terminated, break out
                if (!keepRunning_) break;

                //critical section. mostly to keep everything consistent
                ManagedProcessStruct process = itProcess.next();
                synchronized(managedProcesses_)
                {
                    logger_.trace("Checking managed process '" + process.entity + "\". "
                            + "State: " + CorbaHelper.ProcessStateToString(process.runtime.state));

                    //add logger config
                    NDC.push(process.entity);

                    //NOTE: every 'break' inside this switch only leaves the switch,
                    //      so the NDC.pop() below is always reached
                    switch (process.runtime.state.value())
                    {
                    //not started yet
                    case CosProcessStatusEnum._StatNotRunning:
                        //ignore this process. process is disabled
                        break;

                    case CosProcessStatusEnum._StatUnstarted:
                        //if we are terminating, don't start any new process
                        if (state_ == CosProcessStatusEnum.StatTerminating
                                || state_ == CosProcessStatusEnum.StatStopped)
                        {
                            break;
                        }
                        //normal case: see if we are supposed to start it
                        if (!process.adhoc &&
                                process.config.operationMode != CosOperationModeEnum.OperNotApplicable)
                        {
                            //start the process
                            start_process(process);
                        }
                        break;

                    //started but not register yet
                    case CosProcessStatusEnum._StatStartup:
                        //consider it as active process
                        nActiveProcesses++;
                        //check if timeout has elapsed. if it has, restart the process
                        elapsetime = Utilities.getTimeInSecs() - process.runtime.lastRequestDateTime;
                        if (elapsetime > process.runtime.startTimeout)
                        {
                            logger_.warn("Timeout in startup (" + elapsetime + " sec). It will be restarted!");
                            //restart the process if necessary
                            restart_process(process);
                        }
                        break;

                    //have registered
                    case CosProcessStatusEnum._StatStarted:
                        //consider it as active process
                        nActiveProcesses++;
                        //check if I still have the reference to the process
                        if (process.reference == null || !poll_process(process.reference))
                        {
                            //I have lost it, restart it
                            restart_process(process);
                            break;
                        }
                        //set run time params
                        //it will be handled by the onManagedProcessRegistration()
                        break;

                    //is in transition to control state
                    case CosProcessStatusEnum._StatGoingToControl:
                        //consider it as active process
                        nActiveProcesses++;
                        //check if I still have the reference to the process
                        if (process.reference == null || !poll_process(process.reference))
                        {
                            //I have lost it, restart it
                            restart_process(process);
                            break;
                        }
                        //check if timeout has elapsed. if it has, restart the process
                        elapsetime = Utilities.getTimeInSecs() - process.runtime.lastRequestDateTime;
                        if (elapsetime > process.runtime.controlTimeout)
                        {
                            logger_.warn("Timeout in going to control (" + elapsetime + " sec). It will be restarted.");
                            //restart the process if necessary
                            restart_process(process);
                        }
                        break;

                    //is in transition to monitor state
                    case CosProcessStatusEnum._StatGoingToMonitor:
                        //consider it as active process
                        nActiveProcesses++;
                        //check if I still have the reference to the process
                        if (process.reference == null || !poll_process(process.reference))
                        {
                            //I have lost it, restart it
                            restart_process(process);
                            break;
                        }
                        //check if timeout has elapsed. if it has, restart the process
                        elapsetime = Utilities.getTimeInSecs() - process.runtime.lastRequestDateTime;
                        if (elapsetime > process.runtime.monitorTimeout)
                        {
                            logger_.warn("Timeout in going to monitor (" + elapsetime + " sec). It will be restarted.");
                            //restart the process if necessary
                            restart_process(process);
                        }
                        break;

                    /**
                     * Note:
                     * We only synchronize the stable state of a managed process (with our internal structure).
                     * The transition state is only used internally by both the process manager and managed process.
                     * That is:
                     * - As soon as process manager sends cosSetOperationMode(), it assumes (and sets the internal state)
                     *   that the managed process is in transition mode.
                     * - But the managed process may not transition right away, for example because of pending reconfiguration.
                     * - In this case, the managed process internal state would still be the previous state (say StatStarted)
                     *   even though the process manager's process structure has already changed to a transition state.
                     * Thus, it is not desirable to synchronize the state of managed process with the process manager
                     * internal process structure when it is in transition state.
                     **/

                    //is in the middle of terminating
                    case CosProcessStatusEnum._StatTerminating:
                        //consider it as active process
                        nActiveProcesses++;
                        //check if I still have the reference to the process
                        if (process.reference == null || !poll_process(process.reference))
                        {
                            //I have lost it, restart it
                            restart_process(process);
                            break;
                        }
                        //check if timeout has elapsed. if it has, kill the process
                        elapsetime = Utilities.getTimeInSecs() - process.runtime.lastRequestDateTime;
                        if (elapsetime > process.runtime.terminateTimeout)
                        {
                            logger_.warn("Timeout in temination (" + elapsetime + " sec). It will be killed.");
                            //just kill it
                            kill_process(process);
                            //inform peers
                            synchronize_managed_process(process);
                        }
                        break;

                    //have already terminated.
                    case CosProcessStatusEnum._StatStopped:
                        //if we are terminating, don't restart any new process
                        if (state_ == CosProcessStatusEnum.StatTerminating
                                || state_ == CosProcessStatusEnum.StatStopped)
                        {
                            break;
                        }
                        //if it is an adhoc process, schedule a deletion
                        if (process.adhoc)
                        {
                            toDeleteList.add(process.entity);
                            break;
                        }
                        //restart the process if the termination is not from process manager
                        if (process.runtime.requestedState != CosProcessStatusEnum.StatStopped)
                        {
                            start_process(process);
                        }
                        //just in case we miss it
                        process.reference = null;
                        break;

                    //stable state: either in control or in monitor
                    default:
                        //consider it as active process
                        nActiveProcesses++;
                        //check if I still have the reference to the process
                        if (process.reference == null)
                        {
                            //I have lost it, restart it
                            restart_process(process);
                            break;
                        }
                        //when it comes time, we will attempt to resync the status data
                        //TODO: maybe should use timestamp for each managed process for more proper
                        //      implementation of status synch interval/rate
                        if (counter == 0)
                        {
                            //get the actual status from managed process
                            processStatus = get_process_status(process.reference);
                            if (processStatus == null)
                            {
                                logger_.warn("Cannot get status. Restarting it.");
                                restart_process(process);
                            }
                            else if (processStatus != process.runtime.state)
                            {
                                //update the runtime state. this hopefully will trigger the necessary actions
                                process.runtime.state = processStatus;
                                //only reset the requested state if it is a stable state.
                                //this usually means the managed process has chosen to disobey the process manager
                                // and run its own operation mode based on its own criteria/judgement
                                if (processStatus == CosProcessStatusEnum.StatRunningControl
                                        || processStatus == CosProcessStatusEnum.StatRunningMonitor)
                                {
                                    process.runtime.requestedState = processStatus;
                                }
                            }
                            //force re-synch with all peers
                            synchronize_managed_process(process);
                        }
                        else
                        {
                            //other occasion, just try to poll/ping
                            if (!poll_process(process.reference))
                            {
                                //failed to get heartbeat signal from process
                                logger_.warn("Cannot poll managed process. It will be restarted.");
                                //restart the process
                                restart_process(process);
                            }
                        }
                        break;
                    }

                    //remove logger context
                    NDC.pop();
                } //end of critical section

                //increase the counter
                //NOTE(review): the counter advances once per polled process, not once
                // per poll cycle, so which process gets the status resync depends on
                // the number of processes. confirm this is intended.
                counter++;
                //reset the counter if it reaches the interval number
                if (counter >= pollSyncInterval_) {
                    counter = 0;
                }
            } //for each managed process

            //if terminating, just stopped
            if (state_ == CosProcessStatusEnum.StatTerminating
                    || state_ == CosProcessStatusEnum.StatStopped)
            {
                keepRunning_ = false;
                break;
            }

            //delete terminated ad-hoc process. critical section for consistency
            if (toDeleteList.size() > 0)
            {
                synchronized(managedProcesses_)
                {
                    //if there is process in the to-be-deleted list, delete it
                    for (int i=0; i<toDeleteList.size(); i++)
                    {
                        managedProcesses_.remove(toDeleteList.get(i));
                    }
                    toDeleteList.clear();
                } //end clean up section
            }

            //poll all corba servers
            //verbose. print out number of corba servers
            logger_.debug("corbaServers_.size(): " + corbaServers_.size());

            //checking every server in the corba server map
            itServer = corbaServers_.values().iterator();

            //clear list of tobe deleted servers. just in case
            toDeleteList.clear();

            while (itServer.hasNext())
            {
                //if process manager is terminating, stop polling the servers
                if (state_ == CosProcessStatusEnum.StatTerminating
                        || state_ == CosProcessStatusEnum.StatStopped)
                {
                    break;
                }

                //if loop is terminated, break out
                if (!keepRunning_) break;

                //critical section. mostly to keep everything consistent
                CorbaServerStruct server = itServer.next();
                synchronized(corbaServers_)
                {
                    logger_.trace("Checking corba server '" + server.key + "\". Operation Mode: "
                            + CorbaHelper.OperationModeToString(server.operationMode));

                    //add logger config
                    NDC.push(server.key);

                    if (!poll_process(server.reference))
                    {
                        logger_.warn("Cannot poll corba server. It will be deleted from the map.");
                        toDeleteList.add(server.key);
                    }

                    NDC.pop();
                } //end of critical section
            }

            //delete terminated corba server. critical section for consistency
            if (toDeleteList.size() > 0)
            {
                synchronized(corbaServers_)
                {
                    //if there is server in the to-be-deleted list, delete it
                    for (int i=0; i<toDeleteList.size(); i++)
                    {
                        corbaServers_.remove(toDeleteList.get(i));
                    }
                    toDeleteList.clear();
                } //end clean up section
            }

            //calculate how long to sleep until the next poll
            //NOTE(review): polltime comes from getTimeInSecs() while the poll rate is
            // named pollRateMillis_. if the names are accurate, seconds and
            // milliseconds are mixed here and the elapsed time is under-counted.
            // confirm the units before changing the formula.
            difftime = polltime + pollRateMillis_ - Utilities.getTimeInSecs();

            //sleep between polling
            if (keepRunning_ && difftime > 0)
            {
                try
                {
                    Thread.sleep(difftime);
                }
                catch(InterruptedException ie)
                {
                    //ignore. stop() interrupts us after clearing keepRunning_,
                    //which is checked by the loop condition
                }
            }

            //NOTE:
            //maybe should make each iteration in each own thread.
            //this way, pollRate should then be guaranteed.
            //but must be aware of possible sending conflicting message (between threads)
            // to managed processes
        }

        //stopped
        keepRunning_ = false;

        logger_.info("Main poller loop has exited...");

        //kill all managed processes. don't bother with gracious kill
        //NOTE: moved to stop()

        NDC.pop();
    }
};
class PeerSynchThread implements Runnable
{
private boolean keepRunning_ = false;
private Thread thread_ = null;
public void start()
{
//avoid running more than once
if (thread_ != null && keepRunning_ && thread_.isAlive())
{
return;
}
//create a new tcp server
thread_ = new Thread(this);
//finally, start the tcpserver thread
thread_.start();
}
public void stop()
{
if (thread_ == null)
{
return;
}
//stop the listen thread
keepRunning_ = false;
try
{
thread_.interrupt();
thread_.join(DEF_THREAD_WAIT_MSEC);
}
catch (InterruptedException ie)
{
//ignore
}
thread_ = null;
}
public void run()
{
//must be initialized first and all process must be started
if (state_ != CosProcessStatusEnum.StatStarted)
return;
logger_.info("Going into peer synch loop...");
NDC.push("PeerSynch");
long curtime, difftime;
ArrayList<String> toDeleteList = new ArrayList<String>();
Iterator<PeerManagerStruct> itPeer = null;
keepRunning_ = true;
while (keepRunning_)
{
curtime = Utilities.getTimeInSecs();
//if process manager is terminating, or no peers is active, quit from the loop
if (activePeers_.size() == 0
|| state_ == CosProcessStatusEnum.StatTerminating
|| state_ == CosProcessStatusEnum.StatStopped)
{
break;
}
//check the active peers one by one
itPeer = activePeers_.values().iterator();
while (itPeer.hasNext())
{
PeerManagerStruct peer = itPeer.next();
//check if it is time to poll this peer
if (peer.nextPollingMillis > curtime)
{
continue;
}
peer.nextPollingMillis = curtime + peer.heartbeatMillis;
//critical section
synchronized(activePeers_)
{
//add context
NDC.push(peer.name);
//poll the peer
if (poll_manager(peer.reference))
{
peer.errorCounter = 0;
}
else
{
peer.errorCounter++;
}
//check if error threshold has been reached
if (peer.errorCounter >= DEF_ERROR_THRESHOLD)
{
logger_.error("Error threshold has been reached. It will be deleted from list of active peers." );
//maximun retries. maybe peer has died
toDeleteList.add(peer.name);
}
NDC.pop();
} //end of critical section
} //for each active peer
//NOTE: not sure the concept of active peer will work in our case
// what about if network split happens for a very short while.
// both process manager will think that the other party has shutdown,
// and since there is no persistent attempt for reconnection when the
// network recover the peering will not recover!
//NOTE: we will always try to poll all pre-configured peers even if it is persistently
// down. this way, if the problem is temporary the peering will be re-established
// as soon as the problem is mitigated.
//check the configured peer one by one. configured peer represent non-active peer.
//this way, if there is a temporary network problem, the peering is reconnected
// as soon as the problem is fixed
itPeer = configuredPeers_.values().iterator();
while (itPeer.hasNext())
{
PeerManagerStruct peer = itPeer.next();
//if it is currently active, it has been handled by the previous segment
if (peer.isActive)
{
continue;
}
//check if it is time to poll this peer
if (peer.nextPollingMillis > curtime)
{
continue;
}
peer.nextPollingMillis = curtime + peer.heartbeatMillis;
//critical section
synchronized(activePeers_)
{
//add context
NDC.push(peer.name);
//build the peer reference if it is not build yet
if (peer.reference == null)
{
if (peer.ior.length() == 0)
{
peer.ior = "corbaloc::" + peer.name + ":" + peer.portNo + "/ProcessManager";
}
org.omg.CORBA.Object obj = CorbaManager.stringToObject(peer.ior);
peer.reference = ICosProcessManagerHelper.narrow(obj);
}
//make sure we have valid reference
if (!CorbaManager.isValidReference(peer.reference))
{
continue;
}
//poll the peer
//do not use the helper function poll_manager() because it will attempt
//ERROR_THRESHOLD times. always assume that any problem is persistent.
try
{
peer.reference.cosPoll();
}
catch(Exception ex)
{
//ignore
logger_.trace("Can not poll configured peer. Exception: "
+ ex.toString());
continue;
}
//if we successfully poll a peer, that means the peer is back alive.
//we should register with the peer and re-establish the peering
logger_.info("Peer " + peer.name + " is back ALIVE. Re-establishing peering...");
if (!register_with_peer(peer.reference))
{
logger_.warn("Can not register with peer.");
continue;
}
//reset the error counter so that we can start using it
peer.errorCounter = 0;
//synchronized every managed process
Iterator<ManagedProcessStruct> itProcess = managedProcesses_.values().iterator();
while (itProcess.hasNext())
{
ManagedProcessStruct process = itProcess.next();
if (process == null || process.runtime.processId == 0)
{
continue;
}
if (!synchronize_managed_process(peer.reference, process))
{
logger_.warn("Fail to synchronize process " + process.entity);
peer.errorCounter++;
}
}
//check if error threshold has been reached
if (peer.errorCounter >= DEF_ERROR_THRESHOLD)
{
logger_.error("Error threshold has been reached. Fail to re-establish peering." );
continue;
}
else if (peer.errorCounter > 0)
{
logger_.warn("Fail to synchronize some processes! But since error threshold is not reached, "
+ "we consider it to be acceptable.");
logger_.warn("I sincerely hope that the best-case scenarios will always prevail. :)");
}
//if we get here. that means everything's well. move the peer to active peer list
logger_.info("Everything is OK (or nearly OK). Peering is re-established.");
peer.isActive = true;
activePeers_.put(peer.name, peer);
NDC.pop();
} //end of critical section
} //for each configured peer
//if there is peer in the to-be-deleted list, delete it
if (toDeleteList.size() > 0)
{
synchronized(activePeers_)
{
//if there is peer manager in the to-be-deleted list, delete it
PeerManagerStruct peer = null;
for (int i=0; i<toDeleteList.size(); i++)
{
peer = activePeers_.remove(toDeleteList.get(i));
//if it is not an adhoc peer, put back to configured peer
if (peer != null && !peer.adhoc)
{
peer.isActive = false;
}
}
toDeleteList.clear();
} //end clean up section
}
//calculate elapse time for the last processing
//NOTE: there is no specific precision configuration to control how often the loop
// is run. thus, we just use the same parameter as the process poller
difftime = curtime + pollRateMillis_ - Utilities.getTimeInSecs();
//sleep between polling
if (keepRunning_ && difftime > 0)
{
try
{
Thread.sleep(difftime);
}
catch(InterruptedException ie)
{
//ignore
}
}
}
//reset. just in case
keepRunning_ = false;
logger_.info("Peer synch loop has exited...");
//start all managed process to control (only when we are not terminating)
logger_.info("Switching all managed processes to CONTROL...");
if (state_ != CosProcessStatusEnum.StatTerminating
&& state_ != CosProcessStatusEnum.StatStopped)
{
Iterator<ManagedProcessStruct> itProcess = managedProcesses_.values().iterator();
while(itProcess.hasNext())
{
ManagedProcessStruct process = itProcess.next();
if (process == null || process.reference == null
|| process.runtime.processId == 0)
{
continue;
}
//set the process to control
if (!set_operation_mode(process.reference, CosOperationModeEnum.OperControl))
{
logger_.warn("Can not switch process " + process.entity + " to CONTROL.");
}
} //for each managed process
} //while not terminating
NDC.pop();
}
}
/**
 * Task that performs the registration procedure for one managed process.
 *
 * When run, it reads the actual status of the process, synchronizes the
 * process with the peer, then pushes the runtime parameters and the expected
 * operation mode to the process. If any of these steps fails the process is
 * restarted. Finally the requested state of the process is recorded.
 */
class OnProcessRegistrationThread implements Runnable
{
    /** The managed process being registered. May be null, in which case run() does nothing. */
    ManagedProcessStruct process_ = null;

    /**
     * @param process the managed process to run the registration procedure for
     */
    public OnProcessRegistrationThread(ManagedProcessStruct process)
    {
        process_ = process;
    }

    /**
     * Executes the registration procedure. Returns immediately when there is
     * no process or the process has no CORBA reference.
     */
    public void run()
    {
        if (process_ == null || process_.reference == null)
        {
            return;
        }

        logger_.info("Performing registration procedure for " + process_.entity);

        //add logging context. it is removed again in the finally block below so
        //that the context never leaks into later log output of the same thread,
        //even when one of the steps throws.
        NDC.push("Registration:" + process_.entity);
        try
        {
            //synchronize process status
            CosProcessStatusEnum status = get_process_status(process_.reference);
            if (status != null)
            {
                logger_.info("Expected status: " + CorbaHelper.ProcessStateToString(process_.runtime.state) +
                        ". Actual status: " + CorbaHelper.ProcessStateToString(status));
                process_.runtime.state = status;

                //if it is a stable state, set the requested state so that we do not
                // switch an existing process unnecessarily
                //NOTE: this is not useful because in any case we will resend the operation mode!
                if (status == CosProcessStatusEnum.StatRunningControl
                        || status == CosProcessStatusEnum.StatRunningMonitor)
                {
                    process_.runtime.requestedState = status;
                }
            }

            //synchronize with peer
            synchronize_managed_process(process_);

            //get run time params and operation mode
            CosOperationModeEnum operMode = get_expected_operation_mode(process_);
            CosRunParamStruct[] params = get_process_runtime_params(process_);

            //NOTE(review): after each restart_process() call below, the remaining
            // steps still run against the same reference and may trigger further
            // restarts. confirm whether an early return is intended.
            if (operMode == CosOperationModeEnum.OperNotApplicable)
            {
                logger_.error("Can not determine expected operation mode. Restarting it.");
                restart_process(process_);
            }

            //send the runtime params to managed process
            if (!set_runtime_params(process_.reference, params))
            {
                logger_.warn("Can not set runtime parameters. Restarting it.");
                restart_process(process_);
            }

            //send the operation mode to managed process
            //TODO: this might switch the current status of managed process! not sure we want to do that!
            if (!set_operation_mode(process_.reference, operMode))
            {
                logger_.warn("Can not set operation mode. Restarting it.");
                restart_process(process_);
            }

            //requested operation mode
            if (operMode == CosOperationModeEnum.OperControl)
            {
                process_.runtime.requestedState = CosProcessStatusEnum.StatRunningControl;
            }
            else
            {
                process_.runtime.requestedState = CosProcessStatusEnum.StatRunningMonitor;
            }

            //NOTE:
            //do not set the peer process operation mode.
            //DON'T!!! There is a danger that both processes will be in MONITOR.
            //anyway, we have already synchronized our status with the peer.
            //if the managed process is already in a stable state (control, monitor),
            // the sync would have notified the peer.
            //if the managed process is still starting, then when it switches to a stable
            // state it should call cosProcessStatusChanged(), on which we would
            // synchronize the status with the peer!
        }
        finally
        {
            //balance the NDC.push() above
            NDC.pop();
        }
    } //run()
} //class OnProcessRegistrationThread
}
/**
 * Thread that stops a ProcessManager instance when it is run.
 *
 * The class name and the log message indicate it is meant to be registered as
 * a JVM shutdown hook; the registration itself is not part of this class.
 */
class ShutdownHook extends Thread
{
    /** The process manager to stop. */
    ProcessManager instance_;

    /**
     * @param instance the process manager that is stopped when this thread runs
     */
    public ShutdownHook(ProcessManager instance)
    {
        this.instance_ = instance;
    }

    /**
     * Logs a warning and calls stop() on the process manager.
     */
    @Override
    public void run()
    {
        ProcessManager.logger_.warn("JVM is shutting down...");

        //only stop() is invoked. a call to shutdown() used to follow here
        //but it is disabled.
        instance_.stop();
    }
}
/**
 * Plain data holder describing one process handled by the process manager.
 * All fields are public and mutable.
 */
class ManagedProcessStruct
{
    /** Name of the process; used to identify it in log messages. */
    public String entity = "";

    /** Configuration data of the process. A fresh, empty struct is created per instance. */
    public CosProcessDataStruct config = new CosProcessDataStruct();

    /** Runtime data of the process (e.g. process id, state, requested state). */
    public CosProcessRuntimeDataStruct runtime = new CosProcessRuntimeDataStruct();

    /** CORBA reference to the managed process itself; null until it is known. */
    public ICosManagedProcess reference;

    /** Presumably the reference to the same process on the peer side -- TODO confirm. */
    public ICosManagedProcess peerReference;

    /** Presumably the reference that is currently in control -- TODO confirm. */
    public ICosManagedProcess activeReference;

    /** Ad-hoc flag; presumably set for processes that are not pre-configured -- TODO confirm. */
    public boolean adhoc;

    /** True when the process is run in-process (inside this JVM). */
    public boolean inProcess;

    /** For an in-process run: the running instance, kept so that it can be stopped. */
    public Object instance;

    /**
     * Creates an entry with empty configuration and runtime structs.
     */
    public ManagedProcessStruct()
    {
        //nothing to do: every field is set up by its initializer
    }
}
/**
 * Plain data holder for a monitored CORBA server thread.
 * All fields are public and mutable.
 */
class CorbaServerStruct
{
    /** Key of this entry; presumably the lookup key in the owning map -- TODO confirm. */
    public String key = "";

    /** Operation mode of the server; starts as "not applicable". */
    public CosOperationModeEnum operationMode = CosOperationModeEnum.OperNotApplicable;

    /** CORBA reference to the monitored thread; null until it is known. */
    public ICosMonitoredThread reference;

    /** Presumably the reference to the counterpart on the peer side -- TODO confirm. */
    public ICosMonitoredThread peerReference;

    /** Presumably the reference that is currently active -- TODO confirm. */
    public ICosMonitoredThread activeReference;
}
/**
 * Plain data holder describing one peer process manager.
 * All fields are public and mutable; they are read and updated by the
 * peer synchronization loop.
 */
class PeerManagerStruct
{
    /** Peer name. Used as key in the peer maps and as host part of the default corbaloc address. */
    public String name = "";

    /** Port number used when the default corbaloc address is built. */
    public int portNo;

    /** Stringified object reference of the peer. When empty, a corbaloc address is built from name and port. */
    public String ior = "";

    /** CORBA reference to the peer process manager; built from the ior on first use. */
    public ICosProcessManager reference;

    /** Interval that is added to the current time to schedule the next poll of this peer. */
    public int heartbeatMillis;

    /** Time at which this peer is due to be polled again. */
    public long nextPollingMillis;

    /** True for an ad-hoc peer. Only non-ad-hoc peers are marked inactive (rather than just dropped) when removed from the active list. */
    public boolean adhoc;

    /** True while the peer is in the list of active peers. */
    public boolean isActive;

    /** Number of errors counted for this peer; compared against the error threshold. */
    public int errorCounter;
}