Package tcg.syscontrol

Source Code of tcg.syscontrol.ManagedProcessStruct

package tcg.syscontrol;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Method;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Properties;

import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.NDC;

import tcg.syscontrol.cos.CosFailedToRegisterException;
import tcg.syscontrol.cos.CosIndexOutOfBoundException;
import tcg.syscontrol.cos.CosLogLevelEnum;
import tcg.syscontrol.cos.CosOperationModeEnum;
import tcg.syscontrol.cos.CosOperationModeEnumHolder;
import tcg.syscontrol.cos.CosProcessDataStruct;
import tcg.syscontrol.cos.CosProcessRunningException;
import tcg.syscontrol.cos.CosProcessRuntimeDataStruct;
import tcg.syscontrol.cos.CosProcessStatusEnum;
import tcg.syscontrol.cos.CosProcessTypeEnum;
import tcg.syscontrol.cos.CosRunParamStruct;
import tcg.syscontrol.cos.CosTerminationCodeEnum;
import tcg.syscontrol.cos.CosUnknownProcessException;
import tcg.syscontrol.cos.ICosManagedProcess;
import tcg.syscontrol.cos.ICosManagedProcessHelper;
import tcg.syscontrol.cos.ICosMonitoredThread;
import tcg.syscontrol.cos.ICosProcessManager;
import tcg.syscontrol.cos.ICosProcessManagerHelper;
import tcg.syscontrol.cos.ICosProcessManagerPOA;
import tcg.syscontrol.cos.DEFAULT_MGR_PORT;
import tcg.syscontrol.cos.LOG_FILE_KEY;
import tcg.syscontrol.cos.LOG_LEVEL_KEY;
import tcg.syscontrol.cos.MANAGER_PORT_KEY;
import tcg.syscontrol.cos.STR_PROCESS_MANAGER;

import tcg.common.CorbaHelper;
import tcg.common.CorbaManager;
import tcg.common.DatabaseManager;
import tcg.common.Utilities;
import tcg.common.LoggerManager;
import tcg.common.DatabaseManager.DatabaseType;
import tcg.common.util.ProcessExecution;

public class ProcessManager extends ICosProcessManagerPOA
{
  private static final String VERSION = "02.00 (20091115)";
 
  private static final int DEF_THREAD_WAIT_MSEC = 1000//in msec
  private static final int DEF_HEARTBEAT_RATE_MSEC = 1000//in msec
  private static final int DEF_MAX_RESTART = 10;
  private static final int DEF_POLL_RATE_MSEC = 1000;
  private static final int DEF_PEER_SYNC_RATE_MSEC = 1000;
 
  //default timeout value
  private static final int DEF_START_TIMEOUT_SEC = 60;
  private static final int DEF_CONTROL_TIMEOUT_SEC = 60;
  private static final int DEF_MONITOR_TIMEOUT_SEC = 60;
  private static final int DEF_TERMINATE_TIMEOUT_SEC = 60;
 
  //configuration file
  private static final String DEF_CONFIG_FILE = "scada.properties";

  //database table
  private static final String DEF_TABLE_MANAGER = "SC_PROCESS_MANAGER";
  private static final String DEF_TABLE_PROCESSLIST = "SC_MANAGED_PROCESS";
  private static final String DEF_TABLE_PROCESSCONFIG = "SC_PROCESS_CONFIG";

  //general error threshold
  private static final int DEF_ERROR_THRESHOLD = 3;
 
  //how many times I attempt to do failed corba operation
  //NOTE: internally, jacorb has tried several times before reporting failure on a
  //      corba operation. so we do not need to try multiple times anymore
  private static final int CORBA_ERROR_THRESHOLD = 1;
 
  protected static Logger logger_ = LoggerManager.getLogger(ProcessManager.class.toString());
 
  //list of managed process
  private HashMap<String, ManagedProcessStruct> managedProcesses_
                         = new HashMap<String, ManagedProcessStruct>();
  //corba server lookup
  private HashMap<String, CorbaServerStruct>  corbaServers_
                         = new HashMap<String, CorbaServerStruct>();
  //active peer lookup
  private HashMap<String, PeerManagerStruct>  activePeers_
                         = new HashMap<String, PeerManagerStruct>();
 
  //server key to uniquely identify this corba server
  private String    serverKey_ = STR_PROCESS_MANAGER.value;

  //runtime configuration
  private String    name_ = "";
  private int      portNo_ = 0;
  private int      maxRestart_ = DEF_MAX_RESTART;
  private int      pollRateMillis_ = DEF_POLL_RATE_MSEC;
 
  private String    configFile_ = "";
  private Properties  props_ = new Properties();
  private String    logdir_ = "";
 
  //how often we synch the status with managed process
  //this would ensure that the internal state is consistent with the actual process state
  //even if the managed process is not very compliance with the process manager.
  private int      pollSyncInterval_ = 60
 
  //runtime parameters that we need to pass to managed process
  private Properties  runtimeProps_ = new Properties();
 
  //configured peers from database
  private HashMap<String, PeerManagerStruct>  configuredPeers_
                         = new HashMap<String, PeerManagerStruct>();
 
  //process state
  private CosProcessStatusEnum state_ = CosProcessStatusEnum.StatUnstarted;
 
  //worker thread
  private ProcessPollerThread    processPoller_ = new ProcessPollerThread();
  private PeerSynchThread      peerSynch_ = new PeerSynchThread();
 
  /**
   * Main entry.
   * @param args   - list of command line arguments
   */
  public static void main(String[] args)
  {
    //if arguments has "--version", print version number and quit
    String logfile = "";
    for (int i=0; i<args.length; i++)
    {
      if (args[i].equalsIgnoreCase("--version") || args[i].equalsIgnoreCase("-V"))
      {
        printVersion();
        return;
      }
      else if (args[i].equalsIgnoreCase("--help") || args[i].equalsIgnoreCase("-h"))
      {
        printUsage();
        return;
      }
      else if (args[i].equalsIgnoreCase("--logdir") || args[i].equalsIgnoreCase("-l"))
      {
        if (++i<args.length)
        {
          logfile = args[i] + File.separatorChar + "processmanager.log";       
        }
      }
    }
       
    //create the Scheduling Agent instance
    ProcessManager instance = new ProcessManager();   
        //reset logging
        if (logfile.length() > 0)
        {
          LoggerManager.setLogFile(logfile);
        }
        else
        {
          logfile = Utilities.getCurrentDir() + File.separatorChar + "processmanager.log";
          LoggerManager.setLogFile(logfile);
        }
           
        logger_.info("---- Process Manager starting ----");
       
        //parse command line arguments
        if (!instance.initialize(args))
        {
          logger_.error("Failed to initialize. Quitting!");
          logger_.info("---- Process Manager has shut down ----");     
          return;
        }
       
    //Print out all configuration as visual feedback
        for(Enumeration<?> enumeration = instance.props_.propertyNames();
            enumeration.hasMoreElements();)
        {
            String str = (String)enumeration.nextElement();
            //except if it contain the word "password" in its name
            if (str.toLowerCase().contains("password"))
            {
              logger_.info("Property " + str + " = " +
                      instance.props_.getProperty(str).replaceAll(".", "*"));
            }
            else
            {
              logger_.info("Property " + str + " = " +
                      instance.props_.getProperty(str));
            }
        }
      
        //create a shutdown hook to catch CTRL+C and other abrupt termination
        ShutdownHook hook = new ShutdownHook(instance);
        try
        {
          Runtime.getRuntime().addShutdownHook(hook);
        }
        catch(Exception ex)
        {
          logger_.warn("Can not install shutdown hook. Exception: " + ex.toString());
        }

        //run the process manager
        logger_.info("---- Process Manager is running ----");
      instance.run();
       
        //shutting down
      logger_.info("---- Process Manager is shutting down ----");        
     
      //clean up
      //Nothing
     
      //done
      logger_.info("---- Process Manager has shut down ----");        
  }
 
  protected void shutdown()
  {
    //called when the jvm is shutting down
    //stop all managed processes
    stop_managed_processes();   
  }
 
  private static void printVersion()
  {
    System.out.println("Process Manager Version " + VERSION);
  }
 
  private static final void printUsage()
  {
    System.out.println("Process Manager Version " + VERSION);
    System.out.println("");
    System.out.println("Command Line Parameters:  ");
    System.out.println(" -n  | --name <hostname>            Process Manager hostname");
    System.out.println(" -l  | --logdir <directory>         Log directory");
    System.out.println(" -f  | --config-file <config-file>  Configuration file");
    System.out.println(" -cp | --corba-port <port-no>       Corba port to bind to");
    System.out.println(" -pn | --peer-name <host-name>      Peer hostname (if running as stand-alone)");
    System.out.println(" -pp | --peer-port <port-no>        Peer port number (if running as stand-alone)");
    System.out.println(" -h  | --help                       Print out this help");
    System.out.println(" -v  | --version                    Print out program version");
    System.out.println("");
    System.out.println("Java System Properties (java -Dname=value option):  ");
    System.out.println("");
    System.out.println("Other Java Properties (Configuration file or System properties):  ");
    System.out.println(" tcg.db.type         Database type");
    System.out.println(" tcg.db.name         Database TNS name");
    System.out.println(" tcg.db.user         Database username");
    System.out.println(" tcg.db.password     Database password");
    System.out.println(" tcg.db.encyrpted    Whether the password is encrypted");
    System.out.println(" tcg.event.server1   Primary event server");
    System.out.println(" tcg.event.server2   Secondary event server");
    System.out.println("");
    System.out.println("Command line parameters will override java properties and configuration file value.");
    System.out.println("");
    System.out.println("Component Library Information:");
    System.out.println("\t - Quartz Scheduler Ver. 1.6.0");
    System.out.println("\t - Log4J Ver. 1.2.12");
    System.out.println("\t - Oracle JDBC Ver. 10.2");
    System.out.println("\t - JacORB Ver. 2.3");
    System.out.println("\t - Avalon Framework Ver. 4.1.5");
    System.out.println("");
    System.out.println("Other Component Information:");
    System.out.println("");
  }
 
  /** -------------------------------------------------------------------------------- *
    * PROCESS MANAGER OPERATION                                                        *
    * -------------------------------------------------------------------------------- **/

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosTerminate()
   */
  public void cosTerminate()
  {
    stop();
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetProcessManagerName()
   */
  public String cosGetProcessManagerName()
  {
    return name_;
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosSetLogLevel(tcg.syscontrol.cos.CosLogLevelEnum)
   */
  public void cosSetLogLevel(CosLogLevelEnum loglevel)
  {
    logger_.info("Setting new global log level to " + CorbaHelper.LogLevelToString(loglevel));

    //convert to log4j loglevel
    Level level = null;
    switch(loglevel.value())
    {
    case CosLogLevelEnum._LogTrace:
      level = Level.TRACE;
      break;
    case CosLogLevelEnum._LogDebug:
      level = Level.DEBUG;
      break;
    case CosLogLevelEnum._LogInfo:
      level = Level.INFO;
      break;
    case CosLogLevelEnum._LogNormal:
      //default/normal logging level is INFO
      level = Level.INFO;
      break;
    case CosLogLevelEnum._LogWarn:
      level = Level.WARN;
      break;
    case CosLogLevelEnum._LogError:
      level = Level.ERROR;
      break;
    case CosLogLevelEnum._LogFatal:
      level = Level.FATAL;
      break;
    default:
      level = Level.DEBUG;
    }

    //set the global logging level
    LoggerManager.setLogLevel(level);
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosSetLogLevelDetail(java.lang.String, java.lang.String)
   */
  public void cosSetLogLevelDetail(String logger, CosLogLevelEnum loglevel)
  {
    logger_.info("Setting new log level to " + CorbaHelper.LogLevelToString(loglevel)
            + " for Logger " + logger );
   
    //convert to log4j log level
    Level level = null;
    switch(loglevel.value())
    {
    case CosLogLevelEnum._LogTrace:
      level = Level.TRACE;
      break;
    case CosLogLevelEnum._LogDebug:
      level = Level.DEBUG;
      break;
    case CosLogLevelEnum._LogInfo:
      level = Level.INFO;
      break;
    case CosLogLevelEnum._LogNormal:
      //default/normal logging level is INFO
      level = Level.INFO;
      break;
    case CosLogLevelEnum._LogWarn:
      level = Level.WARN;
      break;
    case CosLogLevelEnum._LogError:
      level = Level.ERROR;
      break;
    case CosLogLevelEnum._LogFatal:
      level = Level.FATAL;
      break;
    default:
      level = Level.DEBUG;
    }

    //set the log level
    LoggerManager.setLogLevelDetail(logger, level);
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosPoll()
   */
  public void cosPoll()
  {
    return;      //just return
  }

  /** -------------------------------------------------------------------------------- *
    * GENERAL PEER SYNCHRONIZATION                                                     *
    * -------------------------------------------------------------------------------- **/
 
  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosRegisterPeer(java.lang.String, tcg.syscontrol.cos.ICosProcessManager)
   */
  public void cosRegisterPeer(String peerName, ICosProcessManager p_peerManager)
      throws CosFailedToRegisterException
  {
    logger_.debug("Peer " + peerName + " is trying to register.");

    //cannot store process with empty process name
    if (peerName.length() == 0)
    {
      throw new CosFailedToRegisterException();
    }

    //logging context
    NDC.push(peerName);
   
    //verbose
    String iorString = CorbaManager.objectToString(p_peerManager);
    logger_.trace("Peer Manager IOR: " + iorString );

    if (!poll_manager(p_peerManager))
    {
      logger_.error("Can not poll the registering peer!!!");
      NDC.pop();
      throw new CosFailedToRegisterException();
    }
    else
    {
      logger_.debug("Registering peer manager is running. I can poll!");
   
    //End of debugging
   
    //critical section
    PeerManagerStruct peer = null;
    synchronized(activePeers_)
    {
      //get the current reference if any
      peer = activePeers_.get(peerName);
      if (peer != null)
      {
        //entry exist. update existing reference
        peer.reference = p_peerManager;
        peer.ior = iorString;
      }
      else
      {
        //does not exist. try the pre-configured list
        peer = configuredPeers_.get(peerName);
        if (peer != null)
        {
          //update existing reference
          peer.reference = p_peerManager;
          peer.ior = iorString;
          //set it to active and move it to active peer list
          peer.isActive = true;
          activePeers_.put(peer.name, peer);
        }
        else
        {
          //does not exist anywhere. create a new struct.
          peer = new PeerManagerStruct();
          peer.name = peerName;
          peer.adhoc = true;
          //use heart beat rate retrieved from database configuration if is available
          peer.heartbeatMillis = DEF_HEARTBEAT_RATE_MSEC;
          //set the reference
          peer.reference = p_peerManager;
          peer.ior = iorString;
          //add into the list
          peer.isActive = true;
          activePeers_.put(peerName, peer);
        //can not get from configured peer
      //can not get from active peer
    }   //end critical section

    //don't synchronize with the new peer. let it synchronize with me

    //notify all managed processes
    //TODO

    //start peer poller if it is not already running
    peerSynch_.start();

    NDC.pop();
    logger_.debug("Peer " + peerName +" has sucessfully registered.");
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosUnregisterPeer(java.lang.String)
   */
  public void cosUnregisterPeer(String peerName)
  {
    logger_.debug("Peer " + peerName + " is trying to unregister.");

    //logging context
    NDC.push(peerName);
   
    //critical section
    PeerManagerStruct manager = null;
    synchronized(activePeers_)
    {
      //get the current reference if any
      manager = activePeers_.get(peerName);
      if (manager == null)
      {
        logger_.warn("Not recognized peer manager name. Ignored.");
        NDC.pop();
        return;
      }
      //delete from the list
      activePeers_.remove(peerName);
      //if it is not adhoc peer, move it back to list of preconfigured peer list
      if (!manager.adhoc)
      {
        manager.isActive = false;
      }
    }   //end of critical section

    //notify all managed processes
    //TODO

    //no need to stop peer poller if it is running. it will stop itself if there is no more peer.

    NDC.pop();
    logger_.debug("Peer " + peerName + " has successfully unregistered.");
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetPeerManager(java.lang.String)
   */
  public ICosProcessManager cosGetPeerManager(String peerName)
  {
    PeerManagerStruct  manager = null;
    if (peerName == null || peerName.length() > 0)
    {
      //get the first peer available
      Iterator<PeerManagerStruct> it = activePeers_.values().iterator();
      if (it.hasNext())
      {
        manager = it.next();
      }
    }
    else
    {
      manager = activePeers_.get(peerName);
    }
   
    //return value
    if (manager != null)
    {
      return manager.reference;
    }
    return null;
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosSynchronizeManagedProcess(java.lang.String, java.lang.String, short, tcg.syscontrol.cos.CosProcessStatusEnum)
   *
   * Called by a peer to report the status of its instance of a managed process.
   *
   * The call is ignored when there is no active peer, when this manager is not in
   * StatRunningControl, when the peer or the local process is unknown or has no
   * reference, or when the local process is not in a running status.
   *
   * If the peer process is going to / running in CONTROL while the local process is in
   * CONTROL too, the weightage decides: with a higher or equal local weightage the peer
   * is asked to set its process to MONITOR, otherwise the local process is asked to go
   * to MONITOR. Once the peer process reports StatRunningControl, its reference becomes
   * the active reference of the local entry.
   * For any other peer status the local process is asked to go to CONTROL.
   *
   * peerName  - name of the reporting peer manager
   * entity    - name of the managed process
   * weightage - weightage of the peer's process
   * status    - status reported for the peer's process
   */
  public void cosSynchronizeManagedProcess(String peerName, String entity,
      short weightage, CosProcessStatusEnum status)
  {
    //if no peer is active or this manager is not in RunningControl, ignore it
    if (activePeers_.size() == 0 || state_ != CosProcessStatusEnum.StatRunningControl)
    {
      return;
    }

    logger_.debug("Peer " + peerName + " status change. Entity: " + entity +". Weight: " +weightage +". Status: "
            + CorbaHelper.ProcessStateToString(status) );

    //logging context
    NDC.push(peerName + ":" + entity);
       
    //get the peer reference
    PeerManagerStruct peer = activePeers_.get(peerName);
    if (peer == null || peer.reference == null)
    {
      //can not find the associated peer
      logger_.warn("Invalid peer name. Synchronization ignored!");
      NDC.pop();
      return;
    }

    //get the runtime info
    ManagedProcessStruct process = managedProcesses_.get(entity);
    if (process == null || process.reference == null)
    {
      //no matching managed process, or it has no reference. ignore
      logger_.warn("Invalid/unknown managed process. Synchronization ignored!");
      NDC.pop();
      return;
    }

    //make sure the local process is running
    if (!is_running_status(process.runtime.state))
    {
      //the local process is not in a running status. ignore
      logger_.warn("Current managed process is not running. Synchronization ignored!");
      NDC.pop();
      return;
    }
   
    CosOperationModeEnum curmode;
    switch(status.value())
    {
      //peer tries to go into CONTROL
      case CosProcessStatusEnum._StatGoingToControl:
      case CosProcessStatusEnum._StatRunningControl:
        //determine the current mode of the local process
        curmode = translate_process_status(process.runtime.state);

        if (curmode == CosOperationModeEnum.OperControl)
        {
          //my process is currently in CONTROL
          //if I have higher or the same weightage, ask peer to stand down
          if (process.runtime.weightage >= weightage)
          {
            logger_.debug("I have higher weightage. Will ask peer process to go to MONITOR");
            try
            {
              peer.reference.cosSetProcessOperationMode(entity, CosOperationModeEnum.OperMonitor);
              //the peer answered, so reset its error counter
              peer.errorCounter = 0;
            }
            catch (Exception ex)
            {
              //ignore other than logging it and counting the error against the peer
              logger_.warn("Can not set peer process to go to MONITOR. Exception: "
                      + ex.toString());
              peer.errorCounter++;
            }
          }
          else
          {
            //otherwise, ask my process to stand down
            logger_.debug("Peer process has higher weightage. Will ask process to go to MONITOR");
            try
            {
              process.reference.cosSetOperationMode(CosOperationModeEnum.OperMonitor);
            }
            catch (Exception ex)
            {
              logger_.warn("Can not get process to go to MONITOR. Exception: " + ex.toString());
            }
          }
        }
        else
        {
          //My process is in MONITOR but my process might have higher priority.
          //ignore it. otherwise, it might trigger unnecessary switching.
          //besides, maybe I manually set the current operation mode to MONITOR
        }

        //update the active reference, if necessary
        if (status == CosProcessStatusEnum.StatRunningControl)
        {
          logger_.trace("Changing the active reference...");
           //get the associated managed process from the peer
          ICosManagedProcess processRef = null;
          try
          {
            processRef = peer.reference.cosGetManagedProcess2(entity);
            peer.errorCounter = 0;
          }
          catch (Exception ex)
          {
            logger_.warn("Can not get reference to peer process. Exception: " + ex.toString());
            peer.errorCounter++;
          }
          //then update the active reference; it is left unchanged if the lookup failed
          if (processRef != null)
          {
            process.activeReference = processRef;
          }
        }  //if (status == StatRunningControl)

        break;
      //everything else, ask our process to take control
      default:
        try
        {
          process.reference.cosSetOperationMode(CosOperationModeEnum.OperControl);
        }
        catch (Exception ex)
        {
          logger_.warn("Can not get process to go to CONTROL. Exception: " + ex.toString());
        }
    }
   
    NDC.pop();
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosSynchronizeCorbaServer(java.lang.String, java.lang.String, tcg.syscontrol.cos.CosProcessStatusEnum)
   */
  public void cosSynchronizeCorbaServer(String peerName, String uniqueKey,
      CosOperationModeEnum mode)
  {
    //if it is not configure with peer or we are starting/terminating, ignore it
    if (activePeers_.size() == 0 || state_ != CosProcessStatusEnum.StatRunningControl)
    {
      return;
    }

    logger_.debug("Peer " + peerName + " status change. Server name: " + uniqueKey +". Operation mode: "
            + CorbaHelper.OperationModeToString(mode) );

    //since we do not really control a named corba server, the only real purpose of synchronization
    //is to update the activeReference value.
    //it follows we are only interested if peer corba server switch to CONTROL.
    if (mode != CosOperationModeEnum.OperControl)
    {
       return;
    }

    //logging context
    NDC.push(peerName + ":" + uniqueKey);
       
    //get the peer reference
    PeerManagerStruct peer = activePeers_.get(peerName);
    if (peer == null || peer.reference == null)
    {
      //can not find the associated peer
      logger_.warn("Invalid peer name. Synchronization ignored!");
      NDC.pop();
      return;
    }

    //get the corba server info
    CorbaServerStruct server = corbaServers_.get(uniqueKey);
    if (server == null || server.reference == null)
    {
      //no matching managed process or not running. ignore
      logger_.warn("Invalid/unknown corba server. Synchronization ignored!");
      NDC.pop();
      return;
    }

    //if our corba server is also in control mode, ignore it. we prefer the local server
    if (server.operationMode == CosOperationModeEnum.OperControl)
    {
      logger_.warn("Our corba server " + uniqueKey +" is also in CONTROL. Prefer local server."
            + " Synchronization ignored!");
      NDC.pop();
      return;
    }
   
     //update the active reference
     logger_.trace("Changing the active reference...");

     //get the associated datapoint server from peer
     ICosMonitoredThread serverRef = null;
     try
     {
       serverRef = peer.reference.cosGetCorbaServer(uniqueKey);
       peer.errorCounter = 0;
     }
     catch(Exception ex)
     {
       logger_.warn("Can not get reference to peer corba server. Exception: " + ex.toString());
       peer.errorCounter++;
     }

     //update the active reference
     if (serverRef != null)
     {
       server.activeReference = serverRef;
     }

    NDC.pop();
  }

  /** -------------------------------------------------------------------------------- *
    * MANAGED PROCESS LOOKUP                                                           *
    * -------------------------------------------------------------------------------- **/

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosRegisterManagedProcess(java.lang.String, tcg.syscontrol.cos.CosProcessTypeEnum, tcg.syscontrol.cos.ICosManagedProcess, int)
   */
  public void cosRegisterManagedProcess(String entity, CosProcessTypeEnum processType,
      ICosManagedProcess managedProcess, long processId)
      throws CosProcessRunningException, CosFailedToRegisterException
  {
    //cannot store process with empty process name
    if (entity == null || entity.length() == 0)
    {
      throw new CosFailedToRegisterException();
    }

    logger_.debug("Process " + entity + " is registering (Process ID = " + processId + ")...");

    //logging context
    NDC.push(entity);
   
    //verbose
    String iorString = CorbaManager.objectToString(managedProcess);
    logger_.trace("Managed Process IOR: " + iorString );

    if (!poll_process(managedProcess))
    {
      logger_.error("Can not poll the registering process!!!");
      NDC.pop();
      throw new CosFailedToRegisterException();
    }
    else
    {
      logger_.debug("Registering process is running. I can poll!");
    }
    //End of debugging

    //Critical Section
    ManagedProcessStruct process = null;
    synchronized(managedProcesses_)
    {
      process = managedProcesses_.get(entity);
      if (process == null)
      {
        //not exist. create new entry
        process = new ManagedProcessStruct();
        //adhoc process
        process.adhoc = true;
        process.runtime.entity =entity;
        process.runtime.terminationCode = CosTerminationCodeEnum.TermNotKnown;
        process.runtime.nStarts = 1;
        process.runtime.weightage = 1;
        process.runtime.logLevel = CosLogLevelEnum.LogNormal;
        //set current value
        process.runtime.processId = processId;
        process.runtime.processType = processType;
        process.runtime.state = CosProcessStatusEnum.StatStarted;
        process.runtime.startDateTime = (Calendar.getInstance().getTimeInMillis() / 1000);
        process.runtime.requestedState = CosProcessStatusEnum.StatStarted;
        process.runtime.lastRequestDateTime = process.runtime.startDateTime;
        //default timeout value
        process.runtime.startTimeout = DEF_START_TIMEOUT_SEC;
        process.runtime.controlTimeout = DEF_CONTROL_TIMEOUT_SEC;
        process.runtime.monitorTimeout = DEF_MONITOR_TIMEOUT_SEC;
        process.runtime.terminateTimeout = DEF_TERMINATE_TIMEOUT_SEC;
        //store the reference
        process.reference = managedProcess;
        //insert into the list
        managedProcesses_.put(process.entity, process);
      }
      else
      {
        //already exist. update the runtime value
        //do not reset runtime loglevel. this way, any loglevel changes is persisted across restart
        //update the runtime value
        process.runtime.processId = processId;
        process.runtime.processType = processType;
        process.runtime.state = CosProcessStatusEnum.StatStarted;
        //store the reference
        process.reference = managedProcess;
      }
     
      //if successful, send runtime parameters
      if (process != null)
      {
        on_managed_process_registration(process);
      }
    }
   
    //perform misc tasks mostly used for error recovery/handling
    if (process != null)
    {
      //store ior into a file
      CorbaManager.persistIor(entity, process.reference);
 
      //kill zombie process
      kill_zombie_process(process);
    }

    NDC.pop();
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetParams(java.lang.String)
   */
  public CosRunParamStruct[] cosGetParams(String entity)
  {
    //validation
    if (entity == null || entity.length() == 0)
    {
      return null;
    }
   
    //logging context
    NDC.push(entity);
   
    //get the process struct
    ManagedProcessStruct process = managedProcesses_.get(entity);
    if (process == null)
    {
      //invalid process
      logger_.warn("Invalid entity name.");
      NDC.pop();
      return null;
    }
   
    //just pass it the the helper functions
    CosRunParamStruct[] params = get_process_runtime_params(process);

    NDC.pop();
    return params;
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosProcessTerminating(java.lang.String, tcg.syscontrol.cos.CosTerminationCodeEnum)
   */
  public void cosProcessTerminating(String entity, CosTerminationCodeEnum p_code)
  {
    //validation
    if (entity == null || entity.length() == 0)
    {
      return;
    }
   
    logger_.debug("Process " + entity + " is terminating...");

    //logging context
    NDC.push(entity);
   
    //critical section
    ManagedProcessStruct process = null;
    synchronized(managedProcesses_)
    {
      process = managedProcesses_.get(entity);
      if (process == null)
      {
        //invalid process
        logger_.warn("Invalid entity name.");
        NDC.pop();
        return;
      }

      //update runtime info
      process.runtime.terminationCode = p_code;
      process.runtime.state = CosProcessStatusEnum.StatTerminating;
    //end of critical section

    //inform/synchronize the peer
    synchronize_managed_process(process);
   
    //update the active reference
    process.activeReference = get_active_managed_process(process);
   
    NDC.pop();
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosProcessStatusChanged(java.lang.String, tcg.syscontrol.cos.CosProcessStatusEnum)
   */
  public void cosProcessStatusChanged(String entity, CosProcessStatusEnum status)
  {
    //validation
    if (entity == null || entity.length() == 0)
    {
      return;
    }
   
    logger_.debug("Process " + entity + " change status to " + CorbaHelper.ProcessStateToString(status)
            +"...");

    //logging context
    NDC.push(entity);
   
    //critical section
    ManagedProcessStruct  process = null;
    synchronized(managedProcesses_)
    {
      process = managedProcesses_.get(entity);
      if (process == null)
      {
        //invalid process
        logger_.warn("Invalid entity name.");
        NDC.pop();
        return;
      }

      if (process.runtime.state == status)
      {
        //already in the state. ignore it.
        logger_.debug("Already in the expected state.");
        NDC.pop();
        return;
      }

      //update internal process state
      process.runtime.state = status;

      //other action
      switch (status.value())
      {
      case CosProcessStatusEnum._StatTerminating:
        //update runtime info
        if (process.runtime.requestedState == CosProcessStatusEnum.StatStopped)
        {
          process.runtime.terminationCode = CosTerminationCodeEnum.TermRequestedTerminate;
        }
        else
        {
          process.runtime.terminationCode = CosTerminationCodeEnum.TermNotKnown;
        }
        //update the active reference
        process.activeReference = get_active_managed_process(process);
        break;
      case CosProcessStatusEnum._StatStopped:
        //delete the reference in the map
        process.reference = null;
        //if it is adhoc, delete the runtime info
        if (process.adhoc)
        {
          managedProcesses_.remove(process);
        }
        else
        {
          //otherwise, update runtime info
          process.runtime.processId = 0;
        }
        //remove the respective ior file
        CorbaManager.removeIor(entity);
        //update the active reference
        process.activeReference = get_active_managed_process(process);
        break;
      case CosProcessStatusEnum._StatRunningControl:
        //stabil state. reset the requested state
        process.runtime.requestedState = status;
        //update the active reference
        process.activeReference = process.reference;
        break;
      case CosProcessStatusEnum._StatRunningMonitor:
        //stabil state. reset the requested state
        process.runtime.requestedState = status;
        //update the active reference
        process.activeReference = get_active_managed_process(process);
        break;
      default:
        //nothing
        break;
      }
    }  //end of critical section

    //inform/synchronize the peer
    synchronize_managed_process(process);
   
    NDC.pop();
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosProcessGoingToControl(java.lang.String)
   */
  public boolean cosProcessGoingToControl(String entity)
  {
    //validation
    if (entity == null || entity.length() == 0)
    {
      return false;
    }
   
    logger_.debug("Process " + entity + " is asking permission to go to CONTROL...");

    //logging context
    NDC.push(entity);
   
    //get the process struct
    ManagedProcessStruct process = managedProcesses_.get(entity);
    if (process == null)
    {
      //invalid process
      logger_.warn("Invalid/unknown managed process. Always allow.");
      NDC.pop();
      return  true;
    }


    //if the requested runtime operation mode = control, proceed
    //this means the switching is requested by the manager
    if (process.runtime.requestedState == CosProcessStatusEnum.StatRunningControl)
    {
      logger_.debug("Switching is requested by the manager. Can go to CONTROL.");
      NDC.pop();
      return true;
    }

    //if we have no peer, also always allow
    if (activePeers_.size() == 0)
    {
      logger_.debug("No peer. Can go to CONTROL.");
      NDC.pop();
      return true;
    }

    //otherwise, need to determine the operation mode
    logger_.debug("Determining operation mode for process " + entity);

    CosOperationModeEnum expectedOperMode = get_expected_operation_mode(process);
    if (expectedOperMode == CosOperationModeEnum.OperControl)
    {
      NDC.pop();
      return true;
    }

    NDC.pop();
    return false;
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosProcessGoingToMonitor(java.lang.String)
   */
  public boolean cosProcessGoingToMonitor(String entity)
  {
    //Permission to switch to MONITOR is granted unconditionally; the entity
    //name is not inspected.
    //always allow to go to monitor
    return true;
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetManagedProcess(short)
   */
  public ICosManagedProcess cosGetManagedProcess(short index)
      throws CosIndexOutOfBoundException
  {
    if (index < 0 || index >= managedProcesses_.size())
      throw new CosIndexOutOfBoundException();
    return managedProcesses_.get(index).reference;
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetManagedProcess2(java.lang.String)
   */
  public ICosManagedProcess cosGetManagedProcess2(String entity)
      throws CosUnknownProcessException
  {
    ManagedProcessStruct process = managedProcesses_.get(entity);   
    if (process == null)
    {
      throw new CosUnknownProcessException();
    }   
    return process.reference;
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetActiveManagedProcess(short)
   */
  public ICosManagedProcess cosGetActiveManagedProcess(short index)
      throws CosIndexOutOfBoundException
  {
    if (index < 0 || index >= managedProcesses_.size())
      throw new CosIndexOutOfBoundException();
    return managedProcesses_.get(index).activeReference;
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetActiveManagedProcess2(java.lang.String)
   */
  public ICosManagedProcess cosGetActiveManagedProcess2(String entity)
      throws CosUnknownProcessException
  {
    ManagedProcessStruct process = managedProcesses_.get(entity);   
    if (process == null)
    {
      throw new CosUnknownProcessException();
    }   
    return process.activeReference;
  }

  /** -------------------------------------------------------------------------------- *
    * MANAGED PROCESS MONITORING                                                       *
    * -------------------------------------------------------------------------------- **/

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetNumberOfManagedProcesses()
   */
  public short cosGetNumberOfManagedProcesses()
  {
    return (short) managedProcesses_.size();
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetProcessInfo(short)
   */
  public CosProcessRuntimeDataStruct cosGetProcessInfo(short index)
      throws CosIndexOutOfBoundException
  {
    if (index < 0 || index >= managedProcesses_.size())
      throw new CosIndexOutOfBoundException();
    return managedProcesses_.get(index).runtime;
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetProcessInfo2(java.lang.String)
   */
  public CosProcessRuntimeDataStruct cosGetProcessInfo2(String entity)
      throws CosUnknownProcessException
  {
    ManagedProcessStruct process = managedProcesses_.get(entity);   
    if (process == null)
    {
      throw new CosUnknownProcessException();
    }   
    return process.runtime;
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetProcessInfoAll()
   */
  public CosProcessRuntimeDataStruct[] cosGetProcessInfoAll()
  {
    CosProcessRuntimeDataStruct[] retval
        = new CosProcessRuntimeDataStruct[managedProcesses_.size()];
    for (int i=0; i<managedProcesses_.size(); i++)
    {
      retval[i] = managedProcesses_.get(i).runtime;
    }
   
    return retval;
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetProcessConfig(short)
   */
  public CosProcessDataStruct cosGetProcessConfig(short index)
      throws CosIndexOutOfBoundException
  {
    if (index < 0 || index >= managedProcesses_.size())
      throw new CosIndexOutOfBoundException();
    return managedProcesses_.get(index).config;
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetProcessConfig2(java.lang.String)
   */
  public CosProcessDataStruct cosGetProcessConfig2(String entity)
      throws CosUnknownProcessException
  {
    ManagedProcessStruct process = managedProcesses_.get(entity)
    if (process == null)
    {
      throw new CosUnknownProcessException();
    }   
   
    return process.config;
  }

  /*
   * (non-Javadoc)
   * @see tcg.syscontrol.cos.ICosProcessManagerOperations#cosGetProcessStatusString(short)
   */
  public String cosGetProcessStatusString(short index) throws CosIndexOutOfBoundException
  {
    if (index < 0 || index >= managedProcesses_.size())
      throw new CosIndexOutOfBoundException();
    ICosManagedProcess processRef = managedProcesses_.get(index).reference;

    String status = "";
    if (CorbaManager.isValidReference(processRef))
    {
      try
      {
        status = processRef.cosGetStatusString();
      }
      catch(Exception ex)
      {
        logger_.error("Can not get status string for "
                + managedProcesses_.get(index).entity
                + ". Exception: " + ex.getMessage());
      }
    }

    return status;
  }

  public String cosGetProcessStatusString2(String entity) throws CosUnknownProcessException
  {
    ManagedProcessStruct process = managedProcesses_.get(entity)
    if (process == null)
    {
      throw new CosUnknownProcessException();
    }
    ICosManagedProcess processRef = process.reference;

    String status = "";
    if (CorbaManager.isValidReference(processRef))
    {
      try
      {
        status = processRef.cosGetStatusString();
      }
      catch(Exception ex)
      {
        logger_.error("Can not get status string for " + entity
                + ". Exception: " + ex.getMessage());
      }
    }

    return status;
  }

  public void cosStartProcess(String entity)
  {
    ManagedProcessStruct process = managedProcesses_.get(entity)
    if (process == null)
    {
      return;
    }
    start_process(process);
  }

  public void cosTerminateProcess(String entity)
  {
    ManagedProcessStruct process = managedProcesses_.get(entity)
    if (process == null)
    {
      return;
    }
    terminate_process(process);
  }

  public void cosKillProcess(String entity)
  {
    ManagedProcessStruct process = managedProcesses_.get(entity)
    if (process == null)
    {
      return;
    }
    kill_process(process);
  }

  public void cosResetProcessNumberOfRestart(String entity)
  {
    ManagedProcessStruct process = managedProcesses_.get(entity)
    if (process == null)
    {
      return;
    }
    process.runtime.nStarts = 1;
  }
 
  public void cosSetProcessOperationMode(String entity, CosOperationModeEnum mode)
  {
    ManagedProcessStruct process = managedProcesses_.get(entity)
    if (process == null || process.reference == null)
    {
      return;
    }
    set_operation_mode(process.reference, mode);
  }

  public void cosSetProcessParams(String entity, CosRunParamStruct[] paramSeq)
  {
    ManagedProcessStruct process = managedProcesses_.get(entity)
    if (process == null || process.reference == null)
    {
      return;
    }
    set_runtime_params(process.reference, paramSeq);
  }

  public void cosSetProcessLogLevel(String entity, CosLogLevelEnum loglevel)
  {
    ManagedProcessStruct process = managedProcesses_.get(entity)
    if (process == null)
    {
      return;
    }
    ICosManagedProcess processRef = process.reference;

    //just pass it to the process
    if (CorbaManager.isValidReference(processRef))
    {
      try
      {
        processRef.cosSetLogLevel(loglevel);
      }
      catch(Exception ex)
      {
        logger_.error("Can not send log level for " + entity
            + " to " + CorbaHelper.LogLevelToString(loglevel)
            + ". Exception: " + ex.getMessage());
      }
    }  //if valid reference
  }

  public void cosSetProcessLogLevelDetail(String entity, String logger,
      CosLogLevelEnum loglevel)
  {
    ManagedProcessStruct process = managedProcesses_.get(entity)
    if (process == null)
    {
      return;
    }
    ICosManagedProcess processRef = process.reference;

    //just pass it to the process
    if (CorbaManager.isValidReference(processRef))
    {
      try
      {
        processRef.cosSetLogLevelDetail(logger, loglevel);
      }
      catch(Exception ex)
      {
        logger_.error("Can not send log level for " + logger
            + " @ " + entity + " to " + CorbaHelper.LogLevelToString(loglevel)
            + ". Exception: " + ex.getMessage());
      }
    }  //if valid reference
  }

  /** -------------------------------------------------------------------------------- *
    * SIMPLE NAMING LOOKUP/SERVICE INTERFACES                                          *
    * -------------------------------------------------------------------------------- **/

  public void cosRegisterCorbaServer(String uniqueKey, ICosMonitoredThread monitoredThread)
      throws CosFailedToRegisterException
  {
    //Debugging
    logger_.debug("IOR: " + CorbaManager.objectToString(monitoredThread) );

    //input validation
    if (!poll_process(monitoredThread))
    {
      logger_.error("Can not poll the registering dpserver!!!");
      throw new CosFailedToRegisterException();
    }
    else
    {
      logger_.debug("Registering dpserver is running. I can poll!");
    }

    //get the subsystem structure in the map
    CorbaServerStruct namedProcess = corbaServers_.get(uniqueKey);
    if (namedProcess == null)
    {
      //create a new structure
      namedProcess = new CorbaServerStruct();
      namedProcess.key = uniqueKey;
      namedProcess.reference = monitoredThread;
      //insert into the lookup
      corbaServers_.put(uniqueKey, namedProcess);
    }
    else
    {
      //replace the old reference
      namedProcess.reference = monitoredThread;
    }
   
    //update the active reference
    namedProcess.activeReference = get_active_corba_server(namedProcess);
  }

  public void cosUnregisterCorbaServer(String uniqueKey)
  {
    //delete the current reference
    CorbaServerStruct namedProcess = corbaServers_.get(uniqueKey);
    if (namedProcess != null)
    {
      corbaServers_.remove(uniqueKey);
    }
  }

  public void cosUpdateCorbaServerOperationMode(String uniqueKey,
      CosOperationModeEnum operationMode)
  {
    //get the reference
    CorbaServerStruct server = corbaServers_.get(uniqueKey);
    if (server == null)
    {
      logger_.error("Can not find corba server " + uniqueKey);
      return;
    }

    //update the internal structure
    server.operationMode = operationMode;

    //update the active reference, if necessary
    if (operationMode == CosOperationModeEnum.OperControl)
    {
      server.activeReference = server.reference;
    }
    else
    {
      server.activeReference = get_active_corba_server(server);
    }

    //inform/synchronize the peer
    synchronize_corba_server(server);
  };
 
  public ICosMonitoredThread cosGetCorbaServer(String uniqueKey)
      throws CosUnknownProcessException
  {
    //get the named process
    CorbaServerStruct namedProcess = corbaServers_.get(uniqueKey);
    if (namedProcess == null)
    {
      throw new CosUnknownProcessException();
    }
    //return the reference
    return namedProcess.reference;
  }

  public ICosMonitoredThread cosGetCorbaServerOperationMode(String uniqueKey,
      CosOperationModeEnumHolder operationMode) throws CosUnknownProcessException
  {
    operationMode.value = CosOperationModeEnum.OperNotApplicable;
    //get the named process
    CorbaServerStruct namedProcess = corbaServers_.get(uniqueKey);
    if (namedProcess == null)
    {
      throw new CosUnknownProcessException();
    }
    //return value
    operationMode.value = namedProcess.operationMode;
    return namedProcess.reference;
  }

  public ICosMonitoredThread cosGetActiveCorbaServer(String uniqueKey)
      throws CosUnknownProcessException
  {
    //get the named process
    CorbaServerStruct namedProcess = corbaServers_.get(uniqueKey);
    if (namedProcess == null)
    {
      throw new CosUnknownProcessException();
    }
    //check the current reference to active process. make sure it is in CONTROL
    if (CorbaManager.isValidReference(namedProcess.activeReference))
    {
      try
      {
        namedProcess.activeReference.cosPollControl();
      }
      catch (Exception ex)
      {
        logger_.error("Current active reference of process " + namedProcess.key
                + " is NOT in CONTROL. Resetting it!");
        namedProcess.activeReference = null;
      }
    }
    //if we dont have an active reference, try to get it
    if (namedProcess.activeReference == null)
    {
      namedProcess.activeReference = get_active_corba_server(namedProcess);
    }
    //return the active reference
    return namedProcess.activeReference;
  }

  /*
   * HELPER METHODS
   */
 
  private boolean initialize(String[] args)
  {
    state_ = CosProcessStatusEnum.StatStartup;

    //parse command line arguments
        if (!parse_arguments(args))
        {
          state_ = CosProcessStatusEnum.StatUnstarted;
          return false;
        }
       
        //load configuration from properties file
        props_ = load_properties(configFile_);
       
    //set database type
        DatabaseType dbType = null;
    String dbTypeString = props_.getProperty("tcg.db.type", "");
    if (0 == dbTypeString.compareToIgnoreCase("MYSQL"))
    {
      dbType = DatabaseType.MYSQL;
    }
    else if (0 == dbTypeString.compareToIgnoreCase("ORACLE"))
    {
      dbType = DatabaseType.ORACLE;
    }
    else
    {
      dbType = DatabaseType.HSQLDB;
    }
   
    //initialize database connection
    String dbTnsname = props_.getProperty("tcg.db.name", "");
    String dbUsername = props_.getProperty("tcg.db.user", "");
    String dbPassword = props_.getProperty("tcg.db.password", "");
    DatabaseManager.configure(dbType, dbTnsname, dbUsername, dbPassword);

    //load the configuration from database
    if (!load_configuration())
    {
          logger_.error("Can not load configuration from database.");
      state_ = CosProcessStatusEnum.StatUnstarted;
      return false;
   
   
        //initialize corba manager. if corbaPort = 0, the port is randomly allocated
        //if it is already initialized by the child class, it does nothing
        if (!CorbaManager.initialize(portNo_))
        {
          logger_.error("Can not initialize CORBA manager.");
      state_ = CosProcessStatusEnum.StatUnstarted;   
          return false;
        }
   
        //activate this servant.
        //if corbaPort != "", the servant is created as persistent object and
        //  can be acessed via corbaloc address: corbaloc::<ip-addr>:<port>/<corba-name>
        if (!CorbaManager.activate(this, serverKey_))
        {
          logger_.error("Can not activate CORBA servant.");
      state_ = CosProcessStatusEnum.StatUnstarted;   
          return false;
        }

    state_ = CosProcessStatusEnum.StatStarted;
    return true;
  }
 
  private void run()
  {
    state_ = CosProcessStatusEnum.StatGoingToControl;
     
    logger_.info("-----------------------------------");
    logger_.info("Starting control mode...");
    logger_.info("-----------------------------------");

    //try to connect to pre-existing process
    start_managed_processes();
   
    //start the poller thread
    processPoller_.start();
   
    //connect to configured peer. this will attempt to validate all configured peers.
    //validated peers are then copied over to the list of active peers
    connect_to_peer_managers();
   
    //start the peer thread
    peerSynch_.start();
   
    logger_.info("-----------------------------------");
    logger_.info("Process Manager is running CONTROL.");
    logger_.info("-----------------------------------");
   
    state_ = CosProcessStatusEnum.StatRunningControl;

    //start the orb. this will block until stop() is called via cosTerminate()
    CorbaManager.run();
   
    //this the corba is unblocked that means we are terminating
    state_ = CosProcessStatusEnum.StatStopped;
  }
 
  /**
   * Shuts the process manager down. The steps run in a fixed order: CORBA is
   * shut down first, then the peer thread and the poller thread are stopped,
   * then the managed processes are stopped, and finally the CORBA manager
   * and the internal collections are cleaned up. The state moves from
   * StatTerminating to StatStopped.
   */
  protected void stop()
  {
    state_ = CosProcessStatusEnum.StatTerminating;

    //first, shutdown the CORBA. it will prevent any corba call that might interrupt
    //our shutdown procedures.
    CorbaManager.shutdown();

    //stop the peer thread
    peerSynch_.stop();

    //stop the poller thread
    processPoller_.stop();

    //stop all managed processes
    stop_managed_processes();

    //clean up
    CorbaManager.cleanup();

    //other clean up: forget all managed processes, corba servers and active peers
    managedProcesses_.clear();
    corbaServers_.clear();
    activePeers_.clear();

    state_ = CosProcessStatusEnum.StatStopped;
  }
 
  private boolean parse_arguments(String args[])
  {
    Options options = new Options();

    Option arg = new Option("n", "Process Manager hostname (required)");
    arg.setRequired(true);
    arg.setLongOpt("name");
    arg.setArgs(1);
    arg.setArgName("name");
    options.addOption(arg);
   
    arg = new Option("l", "Log directory (optional. default: current directory)");
    arg.setRequired(false);
    arg.setLongOpt("logdir");
    arg.setArgs(1);
    arg.setArgName("log-dir");
    options.addOption(arg);
   
    arg = new Option("f", "Configuration file (optional. default: scada.properties)");
    arg.setRequired(false);
    arg.setLongOpt("config-file");
    arg.setArgs(1);
    arg.setArgName("file");
    options.addOption(arg);
   
    arg = new Option("cp", "Corba port (optional. default: database config)");
    arg.setRequired(false);
    arg.setLongOpt("corba-port");
    arg.setArgs(1);
    arg.setArgName("port-no");   
    options.addOption(arg);
   
    arg = new Option("pn", "Peer hostname (optional. default: database config)");
    arg.setRequired(false);
    arg.setLongOpt("peer-name");
    arg.setArgs(1);
    arg.setArgName("host-name");   
    options.addOption(arg)
   
    arg = new Option("pp", "Peer port number (optional. default: database config)");
    arg.setRequired(false);
    arg.setLongOpt("peer-port");
    arg.setArgs(1);
    arg.setArgName("port-no");   
    options.addOption(arg);
           
    //parser
        org.apache.commons.cli.Parser parser = new org.apache.commons.cli.GnuParser();
       
        //parse command line arguments
        org.apache.commons.cli.CommandLine cmd = null;
        try
        {
          cmd = parser.parse(options, args);
        }
        catch(org.apache.commons.cli.ParseException pe)
        {
      logger_.error("Can not parse arguments: " + pe.toString());
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp( "Command line parameters:", options );
      return false;
        }
       
    //get the value
        //process manager name (normally hostname)
    name_ = cmd.getOptionValue("n");
    //log directory
    logdir_ = cmd.getOptionValue("l");
    if (logdir_ == null || logdir_.length() == 0)
    {
      logdir_ = Utilities.getCurrentDir();
    }
    //config file
    configFile_ = cmd.getOptionValue("f");
    if (configFile_ == null || configFile_.length() == 0)
    {
          configFile_ = DEF_CONFIG_FILE;     
    }
    //corba port
        if (cmd.hasOption("cp"))
        {   
           portNo_ = __parseInt(cmd.getOptionValue("cp"));
        }
        //peer host name
        if (cmd.hasOption("pn"))
        {
          PeerManagerStruct peer = new PeerManagerStruct();
          peer.name = cmd.getOptionValue("pn");
            //peer port number
            if (cmd.hasOption("pp"))
            {
              peer.portNo = __parseInt(cmd.getOptionValue("pp"));
            }
            if (peer.portNo == 0)
            {
              peer.portNo = DEFAULT_MGR_PORT.value;
            }
            peer.heartbeatMillis = DEF_PEER_SYNC_RATE_MSEC;
      //add into the hashmap
      configuredPeers_.put(peer.name, peer);
       }
   
    return true;
  }

  private Properties load_properties(String filename)
  {
    Properties pt2 = new Properties();
   
    //Get from the system properties first
    Properties pt = System.getProperties();
        for(Enumeration<?> enumeration = pt.propertyNames(); enumeration.hasMoreElements();)
        {
            String key = (String) enumeration.nextElement();
            String value = pt.getProperty(key);
            if (key.startsWith("tcg.")
                && value!=null && pt2.getProperty(key)==null)
            {
              pt2.setProperty(key, value);
            }
            //special entry. orb
            if (key.startsWith("org.omg.") && value!=null && pt2.getProperty(key)==null)
            {
              pt2.setProperty(key, value);
            }
        }
   
        if (filename == null || filename.length() == 0)
        {
          return pt2;
        }
       
    //open the property file if given
    pt = new Properties();
    InputStream stream = Utilities.getInputStream(filename);
    if (stream == null)
    {
      logger_.error("Couldn't find " + filename + " in classpath.");
    }
    else
    {
      //load the properties file
      try
      {
        pt.load(stream);
        stream.close();
      }
      catch(IOException ioe)
      {
        logger_.error("Fail to load " + filename + ". Exception: " + ioe.getMessage());
      }
     
      //override the system properties with properties from file
          for(Enumeration<?> enumeration = pt.propertyNames(); enumeration.hasMoreElements();)
          {
              String key = (String) enumeration.nextElement();
              String value = pt.getProperty(key);
              if (value!=null)
              {
                pt2.setProperty(key, value);
              }
          }
    }  //if (stream == null) - else
       
    return pt2;
  }
 
  private boolean load_configuration()
  {
    Connection conn = DatabaseManager.getConnection();
    if (conn == null || !DatabaseManager.isConnected(conn))
    {
      logger_.warn("Can not get connection to database.");
      return false;
    }
   
    //get other configuration detail
    boolean status = false;
    logger_.debug("Getting configuration from database...");
   
    //build the query
    String query = "select POLL_RATE, PORT_NO, MAX_RESTART,"
            + " PEER_NAME1, PEER_PORT1, SYNC_RATE1,"
            + " PEER_NAME2, PEER_PORT2, SYNC_RATE2"
            + " from " + DEF_TABLE_MANAGER
            + " where HOSTNAME='" + name_ + "'";
    logger_.debug("SQL Query: " + query);

    String str = "";
    int errorCode = 0;
    try
    {
      Statement stmt = conn.createStatement();
      ResultSet rs = stmt.executeQuery(query);
      if (!rs.next())
      {
        //the fep name is not found in the database
        logger_.error("Process Manager " + name_ + " is not configured in the database.");
        //use default
        if (portNo_ == 0)
        {
          portNo_ = DEFAULT_MGR_PORT.value;
        }
        pollRateMillis_ = DEF_POLL_RATE_MSEC;
        maxRestart_ = DEF_MAX_RESTART;
      }
      else
      {
        if (portNo_ == 0)
        {
          portNo_ = rs.getInt("PORT_NO");
        }
        pollRateMillis_ = rs.getInt("POLL_RATE");
        maxRestart_ = rs.getInt("MAX_RESTART");
        //verbose
        logger_.trace("SQL Result: Poll Rate    = " + pollRateMillis_);
        logger_.trace("SQL Result: Manager Port = " + portNo_);
        logger_.trace("SQL Result: Max. Restart = " + maxRestart_);
        //first peer, if any
        str = rs.getString("PEER_NAME1");
        if (str != null)
        {
          PeerManagerStruct peer1_ = new PeerManagerStruct();
          peer1_.name = str;
          peer1_.portNo = rs.getInt("PEER_PORT1");
          if (peer1_.portNo == 0)
          {
            peer1_.portNo = DEFAULT_MGR_PORT.value;
          }
          peer1_.heartbeatMillis = rs.getInt("SYNC_RATE1");
          if (peer1_.heartbeatMillis == 0)
          {
            peer1_.heartbeatMillis = DEF_PEER_SYNC_RATE_MSEC;
          }
          //add into the hashmap
          configuredPeers_.put(peer1_.name, peer1_);
        }
        //second peer, if any
        str = rs.getString("PEER_NAME2");
        if (str != null)
        {
          PeerManagerStruct peer2_ = new PeerManagerStruct();
          peer2_.name = str;
          peer2_.portNo = rs.getInt("PEER_PORT2");
          if (peer2_.portNo == 0)
          {
            peer2_.portNo = DEFAULT_MGR_PORT.value;
          }
          peer2_.heartbeatMillis = rs.getInt("SYNC_RATE2");
          if (peer2_.heartbeatMillis == 0)
          {
            peer2_.heartbeatMillis = DEF_PEER_SYNC_RATE_MSEC;
          }
          //add into the hashmap
          configuredPeers_.put(peer2_.name, peer2_);
        }
        //successful
        status = true;
      }
      stmt.close();
    }
    catch (SQLException sqle)
    {
      logger_.error("Can not get Process Manager configuration. Exception: " + sqle.toString());
      errorCode = sqle.getErrorCode();
    }
   
    //check the status
    if (!status)
    {
      DatabaseManager.returnConnection(errorCode);
      return false;
    }
   
    //read the list of managed processes from database
    status = false;
    logger_.debug("Getting list of managed processes from database...");
   
    //build the query
    query = "select a.ENTITY, b.COMMAND_LINE, b.IN_PROCESS, b.ARGUMENTS,"
           + " a.EXTRA_ARGUMENTS, a.CONTROL_MODE, a.WEIGHTAGE, "
          + " b.START_TIMEOUT, b.TERMINATE_TIMEOUT, b.CONTROL_TIMEOUT, b.MONITOR_TIMEOUT, "
          + " a.KILL_KEYWORDS"
            + " from " + DEF_TABLE_PROCESSLIST + " a, " + DEF_TABLE_PROCESSCONFIG + " b"
            + " where a.HOSTNAME='" + name_ + "' and a.ENABLED='Y' and a.PROCESS_NAME=b.PROCESS_NAME";
    logger_.trace("SQL Query: " + query);

    errorCode = 0;
    try
    {
      Statement stmt = conn.createStatement();
      ResultSet rs = stmt.executeQuery(query);
      while(rs.next())
      {
        ManagedProcessStruct process = new ManagedProcessStruct();
        process.entity = rs.getString("ENTITY");
        //configuration data
        process.config.entity = process.entity;
        process.config.commandLine = rs.getString("COMMAND_LINE");
        str = rs.getString("IN_PROCESS");
        if (str == null || !str.equalsIgnoreCase("Y"))
        {
          process.inProcess = false;
        }
        else
        {
          process.inProcess = true;
        }
        process.config.arguments = rs.getString("ARGUMENTS");
        if (process.config.arguments == null)
        {
          process.config.arguments = "";
        }
        str = rs.getString("EXTRA_ARGUMENTS");
        if (str == null)
        {
          str = "";
        }
        else if (str.length() > 0)
        {
          process.config.arguments += " " + str;
        }
        //default operation mode
        str = rs.getString("CONTROL_MODE");
        if (str == null)
        {
          process.config.operationMode = CosOperationModeEnum.OperNotApplicable;
        }
        else if (str.compareToIgnoreCase("Y") == 0)
        {
          process.config.operationMode = CosOperationModeEnum.OperControl;
        }
        else if (str.compareToIgnoreCase("N") == 0)
        {
          process.config.operationMode = CosOperationModeEnum.OperMonitor;
        }
        process.config.weightage = rs.getShort("WEIGHTAGE");
        process.config.startTimeout = rs.getInt("START_TIMEOUT");
        process.config.terminateTimeout = rs.getInt("TERMINATE_TIMEOUT");
        process.config.controlTimeout = rs.getInt("CONTROL_TIMEOUT");
        process.config.monitorTimeout = rs.getInt("MONITOR_TIMEOUT");
        //runtime info
        process.runtime.entity = process.config.entity;
        process.runtime.processId = 0;
        process.runtime.processType = CosProcessTypeEnum.ProcThread;
        process.runtime.state = CosProcessStatusEnum.StatUnstarted;
        process.runtime.requestedState = CosProcessStatusEnum.StatStarted;
        process.runtime.terminationCode = CosTerminationCodeEnum.TermNotKnown;
        process.runtime.startDateTime = 0;
        process.runtime.lastRequestDateTime = 0;
        process.runtime.nStarts = 0;
        process.runtime.weightage = process.config.weightage;
        process.runtime.logLevel = CosLogLevelEnum.LogNormal;
        process.runtime.startTimeout = process.config.startTimeout;
        process.runtime.terminateTimeout = process.config.terminateTimeout;
        process.runtime.controlTimeout = process.config.controlTimeout;
        process.runtime.monitorTimeout = process.config.monitorTimeout;
        //add into the list
        managedProcesses_.put(process.entity, process);
      //while(rs.next())
      //successful, even if we somehow fail to insert the structure into the list
      status = true;
      //close query
      stmt.close();
    }
    catch (SQLException sqle)
    {
      logger_.error("Can not get Managed Process list. Exception: " + sqle.toString() );
      errorCode = sqle.getErrorCode();
    }
   
    if (!status)
    {
      DatabaseManager.returnConnection(errorCode);
      return false;
    }

    //return the connection
    DatabaseManager.returnConnection(errorCode);
   
    return true;
  }
 
  private boolean poll_process(ICosMonitoredThread thread)
  {
    if (!CorbaManager.isValidReference(thread))
    {
      logger_.error("Invalid reference to monitored thread.");
      return false;
    }

    //poll the process
    for (int i=0; i<CORBA_ERROR_THRESHOLD; i++)
    {
      try
      {
        thread.cosPoll();
        return true;
      }
      catch (Exception ex)
      {
        logger_.error("(" + i + ") Can not poll monitored thread! Exception: "
                + ex.toString());
      }
    }
    return false;
  }
 
  private boolean poll_manager(ICosProcessManager manager)
  {
    if (!CorbaManager.isValidReference(manager))
    {
      logger_.error("Invalid reference to process manager.");
      return false;
    }

    //poll the process
    for (int i=0; i<CORBA_ERROR_THRESHOLD; i++)
    {
      try
      {
        manager.cosPoll();
        return true;
      }
      catch (Exception ex)
      {
        logger_.error("(" + i + ") Can not poll process manager! Exception: "
                + ex.toString());
      }
    }
    return false;
  }
 
  private void start_process(ManagedProcessStruct process)
  {
    logger_.info("Starting process " + process.entity + "...");
   
    //if it is an adhoc process we will not know how to start it
    if (process.adhoc)
    {
      logger_.warn("Adhoc process. I do not know how to start it!");
      return;
    }
   
    //build log file
    String logfile = "";
    if (logdir_.length() > 0)
    {
      logfile = logdir_ + File.separator + process.entity + ".log";
    }
    else
    {
      logfile = process.entity + ".log";
    }
   
    //exec the command
    if (process.inProcess)
    {
      String cmd = "--entity " + process.entity + " "
            + "--mgr-port " + portNo_ + " "
            + "--logfile " + logfile + " "
            + "--inprocess "
            + process.config.arguments;
     
      //verbose
      logger_.info("Execute: " + cmd);

      //run in the same JVM
      try
      {
        Class<?> cls = Class.forName(process.config.commandLine);
        Object obj = cls.newInstance();
        Method method = cls.getMethod("execute", new Class[] { String.class });
        method.invoke(obj, new Object[] { cmd } );
        //store the object instance
        process.instance = obj;
      }
      catch(Exception ex)
      {
        logger_.warn("Fail to execute command. Exception: " + ex.toString());
      }
    }
    else
    {
      String cmd = process.config.commandLine + " "
            + "--entity " + process.entity + " "
            + "--mgr-port " + portNo_ + " "
            + "--logfile " + logfile + " "
            + process.config.arguments;
            //+ "&";
     
      //start the execution on its own thread.
      ProcessExecution exec = new ProcessExecution(cmd);
      exec.start();

      //store the execution context
      process.instance = exec;
     
//      //NOTE: maybe it is cleaner to just use a native call to fork out a process.
//      //      that way we do not have to worry about cleaning up stderr and stdout.
//      try
//      {
//        Process p = Runtime.getRuntime ().exec (cmd);
//       
//        //wait until it finishes. this way it is cleaner because then we do not need to run a
//        //separate thread for cleaning up the stdout and stderr.
//        //but this way also forces the command to be running as background.
//        //otherwise it will block forever!!!
//        try
//        {
//          p.waitFor();
//        }
//        catch(InterruptedException ie)
//        {
//          //ignore
//        }
//       
//        //clean up any output in stderr
//        BufferedReader   buffer = new BufferedReader (new InputStreamReader (p.getErrorStream ()));
//        String       line = "";
//        while ((line = buffer.readLine ()) != null)
//        {
//          System.err.println ("[Stderr] " + line);
//        }
//        buffer.close();
//       
//        //clean up any output in stdout
//        buffer = new BufferedReader (new InputStreamReader (p.getInputStream()));
//        line = "";
//        while ((line = buffer.readLine ()) != null)
//        {
//          System.out.println ("[Stdout] " + line);
//        }
//        buffer.close();
//      }
//      catch(IOException ioe)
//      {
//        logger_.warn("Fail to execute command. Exception: " + ioe.toString());
//      }
    }
   
    //update runtime infos
    process.runtime.state = CosProcessStatusEnum.StatStartup;
    process.runtime.startDateTime = Utilities.getTimeInSecs();
    process.runtime.requestedState = CosProcessStatusEnum.StatStarted;
    process.runtime.lastRequestDateTime = process.runtime.startDateTime;
    process.runtime.nStarts += 1;

    return;
  }
 
  private void restart_process(ManagedProcessStruct process)
  {
    logger_.info("Restarting process " + process.entity + "...");

    //kill the process
    if (process.runtime.processId > 0)
    {
      kill_process(process);
    }
    //if the number of restart has exceeded max restart, disabled the process
    //NOTE: this should not prevent the manual start of this process from process monitor
    //      via the cosStartProcess() call.
    if (process.runtime.nStarts > maxRestart_)
    {
      logger_.warn("Process has exceeded max restart number. It will NOT be restarted!");
      process.runtime.state = CosProcessStatusEnum.StatUnstarted;
      process.runtime.requestedState = CosProcessStatusEnum.StatUnstarted;
      return;
    }
    //if it is an adhoc process, no need to restart it. we won't know how to anyway
    if (process.adhoc)
    {
      logger_.warn("Adhoc process. It will NOT be restarted!");
      return;
    }
    //start the process
    start_process(process);
  }
 
  private void kill_process(ManagedProcessStruct process)
  {
    logger_.info("Killing process " + process.entity + "...");
   
    //kill the process
    if (process.inProcess)
    {
      if (process.instance == null)
      {
        logger_.error("Invalid process instance for " + process.entity + ". Can not kill.");
      }
     
      //run in the same JVM
      try
      {
        Class<?> cls = Class.forName(process.config.commandLine);
        Method method = cls.getMethod("stop", new Class[] {  });
        method.invoke(process.instance, new Object[] {  } );
      }
      catch(Exception ex)
      {
        logger_.warn("Fail to kill process. " + process.entity
            + "Exception: " + ex.toString());
      }
    }
    else
    {
      if (process.runtime.processId <= 0)
      {
        logger_.error("Invalid process id for " + process.entity + ". Can not kill.");
      }

      //native call
      if (0 != Utilities.killProcess(process.runtime.processId))
      {
        logger_.error("Fail to kill the process " + process.entity
            + ". Process id: " + process.runtime.processId);
        try
        {
          ProcessExecution exec = (ProcessExecution) process.instance;
          if (exec != null)
          {
            exec.stop(100);
          }
        }
        catch (Exception ex)
        {
          //ignore
        }
      }
     
    }

    //update runtime infos
    process.reference = null;
    process.instance = null;
    process.runtime.processId = 0;
    process.runtime.state = CosProcessStatusEnum.StatStopped;
    process.runtime.terminationCode = CosTerminationCodeEnum.TermKilled;
    process.runtime.requestedState = CosProcessStatusEnum.StatStopped;
    process.runtime.lastRequestDateTime = Utilities.getTimeInSecs();

    return;
  }
   
  private void terminate_process(ManagedProcessStruct process)
  {
    if (!CorbaManager.isValidReference(process.reference))
    {
      logger_.error("Invalid reference to managed process.");
      return;
    }
 
    //ask process to terminate
    //if we failed to ask the process to terminate, as long as we update the runtime infos
    //properly, then the process will be killed on the next polling cycle.
    //because of this, we do not need to attempt to send the command several times!
    try
    {
      process.reference.cosTerminate();
      return;
    }
    catch (Exception ex)
    {
      logger_.error("Can not ask process to terminate! Exception: "
              + ex.toString());
      logger_.error("The process will be killed in the next polling cycle.");
    }
   
    //update runtime info
    process.runtime.state = CosProcessStatusEnum.StatTerminating;
    process.runtime.requestedState = CosProcessStatusEnum.StatStopped;
    process.runtime.lastRequestDateTime = Utilities.getTimeInSecs();
  }
 
  private CosProcessStatusEnum get_process_status(ICosManagedProcess process)
  {
    //return null if we fail to get the status from managed process
    //otherwise, return the actual status of the managed process
    if (!CorbaManager.isValidReference(process))
    {
      logger_.error("Invalid reference to managed process.");
      return null;
    }

    CosProcessStatusEnum status = null;
   
    //send the operation mode
    for (int i=0; i<CORBA_ERROR_THRESHOLD; i++)
    {
      try
      {
        status = process.cosGetStatus();
        break;
      }
      catch (Exception ex)
      {
        logger_.error("(" + i + ") Can not get process' runtime status! Exception: "
                + ex.toString());
      }
    }
    return status;
  }
 
  private boolean set_operation_mode(ICosManagedProcess process, CosOperationModeEnum mode)
  {
    if (!CorbaManager.isValidReference(process))
    {
      logger_.error("Invalid reference to managed process.");
      return false;
    }

    //send the operation mode
    for (int i=0; i<CORBA_ERROR_THRESHOLD; i++)
    {
      try
      {
        process.cosSetOperationMode(mode);
        return true;
      }
      catch (Exception ex)
      {
        logger_.error("(" + i + ") Can not set process' operation mode! Exception: "
                + ex.toString());
      }
    }
    return false;
  }

  private boolean set_runtime_params(ICosManagedProcess process, CosRunParamStruct[] params)
  {
    if (!CorbaManager.isValidReference(process))
    {
      logger_.error("Invalid reference to managed process.");
      return false;
    }

    //send the runtime params
    for (int i=0; i<CORBA_ERROR_THRESHOLD; i++)
    {
      try
      {
        process.cosSetParams(params);
        return true;
      }
      catch (Exception ex)
      {
        logger_.error("(" + i + ") Can not set process' runtime parameters! Exception: "
                + ex.toString());
      }
    }
    return false;
  }

  private boolean register_with_peer(ICosProcessManager peer)
  {
    if (!CorbaManager.isValidReference(peer))
    {
      logger_.error("Invalid reference to peer manager.");
      return false;
    }

    //register with peer
    for (int i=0; i<CORBA_ERROR_THRESHOLD; i++)
    {
      try
      {
        peer.cosRegisterPeer(name_, this._this());
        return true;
      }
      catch (Exception ex)
      {
        logger_.error("(" + i + ") Can not register with peer! Exception: "
                + ex.toString());
      }
    }
    return false;
  }
 
  private boolean synchronize_managed_process(ICosProcessManager peer,
        ManagedProcessStruct process)
  {
    if (!CorbaManager.isValidReference(peer))
    {
      logger_.error("Invalid reference to peer manager.");
      return false;
    }
   
    //synchronize the process operation mode to peer process manager
    for (int i=0; i<CORBA_ERROR_THRESHOLD; i++)
    {
      try
      {
        peer.cosSynchronizeManagedProcess(name_, process.entity, process.runtime.weightage,
                          process.runtime.state);
        return true;
      }
      catch (Exception ex)
      {
        logger_.error("(" + i + ") Can not synchronize with peer manager! Exception: "
                + ex.toString());
      }
    }
    return false;
  }

  private void synchronize_managed_process(ManagedProcessStruct process)
  {
    Iterator<PeerManagerStruct> it = activePeers_.values().iterator();
    while(it.hasNext())
    {
      PeerManagerStruct peer = it.next();
     
      //make sure the reference is valied
      if (peer == null || peer.reference == null)
      {
        continue;
      }
     
      logger_.info("Synchronizing managed process " + process.entity + " with peer " + peer.name);
     
      //synchronize the process operation mode to peer process manager
      if (!synchronize_managed_process(peer.reference, process))
      {
        logger_.warn("Fail to synchronize managed process " + process.entity + " with peer " + peer.name);
        peer.errorCounter++;
      }
      else
      {
        peer.errorCounter = 0;
      }       
    } 
  }

//  private boolean synchronize_corba_server(ICosProcessManager peer,
//                        CorbaServerStruct process)
//  {
//    if (!CorbaManager.isValidReference(peer))
//    {
//      logger_.error("Invalid reference to peer manager.");
//      return false;
//    }
//   
//    //synchronize the process operation mode to peer process manager
//    for (int i=0; i<CORBA_ERROR_THRESHOLD; i++)
//    {
//      try
//      {
//        peer.cosSynchronizeCorbaServer(name_, process.key, process.operationMode);
//        return true;
//      }
//      catch (Exception ex)
//      {
//        logger_.error("(" + i + ") Can not synchronize with peer manager! Exception: "
//                + ex.toString());
//      }
//    }
//    return false;
//  }
 
  private void synchronize_corba_server(CorbaServerStruct server)
  {
    Iterator<PeerManagerStruct> it = activePeers_.values().iterator();
    boolean            status = false;
    while(it.hasNext())
    {
      PeerManagerStruct peer = it.next();
     
      //make sure the reference is valied
      if (peer == null || peer.reference == null)
      {
        continue;
      }
     
      logger_.info("Synchronizing corba server " + server.key + " with peer " + peer.name);
     
      //synchronize the process operation mode to peer process manager
      status = false;
      for (int i=0; i<CORBA_ERROR_THRESHOLD && !status; i++)
      {
        try
        {
          peer.reference.cosSynchronizeCorbaServer(name_, server.key, server.operationMode);
          status = true;
          break;
        }
        catch (Exception ex)
        {
          logger_.error("(" + i + ") Can not synchronize corba server with peer manager!"
                  + " Exception: " + ex.toString());
        }
      }
     
      //update error counter
      if (!status)
      {
        logger_.info("Fail to synchronize corba server " + server.key + " with peer " + peer.name);
        peer.errorCounter++;
      }
      else
      {
        peer.errorCounter = 0;
      }       
    } 
  }

  private ICosManagedProcess get_active_managed_process(ManagedProcessStruct process)
  {
    //try to use local reference if the local corba server is in CONTROL
    ICosManagedProcess processRef = null;
    if (process.runtime.state == CosProcessStatusEnum.StatRunningControl
        && process.reference != null)
    {
      processRef = process.reference;
      //validate it
      try
      {
        processRef.cosPollControl();
      }
      catch (Exception ex)
      {
        //ignore. just reset the reference
        logger_.trace("Can not poll control local managed process.");
        processRef = null;
      }
    }
   
    //if the local process is not suitable, try to get from peers. always use the first one found
    if (processRef == null)
    {   
      Iterator<PeerManagerStruct> it = activePeers_.values().iterator();
      while(it.hasNext() && processRef == null)
      {
        //precautions
        if (it.next().reference == null)
          continue;
        //get from peer and validate it
        try
        {
          processRef = it.next().reference.cosGetManagedProcess2(process.entity);
          if (processRef != null)
          {
            processRef.cosPollControl();
          }
          it.next().errorCounter = 0;
        }
        catch(CosUnknownProcessException ue)
        {
          processRef = null;
          it.next().errorCounter = 0;
        }
        catch(Exception ex)
        {
          //ignore. just reset the reference for precaution
          processRef = null;
          it.next().errorCounter++;
        }         
      } 
    }
   
    return processRef;
  }
 
  private ICosMonitoredThread get_active_corba_server(CorbaServerStruct server)
  {
    //try to use local reference if the local corba server is in CONTROL
    ICosMonitoredThread serverRef = null;
    if (server.operationMode == CosOperationModeEnum.OperControl
        && server.reference != null)
    {
      serverRef = server.reference;
      //validate it
      try
      {
        serverRef.cosPollControl();
      }
      catch (Exception ex)
      {
        //ignore. just reset the reference
        logger_.trace("Can not poll control local corba server.");
        serverRef = null;
      }
    }
   
    //if the local server is not suitable, try to get from peers. always use the first one found
    if (serverRef == null)
    {   
      Iterator<PeerManagerStruct> it = activePeers_.values().iterator();
      while(it.hasNext() && serverRef == null)
      {
        //precautions
        if (it.next().reference == null)
          continue;
        //get from peer and validate it
        try
        {
          serverRef = it.next().reference.cosGetCorbaServer(server.key);
          if (serverRef != null)
          {
            serverRef.cosPollControl();
          }
          it.next().errorCounter = 0;
        }
        catch(CosUnknownProcessException ue)
        {
          serverRef = null;
          it.next().errorCounter = 0;
        }
        catch(Exception ex)
        {
          //ignore. just reset the reference for precaution
          serverRef = null;
          it.next().errorCounter++;
        }         
      } 
    }
   
    return serverRef;
  }
 
  private void on_managed_process_registration(ManagedProcessStruct process)
  {
    //create a new thread to do the delayed operation
    OnProcessRegistrationThread runnable = new OnProcessRegistrationThread(process);
    Thread  thread = new Thread(runnable);
   
    //run the thread
    thread.start();
  }
  
  private void kill_zombie_process(ManagedProcessStruct process)
  {
    logger_.info("Killing zombie process for process " + process.entity + "...");
   
    //adhoc process, I won't know how to kill the zombies
    if (process.adhoc)
    {
      logger_.warn("Adhoc process. I do not know how to kill the zombies!");
      return;
    }
   
    //not started?
    if (process.runtime.processId == 0 || process.reference == null)
    {
      return;
    }

    //build keywords to kill
    if (process.config.killKeywords.length() == 0)
    {
      process.config.killKeywords = process.config.commandLine + " "
                      + "--entity " + process.entity +" "
                      + "--mgr-port " + portNo_;
    }

    //build the command
    String cmd = "zombie_killer " + process.runtime.processId + " "
            + "\"" + process.config.killKeywords + "\" &";

    //verbose
    logger_.info("Execute: " + cmd);
   
    //exec the command
    //NOTE: maybe it is cleaner to just use a native call to fork out a process.
    //      that way we do not have to worry about cleaning up stderr and stdout.
    try
    {
      Process p = Runtime.getRuntime ().exec (cmd);
     
      //wait until it finishes. this way it is cleaner because then we do not to run a
      //separate thread for cleaning up the stdout and stderr.
      //but this way also forces the command to be running as background.
      //otherwise it will block forever!!!
      try
      {
        p.waitFor();
      }
      catch(InterruptedException ie)
      {
        //ignore
      }
     
      //clean up any output in stderr
      BufferedReader   buffer = new BufferedReader (new InputStreamReader (p.getErrorStream ()));
      String       line = "";
      while ((line = buffer.readLine ()) != null)
      {
        System.err.println ("[Stderr] " + line);
      }
      buffer.close();
     
      //clean up any output in stdout
      buffer = new BufferedReader (new InputStreamReader (p.getInputStream()));
      line = "";
      while ((line = buffer.readLine ()) != null)
      {
        System.out.println ("[Stdout] " + line);
      }
      buffer.close();
    }
    catch(IOException ioe)
    {
      logger_.warn("Fail to execute command. Exception: " + ioe.toString());
    }
  }
 
  private void start_managed_processes()
  {
    //no need to synchronize with peer. the startup procedure will do it for us

    logger_.info("Reconnecting with running managed processes (if any)...");
   
    //prepare params for zombie processes
    CosRunParamStruct[] params = new CosRunParamStruct[1];
   
    //pass in the process manager port
    params[0] = new CosRunParamStruct();
    params[0].name = MANAGER_PORT_KEY.value;
    params[0].value = Integer.toString(portNo_);
   
    ManagedProcessStruct   process = null;
    String          iorString = "";
    org.omg.CORBA.Object  obj = null;
    ICosManagedProcess    processRef = null;
    long          processId = 0;
    CosOperationModeEnum  operMode = null;
   
    //try connecting to existing/zombie processes
    Iterator<ManagedProcessStruct> itProcess = managedProcesses_.values().iterator();
    while (itProcess.hasNext())
    {
      process = itProcess.next();
     
      //get the ior string
      iorString = CorbaManager.readIor(process.entity);
      if (iorString.length() == 0)
      {
        continue;
      }
   
      //build the managed process reference
      obj = CorbaManager.stringToObject(iorString);
      if (obj != null)
      {
        processRef = ICosManagedProcessHelper.narrow(obj);
        if (processRef == null)
        {
          logger_.warn("Invalid IOR to process " + process.entity + ". IOR String: "
                  + iorString);
          continue;
        }
      }  //if (obj != null)
     
      //critical section
      synchronized(managedProcesses_)
      {
        //reconnect to the zombie process
        processId = 0;
        for (int i=0; i<CORBA_ERROR_THRESHOLD; i++)
        {
          try
          {
            //notify process about our new port
            processRef.cosSetParams(params);
            //get the process id
            processId = processRef.cosGetProcessId();
            //get operation mode
            operMode = processRef.cosGetOperationMode();
            //notify managed process of a restart
            //ideally, the managed process should then re-register to the process manager
            //TODO
            break;
          }
          catch (Exception ex)
          {
            logger_.warn("Cannot connect to managed process: " + process.entity);
            //TODO: maybe we should for awhile here to accommodate in case the machine is too loaded.
            continue;
          }
        }  //for (int i=0; i<CORBA_ERROR_THRESHOLD; i++)
       
        if (processId == 0)
        {
          continue;
        }
       
        //update the runtime value
        process.runtime.processId = processId;
        process.runtime.processType = CosProcessTypeEnum.ProcThread;
        process.runtime.state = CosProcessStatusEnum.StatStartup;
        process.runtime.startDateTime = Utilities.getTimeInSecs();
        process.runtime.requestedState = CosProcessStatusEnum.StatStarted;
        process.runtime.lastRequestDateTime = process.runtime.startDateTime;
        //store the reference. not necessary to since it will eventually re-register. hopefully.
        //but can be used to terminate the process
        process.reference = processRef;
        //initialize the active reference
        if (operMode == CosOperationModeEnum.OperControl)
        {
          process.activeReference = processRef;
        }
      }  //end of critical section
     
    //while (itProcess.hasNext())
  }
 
  private void connect_to_peer_managers()
  {
    //try to connect to every configured peers
    Iterator<PeerManagerStruct> itPeer = configuredPeers_.values().iterator();
    while (itPeer.hasNext())
    {
      PeerManagerStruct peer = itPeer.next();

      //critical section
      synchronized(activePeers_)
      {
        logger_.info("Establishing peering with peer manager " + peer.name);
       
        //add context
        NDC.push(peer.name);
       
        //always make sure that by default every peer is not-active
        peer.isActive = false;
       
        //build the peer reference
        peer.ior = "corbaloc::" + peer.name + ":" + peer.portNo + "/ProcessManager";
        org.omg.CORBA.Object obj = CorbaManager.stringToObject(peer.ior);
        try
        {
          peer.reference = ICosProcessManagerHelper.narrow(obj);
        }
        catch(Exception ex)
        {
          logger_.warn("Can not connect to peer. Corbaloc: " + peer.ior);
          peer.reference = null;
        }
       
        //make sure we have valid reference
        if (!CorbaManager.isValidReference(peer.reference))
        {
          continue;
        }
       
        //poll the peer
        if (poll_manager(peer.reference))
        {
          logger_.info("Can not poll peer. Considered it as not-active");
          continue;
        }
       
        //if we successfully poll a peer, that means the peer is back alive.
        //we should register with the peer and re-establish the peering
        if (!register_with_peer(peer.reference))
        {
          logger_.warn("Can not register with peer. Considered it as not-active");
          continue;
        }
       
        //reset the error counter so that we can start using it
        peer.errorCounter = 0;
       
        //synchronized every managed process
        Iterator<ManagedProcessStruct> itProcess = managedProcesses_.values().iterator();
        while (itProcess.hasNext())
        {
          ManagedProcessStruct process = itProcess.next();
          if (process == null || process.runtime.processId == 0)
          {
            continue;
          }
          if (!synchronize_managed_process(peer.reference, process))
          {
            logger_.warn("Fail to synchronize process " + process.entity);
            peer.errorCounter++;
          }
        }
       
        //check if error threshold has been reached
        if (peer.errorCounter >= DEF_ERROR_THRESHOLD)
        {
          logger_.error("Error threshold has been reached. Fail to establish peering." );
          continue;
        }
        else if (peer.errorCounter > 0)
        {
          logger_.warn("Fail to synchronize some processes! But since error threshold is not reached, "
                  + "we consider it to be acceptable.");
          logger_.warn("I sincerely hope that the best-case scenarios will always prevail. :)");
        }
       
        //if we get here. that means everything's well. move the peer to active peer list
        logger_.info("Everything is OK (or nearly OK). Peering is established.");
        peer.isActive = true;
        activePeers_.put(peer.name, peer);
       
        NDC.pop();
      }   //end of critical section           
    //for each configured peer
  }
 
  private void stop_managed_processes()
  {
    //no need to synchronize with peer. the startup procedure will do it for us

    logger_.info("Stopping running managed processes (if any)...");
   
    //kill all running processes
    Iterator<ManagedProcessStruct> itProcess = managedProcesses_.values().iterator();
    while (itProcess.hasNext())
    {
      ManagedProcessStruct process = itProcess.next();
      if (process == null)
      {
        continue;
      }
      //kill the process
      kill_process(process);
    //while (itProcess.hasNext())
  }
 
  private CosOperationModeEnum get_expected_operation_mode(ManagedProcessStruct process)
  {
    //if we do not have peers, always try to go to CONTROL
    if (activePeers_.size() == 0)
    {
      logger_.debug("No peers. Always try to go to CONTROL.");
      return CosOperationModeEnum.OperControl;
    }
   
    //our preference is to go to CONTROL
    boolean   canGoToControl = true;
   
    CosProcessRuntimeDataStruct    peerProcess = null;
    CosOperationModeEnum      peerOperMode = null;
   
    //otherwise, check with all peers
    Iterator<PeerManagerStruct> it = activePeers_.values().iterator();
    while(it.hasNext() && canGoToControl)
    {
      //precautions
      if (it.next().reference == null)
        continue;
     
      //get from peer and validate it
      try
      {
        peerProcess = it.next().reference.cosGetProcessInfo2(process.entity);
        it.next().errorCounter = 0;
      }
      catch(CosUnknownProcessException ue)
      {
        peerProcess = null;
        it.next().errorCounter = 0;
        continue;
      }
      catch(Exception ex)
      {
        //ignore. just reset the reference for precaution
        peerProcess = null;
        it.next().errorCounter++;
        continue;
      }     
     
      //check peer operation mode
      peerOperMode = translate_process_status(peerProcess.state);
     
      //compare with our weightage
      if (peerOperMode == CosOperationModeEnum.OperControl &&
          (peerProcess.weightage >= process.runtime.weightage))
      {
        //one of the peer has higher weightage. do not go to control
        canGoToControl = false;
        break;
      }
    } 

    //can we go to control?
    if (canGoToControl)
    {
      logger_.debug("Has peers. Can go to CONTROL.");
      return CosOperationModeEnum.OperControl;
    }
    else
    {
      logger_.debug("Has peers. DO NOT go to CONTROL.");
      return CosOperationModeEnum.OperMonitor;
    }
  }
 
  private CosRunParamStruct[] get_process_runtime_params(ManagedProcessStruct process)
  {
    ArrayList<CosRunParamStruct>  params = new ArrayList<CosRunParamStruct>();
    CosRunParamStruct         param = null;
   
    //pass in anything in the runtime properties.
        for(Enumeration<?> enumeration = runtimeProps_.propertyNames();
            enumeration.hasMoreElements();)
        {
            param = new CosRunParamStruct();
            param.name = (String) enumeration.nextElement();
            param.value = runtimeProps_.getProperty(param.name);
            //add into the list
            params.add(param);
        }
       
    //log file. this should have been passed in as command line argument!
    String logfile = "";
    if (logdir_.length() > 0)
    {
      logfile = logdir_ + "/" + process.entity + ".log";
    } else {
      logfile = process.entity + ".log";
    }
        param = new CosRunParamStruct();
        param.name = LOG_FILE_KEY.value;
        param.value = logfile;
        //add into the list
        params.add(param);
   
    //log level. should use cosSetLogLevel() directly
        param = new CosRunParamStruct();
        param.name = LOG_LEVEL_KEY.value;
        param.value = CorbaHelper.LogLevelToString(process.runtime.logLevel);
        //add into the list
        params.add(param);

        CosRunParamStruct[] retval = new CosRunParamStruct[params.size()];
        return (CosRunParamStruct[]) params.toArray(retval);
  }
 
  private CosOperationModeEnum translate_process_status(CosProcessStatusEnum status)
  {
    switch(status.value())
    {
      case CosProcessStatusEnum._StatGoingToControl:
      case CosProcessStatusEnum._StatRunningControl:
        return CosOperationModeEnum.OperControl;
      case CosProcessStatusEnum._StatGoingToMonitor:
      case CosProcessStatusEnum._StatRunningMonitor:
        return CosOperationModeEnum.OperMonitor;
      default:
        return CosOperationModeEnum.OperNotApplicable;
    }
  }
 
  private boolean is_running_status(CosProcessStatusEnum status)
  {
    switch(status.value())
    {
      case CosProcessStatusEnum._StatGoingToControl:
      case CosProcessStatusEnum._StatRunningControl:
      case CosProcessStatusEnum._StatGoingToMonitor:
      case CosProcessStatusEnum._StatRunningMonitor:
        return true;
      default:
        return false;
    }
  }
 
//  public String object_to_string(org.omg.CORBA.Object obj)
//  {
//    return CorbaManager.objectToString(obj);
//  }
// 
//  public org.omg.CORBA.Object string_to_object(String ior)
//  {
//    return CorbaManager.stringToObject(ior);
//   }
// 
//  public boolean is_valid(org.omg.CORBA.Object obj)
//  {
//    return CorbaManager.isValidReference(obj);
//  }
 
  protected int __parseInt(String text)
  {
    int retval = 0;
      try {
        retval = Integer.parseInt(text);
      } catch (NumberFormatException ne) {
        logger_.trace("Exception: " + ne.getMessage());
     
      return retval;
  }
 
  /** -------------------------------------------------------------------------------- *
    *  WORKER THREADS                                                                  *
    * -------------------------------------------------------------------------------- **/
 
  class ProcessPollerThread implements Runnable
 
    private boolean keepRunning_ = false;
    private Thread thread_ = null;

    public void start()
    {
      //avoid running more than once
      if (thread_ != null && keepRunning_ && thread_.isAlive())
      {
        return;
      }
      //create a new tcp server
      thread_ = new Thread(this);
      //finally, start the tcpserver thread
      thread_.start();
     
    }
   
    public void stop()
    {
      if (thread_ == null)
      {
        return;
      }
      //stop the polling thread
      keepRunning_ = false;
      try
      {
        thread_.interrupt();
        thread_.join(DEF_THREAD_WAIT_MSEC);
      }
      catch (InterruptedException ie)
      {
        //ignore
      }
      thread_ = null;
    }
   
    public void run()
    {
      //must be initialized first and all process must be started
      if (state_ != CosProcessStatusEnum.StatGoingToControl
          && state_ != CosProcessStatusEnum.StatRunningControl)
      {
        return;
      }
     
      //time value. used to calculate how long it takes to poll all processes
      long              polltime, difftime;

      //ProcessMap_t::iterator       iterMap;

      //long               timeout;
      long              elapsetime;

      int                nActiveProcesses;
      ArrayList<String>        toDeleteList = new ArrayList<String>();
      Iterator<ManagedProcessStruct>   itProcess = null;
      Iterator<CorbaServerStruct>    itServer = null;
     
      int                counter = 0;
      //CosOperationModeEnum       operMode;
      CosProcessStatusEnum      processStatus;

      logger_.debug("Going into main loop...");
      NDC.push("ProcessPoller");
     
      keepRunning_ = true;
      while(keepRunning_)
      {                //infinite loop
        //start time
        polltime = Utilities.getTimeInSecs();

        //verbose. print out number of datapoint servers
        logger_.debug("managedProcesses.size(): " + managedProcesses_.size());

        //checking every process in the runtime process map
        itProcess = managedProcesses_.values().iterator();
       
        //clear list of tobe deleted process. just in case
        toDeleteList.clear();
       
        nActiveProcesses = 0;
        while (itProcess.hasNext())
        {
          //if process manager is terminating, stop polling the processes
          if (state_ == CosProcessStatusEnum.StatTerminating
              || state_ == CosProcessStatusEnum.StatStopped)
          {
            break;
          }

          //if loop is terminated, break out
          if (!keepRunning_break;

          //critical section. mostly to keep everything consistent
          ManagedProcessStruct process = itProcess.next();
          synchronized(managedProcesses_)
          {
            logger_.trace("Checking managed process '" + process.entity + "\". "
                        + "State: " + CorbaHelper.ProcessStateToString(process.runtime.state));
 
            //add logger config
            NDC.push(process.entity);
 
            switch (process.runtime.state.value())
            {
            //not started yet
            case CosProcessStatusEnum._StatNotRunning:
              //ignore this process. process is disabled
              break;
            case CosProcessStatusEnum._StatUnstarted:
              //if we are terminating, don't start any new process
              if (state_ == CosProcessStatusEnum.StatTerminating
                  || state_ == CosProcessStatusEnum.StatStopped)
              {
                break;
              }
              //normal case: see if we are supposed to start it
              if (!process.adhoc &&
                  process.config.operationMode != CosOperationModeEnum.OperNotApplicable)
              {
                //start the proces
                start_process(process);
              }
 
              break;
            //started but not register yet
            case CosProcessStatusEnum._StatStartup:
              //consider it as active process
              nActiveProcesses++;
 
              //check if timeout has elapsed. if it has, restart the process
              elapsetime = Utilities.getTimeInSecs() - process.runtime.lastRequestDateTime;
              if (elapsetime > process.runtime.startTimeout)
              {
                logger_.warn("Timeout in startup (" + elapsetime + " sec). It will be restarted!");
                //restart the process if necessary
                restart_process(process);
              }
 
              break;
            //have registered
            case CosProcessStatusEnum._StatStarted:
              //consider it as active process
              nActiveProcesses++;
 
              //check if I still have the reference to the process
              if (process.reference == null || !poll_process(process.reference))
              {
                //I have lost it, restart it
                restart_process(process);
                break;
              }
 
              //set run time params
              //it will be handled by the onManagedProcessRegistration()
             
              break;
            //is in transition to control state
            case CosProcessStatusEnum._StatGoingToControl:
              //consider it as active process
              nActiveProcesses++;
 
              //check if I still have the reference to the process
              if (process.reference == null || !poll_process(process.reference))
              {
                //I have lost it, restart it
                restart_process(process);
                break;
              }
 
              //check if timeout has elapsed. if it has, restart the process
              elapsetime = Utilities.getTimeInSecs() - process.runtime.lastRequestDateTime;
              if (elapsetime > process.runtime.controlTimeout)
              {
                logger_.warn("Timeout in going to control (" + elapsetime + " sec). It will be restarted.");
                //restart the process if necessary
                restart_process(process);
              }
              break;
            //is in transition to monitor state
            case CosProcessStatusEnum._StatGoingToMonitor:
              //consider it as active process
              nActiveProcesses++;
 
              //check if I still have the reference to the process
              if (process.reference == null || !poll_process(process.reference))
              {
                //I have lost it, restart it
                restart_process(process);
                break;
              }
 
              //check if timeout has elapsed. if it has, restart the process
              elapsetime = Utilities.getTimeInSecs() - process.runtime.lastRequestDateTime;
              if (elapsetime > process.runtime.monitorTimeout)
              {
                logger_.warn("Timeout in going to monitor (" + elapsetime + " sec). It will be restarted.");
                //restart the process if necessary
                restart_process(process);
              }
              break;
             
              /**
               * Note:
               * We only synchronized the stable state of managed process (with our internal structure)
               * The transition state is only used internally by both the process manager and managed process
               * That is:
               * - As soon as process manager send cosSetOperationMode(), it assumes (and sets the internal state)
               *   that the managed process is in transition mode.
               * - But the managed process may not transition right way for example because of pending reconfiguration
               * - In this case, the managed process internal state would still be the previous state (say StatStarted)
               *   eventhough the process manager's process structure has already changed to a transition state.
               * Thus, it is not desirable to synchronize the state of managed process with the process manager
               * internal process structure when it is in transition state.
               **/
             
            //is in the middle of terminating
            case CosProcessStatusEnum._StatTerminating:
              //consider it as active process
              nActiveProcesses++;
 
              //check if I still have the reference to the process
              if (process.reference == null || !poll_process(process.reference))
              {
                //I have lost it, restart it
                restart_process(process);
                break;
              }
 
              //check if timeout has elapsed. if it has, restart the process
              elapsetime = Utilities.getTimeInSecs() - process.runtime.lastRequestDateTime;
              if (elapsetime > process.runtime.terminateTimeout)
              {
                logger_.warn("Timeout in temination (" + elapsetime + " sec). It will be killed.");
                //just kill it
                kill_process(process);
                //inform peers
                synchronize_managed_process(process);
              }
 
              break;
            //have already teminated.
            case CosProcessStatusEnum._StatStopped:
              //if we are terminating, don't restart any new process
              if (state_ == CosProcessStatusEnum.StatTerminating
                  || state_ == CosProcessStatusEnum.StatStopped)
              {
                break;
              }
 
              //if it is an adhoc process, schedule a deletion
              if (process.adhoc)
              {
                toDeleteList.add(process.entity);
                break;
              }
 
              //restart the process if the termination is not from process manager
              if (process.runtime.requestedState != CosProcessStatusEnum.StatStopped)
              {
                start_process(process);
              }
 
              //just in case we miss it
              process.reference = null;
 
              break;
            //stable state: either in control or in monitor
            default:
              //consider it as active process
              nActiveProcesses++;
 
              //check if I still have the reference to the process
              if (process.reference == null)
              {
                //I have lost it, restart it
                restart_process(process);
                break;
              }

              //when it comes time, we will attempt to resync the status data
              //TODO: maybe should use timestamp for each managed process for more proper
              //      implementation of status synch interval/rate
              if (counter == 0)
              {
                //get the actual status from managed process
                processStatus = get_process_status(process.reference);
                if (processStatus == null)
                {
                  logger_.warn("Cannot get status. Restarting it.");
                  restart_process(process);
                }
                else if (processStatus != process.runtime.state)
                {
                  //update the runtime state. this hopefully will trigger the necessary actions
                  process.runtime.state = processStatus;
                  //only reset the requested state if it is a stable state.
                  //this usually means the managed process has choose to disobey the process manager
                  // and run its own operation mode based on its own criteria/judgement
                  if (processStatus == CosProcessStatusEnum.StatRunningControl
                      || processStatus == CosProcessStatusEnum.StatRunningMonitor)
                  {
                    process.runtime.requestedState = processStatus;
                  }
                }
                //force re-synch with all peers
                synchronize_managed_process(process);
              }
              else
              {
                //other occasion, just try to poll/ping
                if (!poll_process(process.reference))
                {
                  //failed to get heartbeat signal from process
                  logger_.warn("Cannot poll managed process. It will be restarted.");
                  //restart the process
                  restart_process(process);
                }
              }
 
              break;
            }
 
            //remove logger context
            NDC.pop();
          //end of critical section

          //increase the counter
          counter++;

          //reset the counter if it reaches the interval number
          if (counter >= pollSyncInterval_) {
            counter = 0;
          }

        }      //for each iterRuntime

        //if terminating, just stopped
        if (state_ == CosProcessStatusEnum.StatTerminating
            || state_ == CosProcessStatusEnum.StatStopped)
        {
          keepRunning_ = false;
          break;
        }

        //delete terminated ad-hoc process. critical section for consistency
        if (toDeleteList.size() > 0)
        {
          synchronized(managedProcesses_)
          {
            //if there is process in the to-be-deleted list, delete it
            for (int i=0; i<toDeleteList.size(); i++)
            {
              managedProcesses_.remove(toDeleteList.get(i));
            }
            toDeleteList.clear();
          }   //end clean up section
        }
       
        //poll all corba servers
       
        //verbose. print out number of datapoint servers
        logger_.debug("corbaServers_.size(): " + corbaServers_.size());

        //checking every process in the runtime process map
        itServer = corbaServers_.values().iterator();
       
        //clear list of tobe deleted process. just in case
        toDeleteList.clear();
       
        while (itServer.hasNext())
        {
          //if process manager is terminating, stop polling the processes
          if (state_ == CosProcessStatusEnum.StatTerminating
              || state_ == CosProcessStatusEnum.StatStopped)
          {
            break;
          }

          //if loop is terminated, break out
          if (!keepRunning_break;

          //critical section. mostly to keep everything consistent
          CorbaServerStruct server = itServer.next();
          synchronized(corbaServers_)
          {
            logger_.trace("Checking corba server '" + server.key + "\". Operation Mode: "
                    + CorbaHelper.OperationModeToString(server.operationMode));
 
            //add logger config
            NDC.push(server.key);
           
            if (!poll_process(server.reference))
            {
              logger_.warn("Cannot poll corba server. It will be deleted from the map.");
              toDeleteList.add(server.key);
            }
           
            NDC.pop();
          }   //end of critical section
        }
       
        //delete terminated corba server. critical section for consistency
        if (toDeleteList.size() > 0)
        {
          synchronized(corbaServers_)
          {
            //if there is process in the to-be-deleted list, delete it
            for (int i=0; i<toDeleteList.size(); i++)
            {
              corbaServers_.remove(toDeleteList.get(i));
            }
            toDeleteList.clear();
          }   //end clean up section
        }

        //calculate elapse time for the last processing
        difftime = polltime + pollRateMillis_ - Utilities.getTimeInSecs();

        //sleep between polling
        if (keepRunning_ && difftime > 0)
        {
          try
          {
            Thread.sleep(difftime);
          }
          catch(InterruptedException ie)
          {
            //ignore
          }
        }

        //NOTE:
        //maybe should make each iteration of iterRuntime in each own thread.
        //this way, pollRate should then be guaranteed.
        //but must be aware of possible sending conflicting message (between threads)
        // to managed processes
      }

      //stopped
      keepRunning_ = false;

      logger_.info("Main poller loop has exited...");
     
      //kill all managed processes. don't bother with gracious kill
      //NOTE: moved to stop()
     
      NDC.pop();
    }
  };

  class PeerSynchThread implements Runnable
  {
    private boolean keepRunning_ = false;
    private Thread thread_ = null;

    public void start()
    {
      //avoid running more than once
      if (thread_ != null && keepRunning_ && thread_.isAlive())
      {
        return;
      }
      //create a new tcp server
      thread_ = new Thread(this);
      //finally, start the tcpserver thread
      thread_.start();
     
    }
   
    public void stop()
    {
      if (thread_ == null)
      {
        return;
      }
      //stop the listen thread
      keepRunning_ = false;
      try
      {
        thread_.interrupt();
        thread_.join(DEF_THREAD_WAIT_MSEC);
      }
      catch (InterruptedException ie)
      {
        //ignore
      }
      thread_ = null;
    }
   
    public void run()
    {
      //must be initialized first and all process must be started
      if (state_ != CosProcessStatusEnum.StatStarted)
        return;

      logger_.info("Going into peer synch loop...");
      NDC.push("PeerSynch");
     
      long curtime, difftime;
     
      ArrayList<String>      toDeleteList = new ArrayList<String>();
      Iterator<PeerManagerStruct>  itPeer = null;
     
      keepRunning_ = true;
      while (keepRunning_)
      {
        curtime = Utilities.getTimeInSecs();

        //if process manager is terminating, or no peers is active, quit from the loop
        if (activePeers_.size() == 0
            || state_ == CosProcessStatusEnum.StatTerminating
            || state_ == CosProcessStatusEnum.StatStopped)
        {
          break;
        }

        //check the active peers one by one
        itPeer = activePeers_.values().iterator();
        while (itPeer.hasNext())
        {
          PeerManagerStruct peer = itPeer.next();

          //check if it is time to poll this peer
          if (peer.nextPollingMillis > curtime)
          {
            continue;
          }
          peer.nextPollingMillis = curtime + peer.heartbeatMillis;

          //critical section
          synchronized(activePeers_)
          {
            //add context
            NDC.push(peer.name);
           
            //poll the peer
            if (poll_manager(peer.reference))
            {
              peer.errorCounter = 0;
            }
            else
            {
              peer.errorCounter++;
            }
           
            //check if error threshold has been reached
            if (peer.errorCounter >= DEF_ERROR_THRESHOLD)
            {
              logger_.error("Error threshold has been reached. It will be deleted from list of active peers." );
              //maximun retries. maybe peer has died
              toDeleteList.add(peer.name);
            }
           
            NDC.pop();
          }   //end of critical section           
        //for each active peer
       
        //NOTE: not sure the concept of active peer will work in our case
        //      what about if network split happens for a very short while.
        //      both process manager will think that the other party has shutdown,
        //      and since there is no persistent attempt for reconnection when the
        //      network recover the peering will not recover!
        //NOTE: we will always try to poll all pre-configured peers even if it is persistently
        //      down. this way, if the problem is temporary the peering will be re-established
        //      as soon as the problem is mitigated.
       
        //check the configured peer one by one. configured peer represent non-active peer.
        //this way, if there is a temporary network problem, the peering is reconnected
        // as soon as the problem is fixed
        itPeer = configuredPeers_.values().iterator();
        while (itPeer.hasNext())
        {
          PeerManagerStruct peer = itPeer.next();

          //if it is currently active, it has been handled by the previous segment
          if (peer.isActive)
          {
            continue;
          }
         
          //check if it is time to poll this peer
          if (peer.nextPollingMillis > curtime)
          {
            continue;
          }
          peer.nextPollingMillis = curtime + peer.heartbeatMillis;

          //critical section
          synchronized(activePeers_)
          {
            //add context
            NDC.push(peer.name);
           
            //build the peer reference if it is not build yet
            if (peer.reference == null)
            {
              if (peer.ior.length() == 0)
              {
                peer.ior = "corbaloc::" + peer.name + ":" + peer.portNo + "/ProcessManager";
              }
              org.omg.CORBA.Object obj = CorbaManager.stringToObject(peer.ior);
              peer.reference = ICosProcessManagerHelper.narrow(obj);
            }
           
            //make sure we have valid reference
            if (!CorbaManager.isValidReference(peer.reference))
            {
              continue;
            }
           
            //poll the peer
            //do not use the helper function poll_manager() because it will attempt
            //ERROR_THRESHOLD times. always assume that any problem is persistent.
            try
            {
              peer.reference.cosPoll();
            }
            catch(Exception ex)
            {
              //ignore
              logger_.trace("Can not poll configured peer. Exception: "
                      + ex.toString());
              continue;
            }
           
            //if we successfully poll a peer, that means the peer is back alive.
            //we should register with the peer and re-establish the peering
            logger_.info("Peer " + peer.name + " is back ALIVE. Re-establishing peering...");
            if (!register_with_peer(peer.reference))
            {
              logger_.warn("Can not register with peer.");
              continue;
            }
           
            //reset the error counter so that we can start using it
            peer.errorCounter = 0;
            //synchronized every managed process
            Iterator<ManagedProcessStruct> itProcess = managedProcesses_.values().iterator();
            while (itProcess.hasNext())
            {
              ManagedProcessStruct process = itProcess.next();
              if (process == null || process.runtime.processId == 0)
              {
                continue;
              }
              if (!synchronize_managed_process(peer.reference, process))
              {
                logger_.warn("Fail to synchronize process " + process.entity);
                peer.errorCounter++;
              }
            }
           
            //check if error threshold has been reached
            if (peer.errorCounter >= DEF_ERROR_THRESHOLD)
            {
              logger_.error("Error threshold has been reached. Fail to re-establish peering." );
              continue;
            }
            else if (peer.errorCounter > 0)
            {
              logger_.warn("Fail to synchronize some processes! But since error threshold is not reached, "
                      + "we consider it to be acceptable.");
              logger_.warn("I sincerely hope that the best-case scenarios will always prevail. :)");
            }
           
            //if we get here. that means everything's well. move the peer to active peer list
            logger_.info("Everything is OK (or nearly OK). Peering is re-established.");
            peer.isActive = true;
            activePeers_.put(peer.name, peer);
           
            NDC.pop();
          }   //end of critical section           
        //for each configured peer
       
       
        //if there is peer  in the to-be-deleted list, delete it
        if (toDeleteList.size() > 0)
        {
          synchronized(activePeers_)
          {
            //if there is peer manager in the to-be-deleted list, delete it
            PeerManagerStruct peer = null;
            for (int i=0; i<toDeleteList.size(); i++)
            {
              peer = activePeers_.remove(toDeleteList.get(i));
              //if it is not an adhoc peer, put back to configured peer
              if (peer != null && !peer.adhoc)
              {
                peer.isActive = false;
              }
            }
            toDeleteList.clear();
          }   //end clean up section
        }

        //calculate elapse time for the last processing
        //NOTE: there is no specific precision configuration to control how often the loop
        //      is run. thus, we just use the same parameter as the process poller
        difftime = curtime + pollRateMillis_ - Utilities.getTimeInSecs();

        //sleep between polling
        if (keepRunning_ && difftime > 0)
        {
          try
          {
            Thread.sleep(difftime);
          }
          catch(InterruptedException ie)
          {
            //ignore
          }
        }
      }

      //reset. just in case
      keepRunning_ = false;
      logger_.info("Peer synch loop has exited...");

      //start all managed process to control (only when we are not terminating)
      logger_.info("Switching all managed processes to CONTROL...");
      if (state_ != CosProcessStatusEnum.StatTerminating
          && state_ != CosProcessStatusEnum.StatStopped)
      {
        Iterator<ManagedProcessStruct> itProcess = managedProcesses_.values().iterator();
        while(itProcess.hasNext())
        {
          ManagedProcessStruct process = itProcess.next();
          if (process == null || process.reference == null
              || process.runtime.processId == 0)
          {
            continue;
          }
          //set the process to control
          if (!set_operation_mode(process.reference, CosOperationModeEnum.OperControl))
          {
            logger_.warn("Can not switch process " + process.entity + " to CONTROL.");
          }
        }  //for each managed process
      //while not terminating
     
      NDC.pop();
    }
  }

  class OnProcessRegistrationThread implements Runnable
  {
    ManagedProcessStruct   process_ = null;
   
    public OnProcessRegistrationThread(ManagedProcessStruct process)
    {
      process_ = process;
    }
   
    public void run()
    {
      if (process_ == null || process_.reference == null)
      {
        return;
      }

      logger_.info("Performing registration procedure for " + process_.entity);
      NDC.push("Registration:" + process_.entity);
     
      //synchronize process status
      CosProcessStatusEnum status = get_process_status(process_.reference);
      if (status != null)
      {
        logger_.info("Expected status: " + CorbaHelper.ProcessStateToString(process_.runtime.state) +
                ". Actual status: " + CorbaHelper.ProcessStateToString(status));
        process_.runtime.state = status;
        //if it is a stable state, set the requested state so that we do not switch
        // existing process unnecessarily
        //NOTE: this is not useful because in any case we will resend the operation mode!
        if (status == CosProcessStatusEnum.StatRunningControl
            || status == CosProcessStatusEnum.StatRunningMonitor)
        {
          process_.runtime.requestedState = status;
        }
      }

      //synchronize with peer
      synchronize_managed_process(process_);

      //get run time params and operation mode
      CosOperationModeEnum   operMode = get_expected_operation_mode(process_);
      CosRunParamStruct[]   params = get_process_runtime_params(process_);

      if (operMode == CosOperationModeEnum.OperNotApplicable)
      {
        logger_.error("Can not determine expected operation mode. Restarting it.");
        restart_process(process_);
      }

      //send the runtime params to managed process
      if (!set_runtime_params(process_.reference, params))
      {
        logger_.warn("Can not set runtime parameters. Restarting it.");
        restart_process(process_);
      }

      //send the operation mode to managed process
      //TODO: this might switch the current status of managed process! not sure we want to do that!
      if (!set_operation_mode(process_.reference, operMode))
      {
        logger_.warn("Can not set operation mode. Restarting it.");
        restart_process(process_);
      }

      //requested operation mode
      if (operMode == CosOperationModeEnum.OperControl)
      {
        process_.runtime.requestedState = CosProcessStatusEnum.StatRunningControl;
      }
      else
      {
        process_.runtime.requestedState = CosProcessStatusEnum.StatRunningMonitor;
      }

      //NOTE:
      //do not set the peer process operation mode
      //DON'T!!! There is a danger that both process will be in MONITOR.
      //anyway, we have already synchronize out status with peer
      //if managed process is already in stable state (control, monitor), the sync would have notified peer.
      //if managed process is still starting, when the it switch to stable state it should call cosProcessStatusChanged()
      //   on which we would synchronize the status with peer!
     
    //run()
  //class OnProcessRegistrationThread

}

class ShutdownHook extends Thread
{
  ProcessManager instance_ = null;
 
  public ShutdownHook(ProcessManager instance)
  {
    instance_ = instance;
  }
 
  public void run()
  {
    ProcessManager.logger_.warn("JVM is shutting down...");
    instance_.stop();
    //instance_.shutdown();
  }
}

class ManagedProcessStruct
{
  public String            entity = "";
  public CosProcessDataStruct     config = null;
  public CosProcessRuntimeDataStruct   runtime = null;
 
  public ICosManagedProcess       reference = null;
  public ICosManagedProcess       peerReference = null;
  public ICosManagedProcess       activeReference = null;
 
  public boolean            adhoc = false;
 
  //if it is an in-process running, we need to keep reference to the instance
  //to stop it
  public boolean            inProcess = false;
  public Object            instance = null;
 
  public ManagedProcessStruct()
  {
    config = new CosProcessDataStruct();
    runtime = new CosProcessRuntimeDataStruct();
  }
};

class CorbaServerStruct
{
  public String        key = "";
  public CosOperationModeEnum  operationMode = CosOperationModeEnum.OperNotApplicable;
 
  public ICosMonitoredThread   reference = null;
  public ICosMonitoredThread   peerReference = null;
  public ICosMonitoredThread  activeReference = null;
}

class PeerManagerStruct
{
  public String         name = "";
  public ICosProcessManager  reference = null;
  public int          heartbeatMillis = 0;
  public int          portNo = 0;
  public String        ior = "";
  public long          nextPollingMillis = 0;
 
  public boolean        adhoc = false;
  public boolean        isActive = false;
  public int          errorCounter = 0;
}
TOP

Related Classes of tcg.syscontrol.ManagedProcessStruct

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.