Package com.alipay.bluewhale.core.daemon.supervisor

Source Code of com.alipay.bluewhale.core.daemon.supervisor.SyncProcesses

package com.alipay.bluewhale.core.daemon.supervisor;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.log4j.Logger;

import backtype.storm.Config;
import backtype.storm.utils.LocalState;
import backtype.storm.utils.Time;

import com.alipay.bluewhale.core.cluster.Common;
import com.alipay.bluewhale.core.cluster.StormConfig;
import com.alipay.bluewhale.core.daemon.State;
import com.alipay.bluewhale.core.messaging.ZMQContext;
import com.alipay.bluewhale.core.task.LocalAssignment;
import com.alipay.bluewhale.core.utils.PathUtils;
import com.alipay.bluewhale.core.utils.ProcessSimulator;
import com.alipay.bluewhale.core.utils.StormUtils;
import com.alipay.bluewhale.core.utils.TimeUtils;
import com.alipay.bluewhale.core.work.Worker;
import com.alipay.bluewhale.core.work.WorkerShutdown;
import com.alipay.bluewhale.core.work.refresh.WorkerHeartbeat;

/**
*  SyncProcesses
*  (1) �ر������쳣(�����ڣ���ʱ)��worker��
*  (2) �ҳ���Ҫ������worker��������
*/
class SyncProcesses extends ShutdownWork {
  private static Logger LOG = Logger.getLogger(SyncProcesses.class);

  private LocalState localState;

  private Map conf;

  private ConcurrentHashMap<String, String> workerThreadPids;

  private String supervisorId;

  private ZMQContext sharedContext;

  // private Supervisor supervisor;

  /**
   * @param conf
   * @param localState
   * @param workerThreadPids
   * @param supervisorId
   * @param sharedContext
   * @param workerThreadPidsReadLock
   * @param workerThreadPidsWriteLock
   */
  public SyncProcesses(String supervisorId, Map conf, LocalState localState,
      ConcurrentHashMap<String, String> workerThreadPids,
      ZMQContext sharedContext) {

    this.supervisorId = supervisorId;

    this.conf = conf;

    this.localState = localState;

    this.workerThreadPids = workerThreadPids;

    this.sharedContext = sharedContext;
  }

  @SuppressWarnings("unchecked")
  @Override
  public void run() {
    /**
     * Step 1: get assigned tasks from localstat Map<port(type Integer),
     * LocalAssignment>
     */
    Map<Integer, LocalAssignment> assignedTasks = null;
    try {
      assignedTasks = (Map<Integer, LocalAssignment>) localState
          .get(Common.LS_LOCAL_ASSIGNMENTS);
    } catch (IOException e) {
      LOG.error(
          "Failed to get Common.LS_LOCAL_ASSIGNMENTS from localState\n",
          e);
    }
    if (assignedTasks == null) {
      assignedTasks = new HashMap<Integer, LocalAssignment>();
    }

    /**
     * Step 2: get allocated tasks from local_dir/worker/ids/heartbeat
     * Map<workerid [WorkerHeartbeat, state]>
     */
    Map<String, StateHeartbeat> allocated = null;
    try {
      allocated = readAllocatedworkers(conf, localState, assignedTasks);
    } catch (IOException e2) {
      LOG.error("readAllocatedworkers" + allocated + " failed");
    }

    /**
     * Step 3: get which one should be keep and get keeper ports
     */
    Map<String, StateHeartbeat> keepers = null;
    Set<Integer> keepPorts = null;
    if (allocated != null) {
      keepers = new HashMap<String, StateHeartbeat>();
      keepPorts = new HashSet<Integer>();

      Set<Entry<String, StateHeartbeat>> allocatedSet = allocated
          .entrySet();
      for (Iterator<Entry<String, StateHeartbeat>> it = allocatedSet
          .iterator(); it.hasNext();) {

        Entry<String, StateHeartbeat> entry = it.next();

        String workerid = entry.getKey();
        StateHeartbeat hbstate = entry.getValue();

        if (hbstate.getState().equals(State.valid)) {
          keepers.put(workerid, hbstate);
        }
        if (hbstate.getHeartbeat() != null) {
          keepPorts.add(hbstate.getHeartbeat().getPort());
        }
        // kill those in allocated that are dead or disallowed;
        if (hbstate.getState() != State.valid) {
          StringBuilder sb = new StringBuilder();
          sb.append("Shutting down and clearing state for id ");
          sb.append(workerid);
          sb.append(";State:");
          sb.append(hbstate.getState());
          sb.append(";Heartbeat");
          sb.append(hbstate.getHeartbeat());
          LOG.info(sb);

          try {
            shutWorker(conf, supervisorId, workerid,
                workerThreadPids);
          } catch (IOException e) {
            String errMsg = "Failed to shutdown worker workId:"
                + workerid + ",supervisorId: " + supervisorId
                + ",workerThreadPids:" + workerThreadPids;
            LOG.error(errMsg, e);
          }
        }
      }

    }

    /**
     * Step 4: get reassigned tasks, which is in assignedTasks, but not in
     * keeperPorts Map<port(type Integer), LocalAssignment>
     */
    Map<Integer, LocalAssignment> reassignTasks = StormUtils
        .select_keys_pred(keepPorts, assignedTasks);

    /**
     * Step 5: generate new work ids
     */
    Map<Integer, String> newWorkerIds = null;
    if (reassignTasks != null) {

      newWorkerIds = new HashMap<Integer, String>();

      Set<Integer> reassignedTaskSet = reassignTasks.keySet();

      for (Integer port : reassignedTaskSet) {

        String newWorkerId = UUID.randomUUID().toString();

        newWorkerIds.put(port, newWorkerId);

        // create new worker Id directory
        // LOCALDIR/workers/newworkid/pids
        String path = StormConfig.worker_pids_root(conf, newWorkerId);
        try {
          PathUtils.local_mkdirs(path);
        } catch (IOException e) {
          LOG.error("Making dirs at " + path + " failed");
        }
      }
    }

    LOG.debug("Syncing processes");
    LOG.debug("Assigned tasks: " + assignedTasks);
    LOG.debug("Allocated: " + allocated);

    /**
     * Step 6: update localstat's LS_APPROVED_WORKERS Create approvedWorkers
     * Map<WorkerId, port>
     */

    Map<String, Integer> lsApprovedWorkers = null;
    try {
      lsApprovedWorkers = (Map<String, Integer>) localState
          .get(Common.LS_APPROVED_WORKERS);

    } catch (IOException e) {
      LOG.error("get Common.LS_APPROVED_WORKERS of localState failed");
    }
    if (lsApprovedWorkers == null) {
      lsApprovedWorkers = new HashMap<String, Integer>();
    }

    Map<String, Integer> approvedWorkers = new HashMap<String, Integer>();

    if (keepers != null && lsApprovedWorkers != null) {
      Set<String> keepersKeySet = keepers.keySet();
      Set<Entry<String, Integer>> lsAWEntrySet = lsApprovedWorkers
          .entrySet();

      for (Iterator<Entry<String, Integer>> it = lsAWEntrySet.iterator(); it
          .hasNext();) {

        Entry<String, Integer> entry = it.next();

        String keepWorkerId = entry.getKey();

        if (keepersKeySet.contains(keepWorkerId)) {

          approvedWorkers.put(keepWorkerId, entry.getValue());
        }
      }
    }

    if (newWorkerIds != null) {
      Set<Entry<Integer, String>> newWorkerIdsEntrySet = newWorkerIds
          .entrySet();
      for (Entry<Integer, String> entry : newWorkerIdsEntrySet) {

        String workerId = entry.getValue();
        Integer port = entry.getKey();

        approvedWorkers.put(workerId, port);
      }
    }

    try {
      localState.put(Common.LS_APPROVED_WORKERS, approvedWorkers);
    } catch (IOException e1) {
      LOG.error("put Common.LS_APPROVED_WORKERS " + approvedWorkers
          + " of localState failed");
    }

    /**
     * Step 7: wait for worker launch
     */
    if (reassignTasks != null) {
      Set<Entry<Integer, LocalAssignment>> reassignTasksEntrySet = reassignTasks
          .entrySet();
      for (Entry<Integer, LocalAssignment> entry : reassignTasksEntrySet) {

        Integer port = entry.getKey();
        LocalAssignment assignment = entry.getValue();

        String workerId = newWorkerIds.get(port);

        StringBuilder sb = new StringBuilder();
        sb.append("Launching worker with assiangment ");
        sb.append(assignment.toString());
        sb.append(" for the supervisor ");
        sb.append(supervisorId);
        sb.append(" on port ");
        sb.append(port);
        sb.append(" with id ");
        sb.append(workerId);
        LOG.info(sb);

        try {
          String clusterMode = StormConfig.cluster_mode(conf);

          if (clusterMode.equals("distributed")) {
            launchWorker(conf, sharedContext,
                assignment.getTopologyId(), supervisorId, port,
                workerId);
          } else if (clusterMode.equals("local")) {
            // in fact, this is no use
            launchWorker(conf, sharedContext,
                assignment.getTopologyId(), supervisorId, port,
                workerId, workerThreadPids);
          }
        } catch (Exception e) {
          String errorMsg = "Failed to launchWorker workerId:"
              + workerId + ":" + port;
          LOG.error(errorMsg, e);
        }
      }
    }

    /**
     * FIXME, workerIds should be Set, not Collection, but here simplify the
     * logic
     */
    Collection<String> workerIds = newWorkerIds.values();
    try {
      waitForWorkersLaunch(conf, workerIds);
    } catch (IOException e) {
      LOG.error(e + " waitForWorkersLaunch failed");
    } catch (InterruptedException e) {
      LOG.error(e + " waitForWorkersLaunch failed");
    }

  }

  /**
   * wait for all workers of the supervisor launch
   *
   * @param conf
   * @param workerIds
   * @throws InterruptedException
   * @throws IOException
   * @pdOid 52b11418-7474-446d-bff5-0ecd68f4954f
   */
  public void waitForWorkersLaunch(Map conf, Collection<String> workerIds)
      throws IOException, InterruptedException {

    int startTime = TimeUtils.current_time_secs();

    for (Iterator<String> iter = workerIds.iterator(); iter.hasNext();) {
      String workerId = iter.next();

      waitForWorkerLaunch(conf, workerId, startTime);
    }
  }

  /**
   * wait for worker launch if the time is not > *
   * SUPERVISOR_WORKER_START_TIMEOUT_SECS, otherwise info failed
   *
   * @param conf
   * @param workerId
   * @param startTime
   * @throws IOException
   * @throws InterruptedException
   * @pdOid f0a6ab43-8cd3-44e1-8fd3-015a2ec51c6a
   */
  public void waitForWorkerLaunch(Map conf, String workerId, int startTime)
      throws IOException, InterruptedException {

    LocalState ls = StormConfig.worker_state(conf, workerId);

    while (true) {

      WorkerHeartbeat whb = (WorkerHeartbeat) ls
          .get(Common.LS_WORKER_HEARTBEAT);
      if (whb == null
          && ((TimeUtils.current_time_secs() - startTime) < (Integer) conf
              .get(Config.SUPERVISOR_WORKER_START_TIMEOUT_SECS))) {
        LOG.info(workerId + "still hasn't started");
        Time.sleep(500);
      } else {
        // whb is valid or timeout
        break;
      }
    }

    WorkerHeartbeat whb = (WorkerHeartbeat) ls
        .get(Common.LS_WORKER_HEARTBEAT);
    if (whb == null) {
      LOG.info("Worker " + workerId + "failed to start");
    }
  }

  /**
   * get localstat approved workerId's map
   *
   * @return Map<workerid [workerheart, state]> [workerheart, state] is also a
   *         map, key is "workheartbeat" and "state"
   * @param conf
   * @param localState
   * @param assignedTasks
   * @throws IOException
   * @pdOid 11c9bebb-d082-4c51-b323-dd3d5522a649
   */
  @SuppressWarnings("unchecked")
  public Map<String, StateHeartbeat> readAllocatedworkers(Map conf,
      LocalState localState, Map<Integer, LocalAssignment> assignedTasks)
      throws IOException {

    Map<String, StateHeartbeat> workeridHbstate = null;

    int now = TimeUtils.current_time_secs();

    /**
     * Get approved workerIds from local_dir/supervisor/localstat
     * Map<WorkerId, port>
     */
    Map<String, Integer> approvedIds = (Map<String, Integer>) localState
        .get(Common.LS_APPROVED_WORKERS);

    /**
     * Get Map<workerId, WorkerHeartbeat> from
     * local_dir/worker/ids/heartbeat
     */
    Map<String, WorkerHeartbeat> idToHeartbeat = readWorkerHeartbeats(conf);

    if (idToHeartbeat != null) {
      workeridHbstate = new HashMap<String, StateHeartbeat>();
      Set<Map.Entry<String, WorkerHeartbeat>> entrySet = idToHeartbeat
          .entrySet();
      for (Iterator<Map.Entry<String, WorkerHeartbeat>> it = entrySet
          .iterator(); it.hasNext();) {

        Map.Entry<String, WorkerHeartbeat> entry = it.next();

        String workerid = entry.getKey().toString();

        WorkerHeartbeat whb = entry.getValue();

        State state = null;

        if (whb == null) {

          state = State.notStarted;

        } else if (approvedIds == null
            || approvedIds.containsKey(workerid) == false
            || matchesAssignment(whb, assignedTasks) == false) {

          // workerId isn't approved or
          // isn't assigned task
          state = State.disallowed;

        } else if ((now - whb.getTimeSecs()) > (Integer) conf
            .get(Config.SUPERVISOR_WORKER_TIMEOUT_SECS)) {//

          state = State.timedOut;
        } else {
          state = State.valid;
        }

        LOG.debug("Worker:" + workerid + " state:" + state
            + " WorkerHeartbeat: " + whb
            + " at supervisor time-secs " + now);

        workeridHbstate.put(workerid, new StateHeartbeat(state, whb));
      }
    }

    return workeridHbstate;
  }

  /**
   * check whether the workerheartbeat is allowed in the assignedTasks
   *
   * @param whb
   *            : WorkerHeartbeat
   * @param assignedTasks
   * @return boolean if true, the assignments(LS-LOCAL-ASSIGNMENTS) is match
   *         with workerheart if fasle, is not matched
   */
  public boolean matchesAssignment(WorkerHeartbeat whb,
      Map<Integer, LocalAssignment> assignedTasks) {

    boolean isMatch = true;
    LocalAssignment localAssignment = assignedTasks.get(whb.getPort());

    if (localAssignment == null) {
      isMatch = false;
    } else if (!whb.getTopologyId().equals(localAssignment.getTopologyId())) {
      // topology id not equal
      LOG.info("topology id not equal whb=" + whb.getTopologyId()
          + ",localAssignment=" + localAssignment.getTopologyId());
      isMatch = false;
    } else if (!(whb.getTaskIds().equals(localAssignment.getTaskIds()))) {
      // task-id isn't equal
      LOG.info("task-id isn't equal whb=" + whb.getTaskIds()
          + ",localAssignment=" + localAssignment.getTaskIds());
      isMatch = false;
    }

    return isMatch;
  }

  /**
   * get all workers heartbeats of the supervisor
   *
   * @param conf
   * @return Map<workerId, WorkerHeartbeat>
   * @throws IOException
   * @throws IOException
   */
  public Map<String, WorkerHeartbeat> readWorkerHeartbeats(Map conf)
      throws IOException {

    Map<String, WorkerHeartbeat> workerHeartbeats = null;

    // get the path: STORM-LOCAL-DIR/workers
    String path = StormConfig.worker_root(conf);

    List<String> workerIds = PathUtils.read_dir_contents(path);

    if (workerIds != null) {
      workerHeartbeats = new HashMap<String, WorkerHeartbeat>();

      for (String workerId : workerIds) {

        WorkerHeartbeat whb = readWorkerHeartbeat(conf, workerId);
        // this place whb can be null
        workerHeartbeats.put(workerId, whb);
      }
    }
    return workerHeartbeats;
  }

  /**
   * get worker heartbeat by workerid
   *
   * @param conf
   * @param workerId
   * @returns WorkerHeartbeat
   * @throws IOException
   */
  public WorkerHeartbeat readWorkerHeartbeat(Map conf, String workerId)
      throws IOException {

    LocalState ls = StormConfig.worker_state(conf, workerId);

    return (WorkerHeartbeat) ls.get(Common.LS_WORKER_HEARTBEAT);
  }

  /**
   * launch a worker in local mode
   *
   * @param conf
   * @param sharedcontext
   * @param stormId
   * @param supervisorId
   * @param port
   * @param workerId
   * @param workerThreadPidsAtom
   * @param workerThreadPidsAtomWriteLock
   * @pdOid 405f44c7-bc1b-4e16-85cc-b59352b6ff5d
   */
  @Deprecated
  public void launchWorker(Map conf, ZMQContext sharedcontext,
      String stormId, String supervisorId, Integer port, String workerId,
      ConcurrentHashMap<String, String> workerThreadPidsAtom)
      throws Exception {

    String pid = UUID.randomUUID().toString();

    WorkerShutdown worker = Worker.mk_worker(conf, sharedcontext, stormId,
        supervisorId, port, workerId);

    ProcessSimulator.registerProcess(pid, worker);

    workerThreadPidsAtom.put(workerId, pid);

  }

  /**
   * launch a worker in distributed mode
   *
   * @param conf
   * @param sharedcontext
   * @param topologyId
   * @param supervisorId
   * @param port
   * @param workerId
   * @throws IOException
   * @pdOid 6ea369dd-5ce2-4212-864b-1f8b2ed94abb
   */
  public void launchWorker(Map conf, ZMQContext sharedcontext,
      String topologyId, String supervisorId, Integer port,
      String workerId) throws IOException {

    // STORM-LOCAL-DIR/supervisor/stormdist/topologyId
    String stormroot = StormConfig.supervisor_stormdist_root(conf,
        topologyId);

    // STORM-LOCAL-DIR/supervisor/stormdist/topologyId/stormjar.jar
    String stormjar = StormConfig.supervisor_stormjar_path(stormroot);

    // get supervisor conf
    Map stormConf = StormConfig
        .read_supervisor_storm_conf(conf, topologyId);

    // get classpath
    // String[] param = new String[1];
    // param[0] = stormjar;
    // String classpath = StormUtils.add_to_classpath(
    // StormUtils.current_classpath(), param);
    String[] classpath = (new String(StormUtils.current_classpath() + ":" + stormjar)).split(":");
   
    String execute=(String) stormConf.get("worker.classpath.exclude");
   
    ArrayList<String> finalclasspath=new ArrayList<String>();
    for(String s:classpath)
    {
      if(execute==null||!s.matches(execute))
      {
        finalclasspath.add(s);
      }
    }
   
    StringBuffer classpathBuffer=new StringBuffer();
    String joinchar="";
    for(String s:finalclasspath)
    {
      classpathBuffer.append(joinchar);
      classpathBuffer.append(s);
      joinchar=":";
    }
    // get child process parameter

    String childopts = "";
   
    if (conf.get(Config.WORKER_CHILDOPTS) != null) {
      childopts = ""+conf.get(Config.WORKER_CHILDOPTS);
    }
    if (conf.get(Config.WORKER_CHILDOPTS+"."+port) != null) {
      childopts = ""+conf.get(Config.WORKER_CHILDOPTS+"."+port);
    }
   
    if (stormConf.get(Config.TOPOLOGY_WORKER_CHILDOPTS) != null) {
      childopts = " " + stormConf.get(Config.TOPOLOGY_WORKER_CHILDOPTS);
    }
   
    if (stormConf.get(Config.TOPOLOGY_WORKER_CHILDOPTS+"."+port) != null) {
      childopts = " " + stormConf.get(Config.TOPOLOGY_WORKER_CHILDOPTS+"."+port);
    }
   
    String stormhome = System.getProperty("storm.home");
    if (stormhome == null) {
      stormhome=".";
    }
    // TODO ???��������� %ID%
    childopts = childopts.replace("%ID%", port.toString());
    childopts = childopts.replaceAll("%port%", port.toString());
    childopts = childopts.replaceAll("%storm.home%", stormhome);

    String logFileName = "worker-" + port + ".log";

    StringBuilder commandSB = new StringBuilder();
    // FIXME ���ӻ��ˣ��˴�ƴ���ַ�����ִ���ְ��տո�ֲ�
    commandSB.append("java -server ");
    commandSB.append(childopts);

    commandSB.append(" -Djava.library.path=");
    commandSB.append((String) conf.get(Config.JAVA_LIBRARY_PATH));

    commandSB.append(" -Dlogfile.name=");
    commandSB.append(logFileName);

   
    commandSB.append(" -Dstorm.home=");
    commandSB.append(stormhome);

    commandSB.append(" -Dlog4j.configuration=storm.log.properties");

    commandSB.append(" -cp ");
    commandSB.append(classpathBuffer.toString());

    commandSB.append(" com.alipay.bluewhale.core.work.Worker ");
    commandSB.append(topologyId);

    commandSB.append(" ");
    commandSB.append(supervisorId);

    commandSB.append(" ");
    commandSB.append(port);

    commandSB.append(" ");
    commandSB.append(workerId);

    LOG.info("Launching worker with command: " + commandSB);

    Map<String, String> environment = new HashMap<String, String>();
    environment.put("LD_LIBRARY_PATH",
        (String) conf.get(Config.JAVA_LIBRARY_PATH));
   
    try{
    ArrayList<String> killlist=findByJavaPort.findProcess(port);
    if(killlist!=null)
    {
     
      for(int i=0;i<3;i++)
      {
        for(String pid:killlist)
        {
          StormUtils.ensure_process_killed(Integer.parseInt(pid));
        }
        Thread.sleep(300);
      }
    }
   
    }catch(Throwable e)
    {
      LOG.error("killlist",e);
    }
   

    StormUtils.launch_work_process(commandSB.toString(), environment);
  }

}
TOP

Related Classes of com.alipay.bluewhale.core.daemon.supervisor.SyncProcesses

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.