Package system

Source Code of system.MasterImpl$FaultDetector

/**
*
*/
package system;

import exceptions.DataNotFoundException;
import exceptions.IllegalClassException;
import exceptions.IllegalInputException;
import exceptions.NoResourcesException;
import graphs.GraphPartitioner;

import java.io.IOException;
import java.rmi.RemoteException;
import java.rmi.registry.LocateRegistry;
import java.rmi.registry.Registry;
import java.rmi.server.UnicastRemoteObject;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.logging.Logger;

import utility.JPregelLogger;
import utility.Pair;

import api.Vertex;

/**
* @author Manasa Chandrasekhar
* @author Kowshik Prakasam
*
*/
public class MasterImpl extends UnicastRemoteObject implements ManagerToMaster,
     Runnable {

  /**
   *
   */
  private int superStep;

  public int getSuperStep() {
    return superStep;
  }

  public void setSuperStep(int superStep) {
    this.superStep = superStep;
  }

  private static final long serialVersionUID = -7962409918852099855L;

  private GraphPartitioner gp;
  private Thread superstepExecutorThread;
  private Thread faultDetectorThread;
  public Map<String, WorkerManager> idManagerMap;
  private Logger logger;
  private String id;
  private String vertexClassName;
  private List<String> returnedManagers;
  private boolean allDone;

  private int participatingMgrs;

  public synchronized int getParticipatingMgrs() {
    return participatingMgrs;
  }

  public synchronized void setParticipatingMgrs(int participatingMgrs) {
    this.participatingMgrs = participatingMgrs;
  }

  private int numMachines;

  private FaultDetector aFaultDetector;

  private boolean isActive;

  private boolean isWkrMgrsInitialized;

  private int lastCheckPoint;

  public String getVertexClassName() {
    return vertexClassName;
  }

  public void setVertexClassName(String vertexClassName) {
    this.vertexClassName = vertexClassName;
    logger.info("set vertexClassName : " + vertexClassName);
  }

  private static final String LOG_FILE_PREFIX = JPregelConstants.LOG_DIR
      + "master";
  private static final String LOG_FILE_SUFFIX = ".log";
  private static final int PORT_NUMBER = 3672;

  public class FaultDetector implements Runnable {

    private Logger logger;
    private static final String LOG_FILE_PREFIX = JPregelConstants.LOG_DIR
        + "faultdetector";
    private static final String LOG_FILE_SUFFIX = ".log";

    private void initLogger() throws IOException {
      this.logger = JPregelLogger.getLogger(this.getID(), LOG_FILE_PREFIX
          + LOG_FILE_SUFFIX);
    }

    public FaultDetector() throws IOException {
      initLogger();
    }

    @Override
    public void run() {
      while (true) {
        WorkerManager aWkrMgr = null;
        String wkrMgrID = null;
        try {
          for (Map.Entry<String, WorkerManager> e : idManagerMap
              .entrySet()) {
            wkrMgrID = e.getKey();
            aWkrMgr = e.getValue();
            aWkrMgr.isAlive();
          }
        } catch (RemoteException e) {
          this.logger
              .severe("Worker manager : " + wkrMgrID + " died");
          System.err.println("Worker manager : "+wkrMgrID+" died");
          e.printStackTrace();
          System.err.println("Deactivating master");
          // deactivate master
          deactivate();

          synchronized (idManagerMap) {
            idManagerMap.remove(wkrMgrID);
          }

          // check if this fellow has returned already. decrement
          // counter otherwise.
          if (!returnedManagers.contains(wkrMgrID)) {
            setParticipatingMgrs(getParticipatingMgrs() - 1);
            this.logger
                .info(wkrMgrID
                    + " didn't report completion earlier .. so decrementing participatingMgrs to "
                    + participatingMgrs);
            System.err.println(wkrMgrID
                + " didn't report completion earlier .. so decrementing participatingMgrs to "
                + participatingMgrs);
            if (getParticipatingMgrs() == 0) {
              allDone = true;
            }
          }
          // for all worker managers, other than the failed
          // manager,
          // stop the superstep immediately.
         
          stopSuperStep();

          // for all worker managers, other than the failed
          // manager,
          // restore state.
          try {
            restoreState();
          } catch (IOException e1) {

            e1.printStackTrace();
          } catch (IllegalInputException e1) {

            e1.printStackTrace();
          } catch (DataNotFoundException e1) {

            e1.printStackTrace();
          } catch (InstantiationException e1) {

            e1.printStackTrace();
          } catch (IllegalAccessException e1) {

            e1.printStackTrace();
          } catch (ClassNotFoundException e1) {

            e1.printStackTrace();
          } catch (NoResourcesException e1) {

            e1.printStackTrace();
          }
         
          //activating master again
          setSuperStep(getLastCheckPoint());
          setWkrMgrsInitialized(true);
          activate();

        }
       
       
      }
    }

    public String getID() {
      return "FaultDetector";
    }
  }

  /**
   * @return
   */
  private int getLastCheckPoint() {
    return this.lastCheckPoint;
  }

  /**
   * @return
   */
  private void setLastCheckPoint(int checkPoint) {
    this.lastCheckPoint = checkPoint;
  }

  public MasterImpl(String vertexClassName, int numMachines)
      throws IOException {

    this.setId("Master");
    initLogger();
    this.lastCheckPoint = JPregelConstants.FIRST_SUPERSTEP;
    this.setNumMachines(numMachines);
    this.setSuperStep(JPregelConstants.FIRST_SUPERSTEP);
    this.setVertexClassName(vertexClassName);
    this.returnedManagers = new Vector<String>();
    this.idManagerMap = new HashMap<String, WorkerManager>();
    this.superstepExecutorThread = new Thread(this, getId());
    this.aFaultDetector = new FaultDetector();
    this.faultDetectorThread = new Thread(this.aFaultDetector,
        this.aFaultDetector.getID());

  }

  /**
   * @param numMachines
   */
  private void setNumMachines(int numMachines) {
    this.numMachines = numMachines;

  }


  public void setId(String id) {
    this.id = id;
  }

  public String getId() {
    return id;
  }

  private void initLogger() throws IOException {
    this.logger = JPregelLogger.getLogger(getId(), LOG_FILE_PREFIX
        + LOG_FILE_SUFFIX);
  }

  /*
   * (non-Javadoc)
   *
   * @see system.ManagerToMaster#register(system.WorkerManager,
   * java.lang.String)
   */
  @Override
  public synchronized void register(WorkerManager aWorkerManager, String id)
      throws RemoteException {
    this.idManagerMap.put(id, aWorkerManager);
    logger.info("registered worker manager : " + id);
    logger.info("size of map : " + idManagerMap.size());

    if (idManagerMap.size() == this.numMachines) {
      try {
        executeTask();
      } catch (IOException e) {
        logger.severe(e.toString());
        throw new RemoteException(e.toString());
      }
    }

  }

  public static void main(String args[]) throws IllegalClassException {
    String vertexClassName = args[0];
    int numMachines = Integer.parseInt(args[1]);
    try {
      Class<?> c = Class.forName(vertexClassName);
      if (!c.getSuperclass().equals(Vertex.class)) {
        throw new IllegalClassException(vertexClassName);
      }

    } catch (ClassNotFoundException e) {
      System.err.println("Client vertex class not found !");
      e.printStackTrace();
      return;
    }
    if (System.getSecurityManager() == null) {
      System.setSecurityManager(new SecurityManager());
    }
    try {

      MasterImpl master = new MasterImpl(vertexClassName, numMachines);
      Registry registry = LocateRegistry.createRegistry(PORT_NUMBER);
      registry.rebind(ManagerToMaster.SERVICE_NAME, master);
      System.err.println("Master instance bound");
    } catch (Exception e) {
      System.err.println("Can't bind Master instance");
      e.printStackTrace();

    }
  }

  public int getWorkerMgrsCount() {
    logger.info("returning : " + idManagerMap.size());
    return this.idManagerMap.size();
  }

  public int getWorkerMgrThreads() {
    return JPregelConstants.WORKER_MGR_THREADS;
  }

  public synchronized boolean isActive() {
    return this.isActive;
  }

  public synchronized void deactivate() {
    this.isActive = false;
  }

  public synchronized void activate() {
    this.isActive = true;
  }

  public void executeTask() throws RemoteException {
    try {
      this.gp = new GraphPartitioner(JPregelConstants.GRAPH_FILE, this,
          this.getVertexClassName());
    } catch (IOException e) {
      logger.severe(e.toString());
      throw new RemoteException(e.getMessage(),e);
    }
    this.setAllDone(true);
    this.activate();
    logger.info("Starting superstep executor thread");
    superstepExecutorThread.start();
    logger.info("Starting fault detector thread");
    faultDetectorThread.start();
  }

  public void run() {
   
    while (true) {
      if (this.isActive()) {
        try {
          while (!allDone()) {

          }
          synchronized (idManagerMap) {
            if (!isWkrMgrsInitialized()) {
              initializeWorkerManagers();
              setWkrMgrsInitialized(true);
            }

            if (findActiveManagers(this.getSuperStep()) > 0) {
              try {
                Thread.sleep(10);
              } catch (InterruptedException e) {
                e.printStackTrace();
              }

              this.setAllDone(false);

              this.setParticipatingMgrs(this.idManagerMap.size());
              this.returnedManagers.clear();
              for (Map.Entry<String, WorkerManager> e : this.idManagerMap
                  .entrySet()) {
                String aWkrMgrId = e.getKey();
                WorkerManager aWkrMgr = e.getValue();

                logger.info("Commencing superstep : "
                    + this.getSuperStep()
                    + " in worker manager : " + aWkrMgrId);
                aWkrMgr.beginSuperStep(this.getSuperStep(),
                    this.isCheckPoint());
              }
              logger.info("Waiting for worker managers to complete execution");
              while (isActive() && !allDone()) {

              }
              if (isActive()) {
                logger.info("Superstep over : "
                    + this.getSuperStep());
                this.setSuperStep(this.getSuperStep() + 1);
              }
            } else {
              logger.info("-----------------------------------------------------");
              logger.info("Writing Solutions");
              // Writing solutions
              writeSolutions();
              System.err.println("Computations completed. Solutions written to solutions/. Logs in logs/ !");
              break;
            }

          }

        } catch (RemoteException e) {
          logger.severe(e.toString());
          e.printStackTrace();
          this.deactivate();
        } catch (IOException e) {
          logger.severe(e.getMessage());
          e.printStackTrace();
          break;
        } catch (IllegalInputException e) {
          logger.severe(e.getMessage());
          e.printStackTrace();
          break;
        } catch (DataNotFoundException e) {
          logger.severe(e.getMessage());
          e.printStackTrace();
          break;
        } catch (InstantiationException e) {
          logger.severe(e.getMessage());
          e.printStackTrace();
          break;
        } catch (IllegalAccessException e) {
          logger.severe(e.getMessage());
          e.printStackTrace();
          break;
        } catch (ClassNotFoundException e) {
          logger.severe(e.getMessage());
          e.printStackTrace();
          break;
        } catch (NoResourcesException e) {
          logger.severe(e.getMessage());
          e.printStackTrace();
          break;
        }

      }
    }

  }

  /**
   * @param b
   */
  private synchronized void setWkrMgrsInitialized(boolean newState) {
    this.isWkrMgrsInitialized = newState;
  }

  /**
   * @return
   */
  private synchronized boolean isWkrMgrsInitialized() {
    return this.isWkrMgrsInitialized;
  }

  /**
   * @return
   */
  private boolean isCheckPoint() {
    if (this.getSuperStep() % JPregelConstants.CHECKPOINT_INTERVAL == 0) {
      return true;
    }
    return false;
  }

  /**
   * @throws RemoteException
   *
   */
  private void writeSolutions() throws RemoteException {
    for (Map.Entry<String, WorkerManager> e : this.idManagerMap.entrySet()) {
      WorkerManager aWkrMgr = e.getValue();
      aWkrMgr.writeSolutions();
    }

  }

  /**
   * @return
   * @throws RemoteException
   */
  private int findActiveManagers(int superStep) throws RemoteException {
    if (superStep == JPregelConstants.FIRST_SUPERSTEP) {
      return this.idManagerMap.size();
    }
    int activeManagers = 0;
    for (Map.Entry<String, WorkerManager> e : this.idManagerMap.entrySet()) {
      MessageSpooler aSpooler = (MessageSpooler) e.getValue();
      if (!aSpooler.isQueueEmpty()) {
        activeManagers++;
      }
    }
    return activeManagers;

  }

  /**
   * @return
   */
  private synchronized boolean allDone() {
    return allDone;
  }

  private void restoreState() throws IOException, IllegalInputException,
      DataNotFoundException, InstantiationException,
      IllegalAccessException, ClassNotFoundException,
      NoResourcesException {

    if (idManagerMap.size() == 0) {
      throw new NoResourcesException(
          "No worker managers available to the Master for queueing jobs");
    }

    List<List<Integer>> assignedPartitions = this.assignPartitions();
    Map<Integer, Pair<String, String>> partitionWkrMgrMap = this
        .getPartitionMap(assignedPartitions);
    this.restoreState(assignedPartitions);

    // Write partition - worker manager map to file
    DataLocator dl = DataLocator.getDataLocator(gp.getPartitionSize());
    dl.writePartitionMap(partitionWkrMgrMap);
    logger.info("Restored state");
  }

  private void restoreState(List<List<Integer>> assignedPartitions) {
    int index = 0;
    for (Map.Entry<String, WorkerManager> anEntry : idManagerMap.entrySet()) {
      String wkrMgrToBeRestored = anEntry.getKey();

      try {
        System.err.println(wkrMgrToBeRestored + " restoring state");
        this.logger.info(wkrMgrToBeRestored + " restoring state");
        anEntry.getValue().restoreState(getLastCheckPoint(),
            assignedPartitions.get(index));
        System.err.println(wkrMgrToBeRestored + " state restoration - SUCCESSFUL !");
      } catch (RemoteException e) {
        // can't really catch this now, ignore
        e.printStackTrace();
      }
      index++;
    }
  }

  private void stopSuperStep() {
    for (Map.Entry<String, WorkerManager> anEntry : idManagerMap.entrySet()) {
      String wkrMgrToBeStopped = anEntry.getKey();

      try {
        System.err.println(wkrMgrToBeStopped + " stopping superstep");
        this.logger.info(wkrMgrToBeStopped + " stopping superstep");
        anEntry.getValue().stopSuperStep();
      } catch (RemoteException e1) {
        // can't really catch this now, ignore
        e1.printStackTrace();
      }

    }
  }

  /**
   * @throws IOException
   * @throws DataNotFoundException
   * @throws IllegalInputException
   * @throws ClassNotFoundException
   * @throws IllegalAccessException
   * @throws InstantiationException
   * @throws NoResourcesException
   *
   */
  private void initializeWorkerManagers() throws IOException,
      IllegalInputException, DataNotFoundException,
      InstantiationException, IllegalAccessException,
      ClassNotFoundException, NoResourcesException {

    if (idManagerMap.size() == 0) {
      throw new NoResourcesException(
          "No worker managers available to the Master for queueing jobs");
    }

    int numPartitions = this.gp.partitionGraphs();
    logger.info("Num partitions : " + numPartitions);
    List<List<Integer>> assignedPartitions = this.assignPartitions();
    Map<Integer, Pair<String, String>> partitionWkrMgrMap = this
        .getPartitionMap(assignedPartitions);
    this.initializeWorkerManagers(assignedPartitions);

    // Write partition - worker manager map to file
    DataLocator dl = DataLocator.getDataLocator(gp.getPartitionSize());
    dl.writePartitionMap(partitionWkrMgrMap);
    logger.info("Initialized worker managers : ");
    dl.clearSolutions();
    logger.info("Cleared solutions folder");
  }

  private Map<Integer, Pair<String, String>> getPartitionMap(
      List<List<Integer>> assignedPartitions) throws RemoteException {
    Map<Integer, Pair<String, String>> partitionWkrMgrMap = new HashMap<Integer, Pair<String, String>>();
    int index = 0;
    WorkerManager thisWkrMgr = null;
    String thisWkrMgrId = null;
    for (Map.Entry<String, WorkerManager> e : this.idManagerMap.entrySet()) {
      thisWkrMgrId = e.getKey();
      thisWkrMgr = e.getValue();
      List<Integer> thisWkrMgrPartitions = assignedPartitions.get(index);
      for (int partition : thisWkrMgrPartitions) {
        partitionWkrMgrMap.put(partition, new Pair<String, String>(
            thisWkrMgrId, thisWkrMgr.getHostInfo()));
      }
      index++;
    }
    return partitionWkrMgrMap;
  }

  private void initializeWorkerManagers(List<List<Integer>> assignedPartitions)
      throws RemoteException {
    int index = 0;
    WorkerManager thisWkrMgr = null;

    for (Map.Entry<String, WorkerManager> e : this.idManagerMap.entrySet()) {
      thisWkrMgr = e.getValue();
      List<Integer> thisWkrMgrPartitions = assignedPartitions.get(index);
      thisWkrMgr.initialize(thisWkrMgrPartitions,
          this.getWorkerMgrThreads(), this.gp.getPartitionSize(),
          this.gp.getNumVertices());
      index++;
    }
  }

  private List<List<Integer>> assignPartitions() throws NoResourcesException,
      IOException, IllegalInputException, DataNotFoundException,
      InstantiationException, IllegalAccessException,
      ClassNotFoundException {
    if (idManagerMap.size() == 0) {
      throw new NoResourcesException(
          "No worker managers available to the Master for queueing jobs");
    }
    logger.info(idManagerMap.toString());

    List<List<Integer>> assignedPartitions = new Vector<List<Integer>>();

   

    int numMgrPartitions = this.gp.getNumberOfPartitions() / this.getWorkerMgrsCount();
    List<Integer> wkrMgrPartitions = new Vector<Integer>();
    int partitionCount = 0;
    WorkerManager thisWkrMgr = null;
    for (Map.Entry<String, WorkerManager> e : this.idManagerMap.entrySet()) {
      if (thisWkrMgr != null) {
        logger.info("Generating partitions for worker manager : "
            + thisWkrMgr.getId() + " -> " + wkrMgrPartitions);
        assignedPartitions.add(wkrMgrPartitions);
      }
      wkrMgrPartitions = new Vector<Integer>();
      thisWkrMgr = e.getValue();
      for (int i = 0; i < numMgrPartitions; i++, partitionCount++) {
        wkrMgrPartitions.add(partitionCount);
        logger.info("added : " + partitionCount);
      }
      logger.info("End of loop : " + wkrMgrPartitions);
    }

    while (partitionCount < this.gp.getNumberOfPartitions()) {
      wkrMgrPartitions.add(partitionCount);
      partitionCount++;
    }

    assignedPartitions.add(wkrMgrPartitions);

    logger.info("Assigned partitions : " + assignedPartitions);
    return assignedPartitions;
  }

  /*
   * (non-Javadoc)
   *
   * @see system.ManagerToMaster#endSuperStep(java.lang.String)
   */
  @Override
  public void endSuperStep(String wkrMgrId) throws RemoteException {

    // this fellow shouldn't have returned before .. checking just in
    // case there is a double endSuperStep() done by a worker manager
    // during a stopSuperStep() call
    if (!this.returnedManagers.contains(wkrMgrId)
        && superStep == this.getSuperStep()) {
      // Checking if any managers are yet to report completion
      if (this.getParticipatingMgrs() > 0) {

        logger.info("Worker manager : " + wkrMgrId
            + " has reported completion of superstep : "
            + this.getSuperStep());
        this.setParticipatingMgrs(this.getParticipatingMgrs() - 1);
        this.returnedManagers.add(wkrMgrId);

        if (this.getParticipatingMgrs() == 0) {
          logger.info("All worker managers reported completion of superstep : "
              + this.getSuperStep());
          if (this.isCheckPoint() && this.isActive()) {
            logger.info("#############################");
            logger.info("Checkpointed data at superstep : "
                + this.getSuperStep());
            logger.info("#############################");
            this.setLastCheckPoint(this.getSuperStep());

          }
          setAllDone(true);
        }
      }
    }

  }

  /**
   * @param b
   */
  private synchronized void setAllDone(boolean allDone) {
    this.allDone = allDone;

  }

}
TOP

Related Classes of system.MasterImpl$FaultDetector

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.