Package org.apache.hadoop.yarn.server.api.records

Examples of org.apache.hadoop.yarn.server.api.records.HeartbeatResponse


    NodeHeartbeatResponse nodeHeartBeatResponse = recordFactory
        .newRecordInstance(NodeHeartbeatResponse.class);
   
    // 3. Check if it's a 'fresh' heartbeat i.e. not duplicate heartbeat
    HeartbeatResponse lastHeartbeatResponse = rmNode.getLastHeartBeatResponse();
    if (remoteNodeStatus.getResponseId() + 1 == lastHeartbeatResponse
        .getResponseId()) {
      LOG.info("Received duplicate heartbeat from node "
          + rmNode.getNodeAddress());
      nodeHeartBeatResponse.setHeartbeatResponse(lastHeartbeatResponse);
      return nodeHeartBeatResponse;
    } else if (remoteNodeStatus.getResponseId() + 1 < lastHeartbeatResponse
        .getResponseId()) {
      LOG.info("Too far behind rm response id:"
          + lastHeartbeatResponse.getResponseId() + " nm response id:"
          + remoteNodeStatus.getResponseId());
      // TODO: Just sending reboot is not enough. Think more.
      this.rmContext.getDispatcher().getEventHandler().handle(
          new RMNodeEvent(nodeId, RMNodeEventType.REBOOTING));
      return reboot;
    }

    // Heartbeat response
    HeartbeatResponse latestResponse = recordFactory
        .newRecordInstance(HeartbeatResponse.class);
    latestResponse.setResponseId(lastHeartbeatResponse.getResponseId() + 1);
    rmNode.updateHeartbeatResponseForCleanup(latestResponse);
    latestResponse.setNodeAction(NodeAction.NORMAL);

    // Check if node's masterKey needs to be updated and if the currentKey has
    // roller over, send it across
    if (isSecurityEnabled()) {

      boolean shouldSendMasterKey = false;

      MasterKey nextMasterKeyForNode =
          this.containerTokenSecretManager.getNextKey();
      if (nextMasterKeyForNode != null) {
        // nextMasterKeyForNode can be null if there is no outstanding key that
        // is in the activation period.
        MasterKey nodeKnownMasterKey = request.getLastKnownMasterKey();
        if (nodeKnownMasterKey.getKeyId() != nextMasterKeyForNode.getKeyId()) {
          shouldSendMasterKey = true;
        }
      }
      if (shouldSendMasterKey) {
        latestResponse.setMasterKey(nextMasterKeyForNode);
      }
    }

    // 4. Send status to RMNode, saving the latest response.
    this.rmContext.getDispatcher().getEventHandler().handle(
View Full Code Here


    int request = 2;
    am.allocate("h1" , 1000, request,
        new ArrayList<ContainerId>());
   
    //kick the scheduler
    HeartbeatResponse resp = nm1.nodeHeartbeat(true);
    List<Container> conts = am.allocate(new ArrayList<ResourceRequest>(),
        new ArrayList<ContainerId>()).getAllocatedContainers();
    int contReceived = conts.size();
    int waitCount = 0;
    while (contReceived < request && waitCount++ < 200) {
      LOG.info("Got " + contReceived + " containers. Waiting to get "
               + request);
      Thread.sleep(100);
      conts = am.allocate(new ArrayList<ResourceRequest>(),
          new ArrayList<ContainerId>()).getAllocatedContainers();
      contReceived += conts.size();
    }
    Assert.assertEquals(request, contReceived);
   
    am.unregisterAppAttempt();
    am.waitForState(RMAppAttemptState.FINISHED);

    //currently only containers are cleaned via this
    //AM container is cleaned via container launcher
    List<ContainerId> contsToClean = resp.getContainersToCleanupList();
    List<ApplicationId> apps = resp.getApplicationsToCleanupList();
    int cleanedConts = contsToClean.size();
    int cleanedApps = apps.size();
    waitCount = 0;
    while ((cleanedConts < 3 || cleanedApps < 1) && waitCount++ < 200) {
      LOG.info("Waiting to get cleanup events.. cleanedConts: "
          + cleanedConts + " cleanedApps: " + cleanedApps);
      Thread.sleep(100);
      resp = nm1.nodeHeartbeat(true);
      contsToClean = resp.getContainersToCleanupList();
      apps = resp.getApplicationsToCleanupList();
      cleanedConts += contsToClean.size();
      cleanedApps += apps.size();
    }
   
    Assert.assertEquals(1, apps.size());
View Full Code Here

        new ArrayList<ContainerStatus>();
    containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0)
      .getId(), ContainerState.RUNNING, "nothing", 0));
    containerStatuses.put(app.getApplicationId(), containerStatusList);

    HeartbeatResponse resp = nm1.nodeHeartbeat(containerStatuses, true);
    dispatcher.await();
    List<ContainerId> contsToClean = resp.getContainersToCleanupList();
    int cleanedConts = contsToClean.size();
    waitCount = 0;
    while (cleanedConts < 1 && waitCount++ < 200) {
      LOG.info("Waiting to get cleanup events.. cleanedConts: " + cleanedConts);
      Thread.sleep(100);
      resp = nm1.nodeHeartbeat(true);
      dispatcher.await();
      contsToClean = resp.getContainersToCleanupList();
      cleanedConts += contsToClean.size();
    }
    LOG.info("Got cleanup for " + contsToClean.get(0));
    Assert.assertEquals(1, cleanedConts);

    // Now to test the case when RM already gave cleanup, and NM suddenly
    // realizes that the container is running.
    LOG.info("Testing container launch much after release and "
        + "NM getting cleanup");
    containerStatuses.clear();
    containerStatusList.clear();
    containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0)
      .getId(), ContainerState.RUNNING, "nothing", 0));
    containerStatuses.put(app.getApplicationId(), containerStatusList);

    resp = nm1.nodeHeartbeat(containerStatuses, true);
    dispatcher.await();
    contsToClean = resp.getContainersToCleanupList();
    cleanedConts = contsToClean.size();
    // The cleanup list won't be instantaneous as it is given out by scheduler
    // and not RMNodeImpl.
    waitCount = 0;
    while (cleanedConts < 1 && waitCount++ < 200) {
      LOG.info("Waiting to get cleanup events.. cleanedConts: " + cleanedConts);
      Thread.sleep(100);
      resp = nm1.nodeHeartbeat(true);
      dispatcher.await();
      contsToClean = resp.getContainersToCleanupList();
      cleanedConts += contsToClean.size();
    }
    LOG.info("Got cleanup for " + contsToClean.get(0));
    Assert.assertEquals(1, cleanedConts);
View Full Code Here

  @After
  public void tearDown() throws Exception {
  }
 
  private RMNodeStatusEvent getMockRMNodeStatusEvent() {
    HeartbeatResponse response = mock(HeartbeatResponse.class);

    NodeHealthStatus healthStatus = mock(NodeHealthStatus.class);
    Boolean yes = new Boolean(true);
    doReturn(yes).when(healthStatus).getIsNodeHealthy();
   
View Full Code Here

    // but updating heartbeat response for cleanup does
    RMNodeStatusEvent statusEvent = getMockRMNodeStatusEvent();
    node.handle(statusEvent);
    Assert.assertEquals(1, node.getContainersToCleanUp().size());
    Assert.assertEquals(1, node.getAppsToCleanup().size());
    HeartbeatResponse hbrsp = Records.newRecord(HeartbeatResponse.class);
    node.updateHeartbeatResponseForCleanup(hbrsp);
    Assert.assertEquals(0, node.getContainersToCleanUp().size());
    Assert.assertEquals(0, node.getAppsToCleanup().size());
    Assert.assertEquals(1, hbrsp.getContainersToCleanupCount());
    Assert.assertEquals(completedContainerId, hbrsp.getContainerToCleanup(0));
    Assert.assertEquals(1, hbrsp.getApplicationsToCleanupCount());
    Assert.assertEquals(finishedAppId, hbrsp.getApplicationsToCleanup(0));
  }
View Full Code Here

   
    ClusterMetrics metrics = ClusterMetrics.getMetrics();
    assert(metrics != null);
    int initialMetricCount = metrics.getNumDecommisionedNMs();

    HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));

    writeToHostsFile("host1");

    rm.getNodesListManager().refreshNodes(conf);

    nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    Assert
        .assertEquals(0, ClusterMetrics.getMetrics().getNumDecommisionedNMs());

    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertTrue("Node is not decommisioned.", NodeAction.SHUTDOWN
        .equals(nodeHeartbeat.getNodeAction()));

    checkDecommissionedNMCount(rm, ++initialMetricCount);
  }
View Full Code Here

    MockNM nm2 = rm.registerNode("host2:5678", 10240);

    int initialMetricCount = ClusterMetrics.getMetrics()
        .getNumDecommisionedNMs();

    HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));

    writeToHostsFile("host2");

    rm.getNodesListManager().refreshNodes(conf);

    nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertTrue("The decommisioned metrics are not updated",
        NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));
    checkDecommissionedNMCount(rm, ++initialMetricCount);
  }
View Full Code Here

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    MockNM nm2 = rm.registerNode("host2:5678", 10240);
    ClusterMetrics metrics = ClusterMetrics.getMetrics();
    assert(metrics != null);
    int initialMetricCount = metrics.getNumDecommisionedNMs();
    HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertEquals(
        NodeAction.NORMAL,
        nodeHeartbeat.getNodeAction());
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertEquals(
        NodeAction.NORMAL,
        nodeHeartbeat.getNodeAction());
    writeToHostsFile("host1");
    conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile
        .getAbsolutePath());
    rm.getNodesListManager().refreshNodes(conf);
    nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertEquals(
        "Node should not have been decomissioned.",
        NodeAction.NORMAL,
        nodeHeartbeat.getNodeAction());
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertEquals("Node should have been decomissioned but is in state" +
        nodeHeartbeat.getNodeAction(),
        NodeAction.SHUTDOWN, nodeHeartbeat.getNodeAction());
    checkDecommissionedNMCount(rm, ++initialMetricCount);
  }
View Full Code Here

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    MockNM nm2 = rm.registerNode("host2:5678", 10240);
    ClusterMetrics metrics = ClusterMetrics.getMetrics();
    assert(metrics != null);
    int initialMetricCount = metrics.getNumDecommisionedNMs();
    HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertEquals(
        NodeAction.NORMAL,
        nodeHeartbeat.getNodeAction());
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertEquals(
        NodeAction.NORMAL,
        nodeHeartbeat.getNodeAction());
    writeToHostsFile("host2");
    conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile
        .getAbsolutePath());
    rm.getNodesListManager().refreshNodes(conf);
    nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertEquals(
        "Node should not have been decomissioned.",
        NodeAction.NORMAL,
        nodeHeartbeat.getNodeAction());
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertEquals("Node should have been decomissioned but is in state" +
        nodeHeartbeat.getNodeAction(),
        NodeAction.SHUTDOWN, nodeHeartbeat.getNodeAction());
    checkDecommissionedNMCount(rm, ++initialMetricCount);
  }
View Full Code Here

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    MockNM nm2 = rm.registerNode("host2:1234", 2048);

    int initialMetricCount = ClusterMetrics.getMetrics().getNumRebootedNMs();
    HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));

    nodeHeartbeat = nm2.nodeHeartbeat(
      new HashMap<ApplicationId, List<ContainerStatus>>(), true, -100);
    Assert.assertTrue(NodeAction.REBOOT.equals(nodeHeartbeat.getNodeAction()));
    checkRebootedNMCount(rm, ++initialMetricCount);
  }
View Full Code Here

TOP

Related Classes of org.apache.hadoop.yarn.server.api.records.HeartbeatResponse

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.