Package org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore

Examples of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState


    }
  }
 
  @Override
  public void recover(RMState state) {
    ApplicationState appState = state.getApplicationState().get(getApplicationId());
    LOG.info("Recovering app: " + getApplicationId() + " with " +
            + appState.getAttemptCount() + " attempts");
    for(int i=0; i<appState.getAttemptCount(); ++i) {
      // create attempt
      createNewAttempt(false);
      // recover attempt
      ((RMAppAttemptImpl) currentAttempt).recover(state);
    }
View Full Code Here


    Assert.assertEquals(0, rmAppState.size());
       
    // create app that gets launched and does allocate before RM restart
    RMApp app1 = rm1.submitApp(200);
    // assert app1 info is saved
    ApplicationState appState = rmAppState.get(app1.getApplicationId());
    Assert.assertNotNull(appState);
    Assert.assertEquals(0, appState.getAttemptCount());
    Assert.assertEquals(appState.getApplicationSubmissionContext()
        .getApplicationId(), app1.getApplicationSubmissionContext()
        .getApplicationId());

    //kick the scheduling to allocate AM container
    nm1.nodeHeartbeat(true);
   
    // assert app1 attempt is saved
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
    rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    Assert.assertEquals(1, appState.getAttemptCount());
    ApplicationAttemptState attemptState =
                                appState.getAttempt(attemptId1);
    Assert.assertNotNull(attemptState);
    Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1),
                        attemptState.getMasterContainer().getId());
   
    // launch the AM
    MockAM am1 = rm1.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();

    // AM request for containers
    am1.allocate("h1" , 1000, 1, new ArrayList<ContainerId>());   
    // kick the scheduler
    nm1.nodeHeartbeat(true);
    List<Container> conts = am1.allocate(new ArrayList<ResourceRequest>(),
        new ArrayList<ContainerId>()).getAllocatedContainers();
    while (conts.size() == 0) {
      nm1.nodeHeartbeat(true);
      conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(),
          new ArrayList<ContainerId>()).getAllocatedContainers());
      Thread.sleep(500);
    }
   
    // create app that does not get launched by RM before RM restart
    RMApp app2 = rm1.submitApp(200);

    // assert app2 info is saved
    appState = rmAppState.get(app2.getApplicationId());
    Assert.assertNotNull(appState);
    Assert.assertEquals(0, appState.getAttemptCount());
    Assert.assertEquals(appState.getApplicationSubmissionContext()
        .getApplicationId(), app2.getApplicationSubmissionContext()
        .getApplicationId());
   
    // create unmanaged app
    RMApp appUnmanaged = rm1.submitApp(200, "someApp", "someUser", null, true, null);
    ApplicationAttemptId unmanagedAttemptId =
                        appUnmanaged.getCurrentAppAttempt().getAppAttemptId();
    // assert appUnmanaged info is saved
    ApplicationId unmanagedAppId = appUnmanaged.getApplicationId();
    appState = rmAppState.get(unmanagedAppId);
    Assert.assertNotNull(appState);
    // wait for attempt to reach LAUNCHED state
    rm1.waitForState(unmanagedAttemptId, RMAppAttemptState.LAUNCHED);
    rm1.waitForState(unmanagedAppId, RMAppState.ACCEPTED);
    // assert unmanaged attempt info is saved
    Assert.assertEquals(1, appState.getAttemptCount());
    Assert.assertEquals(appState.getApplicationSubmissionContext()
        .getApplicationId(), appUnmanaged.getApplicationSubmissionContext()
        .getApplicationId())
   
   
    // PHASE 2: create new RM and start from old state
   
    // create new RM to represent restart and recover state
    MockRM rm2 = new MockRM(conf, memStore);
   
    // start new RM
    rm2.start();
   
    // change NM to point to new RM
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    nm2.setResourceTrackerService(rm2.getResourceTrackerService());

    // verify load of old state
    // only 2 apps are loaded since unmanaged app is not loaded back since it
    // cannot be restarted by the RM this will change with work preserving RM
    // restart in which AMs/NMs are not rebooted
    Assert.assertEquals(2, rm2.getRMContext().getRMApps().size());
   
    // verify correct number of attempts and other data
    RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
    Assert.assertNotNull(loadedApp1);
    //Assert.assertEquals(1, loadedApp1.getAppAttempts().size());
    Assert.assertEquals(app1.getApplicationSubmissionContext()
        .getApplicationId(), loadedApp1.getApplicationSubmissionContext()
        .getApplicationId());
   
    RMApp loadedApp2 = rm2.getRMContext().getRMApps().get(app2.getApplicationId());
    Assert.assertNotNull(loadedApp2);
    //Assert.assertEquals(0, loadedApp2.getAppAttempts().size());
    Assert.assertEquals(app2.getApplicationSubmissionContext()
        .getApplicationId(), loadedApp2.getApplicationSubmissionContext()
        .getApplicationId());
   
    // verify state machine kicked into expected states
    rm2.waitForState(loadedApp1.getApplicationId(), RMAppState.ACCEPTED);
    rm2.waitForState(loadedApp2.getApplicationId(), RMAppState.ACCEPTED);
   
    // verify new attempts created
    Assert.assertEquals(2, loadedApp1.getAppAttempts().size());
    Assert.assertEquals(1, loadedApp2.getAppAttempts().size());
   
    // verify old AM is not accepted
    // change running AM to talk to new RM
    am1.setAMRMProtocol(rm2.getApplicationMasterService());
    AMResponse amResponse = am1.allocate(new ArrayList<ResourceRequest>(),
        new ArrayList<ContainerId>());
    Assert.assertTrue(amResponse.getReboot());
   
    // NM should be rebooted on heartbeat, even first heartbeat for nm2
    HeartbeatResponse hbResponse = nm1.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.REBOOT, hbResponse.getNodeAction());
    hbResponse = nm2.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.REBOOT, hbResponse.getNodeAction());
   
    // new NM to represent NM re-register
    nm1 = rm2.registerNode("h1:1234", 15120);
    nm2 = rm2.registerNode("h2:5678", 15120);

    // verify no more reboot response sent
    hbResponse = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.REBOOT != hbResponse.getNodeAction());
    hbResponse = nm2.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.REBOOT != hbResponse.getNodeAction());
   
    // assert app1 attempt is saved
    attempt1 = loadedApp1.getCurrentAppAttempt();
    attemptId1 = attempt1.getAppAttemptId();
    rm2.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    appState = rmAppState.get(loadedApp1.getApplicationId());
    attemptState = appState.getAttempt(attemptId1);
    Assert.assertNotNull(attemptState);
    Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1),
                        attemptState.getMasterContainer().getId());

    // Nodes on which the AM's run
    MockNM am1Node = nm1;
    if(attemptState.getMasterContainer().getNodeId().toString().contains("h2")){
      am1Node = nm2;
    }

    // assert app2 attempt is saved
    RMAppAttempt attempt2 = loadedApp2.getCurrentAppAttempt();
    ApplicationAttemptId attemptId2 = attempt2.getAppAttemptId();
    rm2.waitForState(attemptId2, RMAppAttemptState.ALLOCATED);
    appState = rmAppState.get(loadedApp2.getApplicationId());
    attemptState = appState.getAttempt(attemptId2);
    Assert.assertNotNull(attemptState);
    Assert.assertEquals(BuilderUtils.newContainerId(attemptId2, 1),
                        attemptState.getMasterContainer().getId());

    MockNM am2Node = nm1;
View Full Code Here

    }
  }

  @Override
  public void recover(RMState state) {
    ApplicationState appState =
        state.getApplicationState().get(getAppAttemptId().getApplicationId());
    ApplicationAttemptState attemptState = appState.getAttempt(getAppAttemptId());
    assert attemptState != null;
    setMasterContainer(attemptState.getMasterContainer());
    LOG.info("Recovered attempt: AppId: " + getAppAttemptId().getApplicationId()
             + " AttemptId: " + getAppAttemptId()
             + " MasterContainer: " + masterContainer);
View Full Code Here

    Map<ApplicationId, ApplicationState> rmAppState = state.getApplicationState();

    // removed app or orphan attempt is not loaded
    assertEquals(1, rmAppState.size());

    ApplicationState appState = rmAppState.get(appId1);
    // app is loaded
    assertNotNull(appState);
    // app is loaded correctly
    assertEquals(submitTime, appState.getSubmitTime());
    // submission context is loaded correctly
    assertEquals(appId1,
                 appState.getApplicationSubmissionContext().getApplicationId());
    ApplicationAttemptState attemptState = appState.getAttempt(attemptId1);
    // attempt1 is loaded correctly
    assertNotNull(attemptState);
    assertEquals(attemptId1, attemptState.getAttemptId());
    // attempt1 container is loaded correctly
    assertEquals(containerId1, attemptState.getMasterContainer().getId());
    attemptState = appState.getAttempt(attemptId2);
    // attempt2 is loaded correctly
    assertNotNull(attemptState);
    assertEquals(attemptId2, attemptState.getAttemptId());
    // attempt2 container is loaded correctly
    assertEquals(containerId2, attemptState.getMasterContainer().getId());
View Full Code Here

    }
  }

  @Override
  public void recover(RMState state) throws Exception {
    ApplicationState appState =
        state.getApplicationState().get(getAppAttemptId().getApplicationId());
    ApplicationAttemptState attemptState =
        appState.getAttempt(getAppAttemptId());
    assert attemptState != null;
    LOG.info("Recovering attempt: " + getAppAttemptId() + " with final state: "
        + attemptState.getState());
    diagnostics.append("Attempt recovered after RM restart");
    diagnostics.append(attemptState.getDiagnostics());
View Full Code Here

    }
  }

  @Override
  public void recover(RMState state) throws Exception{
    ApplicationState appState = state.getApplicationState().get(getApplicationId());
    this.recoveredFinalState = appState.getState();
    LOG.info("Recovering app: " + getApplicationId() + " with " +
        + appState.getAttemptCount() + " attempts and final state = " + this.recoveredFinalState );
    this.diagnostics.append(appState.getDiagnostics());
    this.storedFinishTime = appState.getFinishTime();
    this.startTime = appState.getStartTime();

    for(int i=0; i<appState.getAttemptCount(); ++i) {
      // create attempt
      createNewAttempt();
      ((RMAppAttemptImpl)this.currentAttempt).recover(state);
    }
  }
View Full Code Here

      diags = getAppKilledDiagnostics();
      break;
    default:
      break;
    }
    ApplicationState appState =
        new ApplicationState(this.submitTime, this.startTime,
          this.submissionContext, this.user, stateToBeStored, diags,
          this.storedFinishTime);
    this.rmContext.getStateStore().updateApplicationState(appState);
  }
View Full Code Here

    Assert.assertEquals(0, rmAppState.size());
       
    // create app that gets launched and does allocate before RM restart
    RMApp app1 = rm1.submitApp(200);
    // assert app1 info is saved
    ApplicationState appState = rmAppState.get(app1.getApplicationId());
    Assert.assertNotNull(appState);
    Assert.assertEquals(0, appState.getAttemptCount());
    Assert.assertEquals(appState.getApplicationSubmissionContext()
        .getApplicationId(), app1.getApplicationSubmissionContext()
        .getApplicationId());

    //kick the scheduling to allocate AM container
    nm1.nodeHeartbeat(true);
   
    // assert app1 attempt is saved
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
    rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    Assert.assertEquals(1, appState.getAttemptCount());
    ApplicationAttemptState attemptState =
                                appState.getAttempt(attemptId1);
    Assert.assertNotNull(attemptState);
    Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1),
                        attemptState.getMasterContainer().getId());
   
    // launch the AM
    MockAM am1 = rm1.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();

    // AM request for containers
    am1.allocate("h1" , 1000, 1, new ArrayList<ContainerId>());   
    // kick the scheduler
    nm1.nodeHeartbeat(true);
    List<Container> conts = am1.allocate(new ArrayList<ResourceRequest>(),
        new ArrayList<ContainerId>()).getAllocatedContainers();
    while (conts.size() == 0) {
      nm1.nodeHeartbeat(true);
      conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(),
          new ArrayList<ContainerId>()).getAllocatedContainers());
      Thread.sleep(500);
    }
   
    // create app that does not get launched by RM before RM restart
    RMApp app2 = rm1.submitApp(200);

    // assert app2 info is saved
    appState = rmAppState.get(app2.getApplicationId());
    Assert.assertNotNull(appState);
    Assert.assertEquals(0, appState.getAttemptCount());
    Assert.assertEquals(appState.getApplicationSubmissionContext()
        .getApplicationId(), app2.getApplicationSubmissionContext()
        .getApplicationId());
   
    // create unmanaged app
    RMApp appUnmanaged = rm1.submitApp(200, "", "", null, true);
    ApplicationAttemptId unmanagedAttemptId =
                        appUnmanaged.getCurrentAppAttempt().getAppAttemptId();
    // assert appUnmanaged info is saved
    ApplicationId unmanagedAppId = appUnmanaged.getApplicationId();
    appState = rmAppState.get(unmanagedAppId);
    Assert.assertNotNull(appState);
    // wait for attempt to reach LAUNCHED state
    rm1.waitForState(unmanagedAttemptId, RMAppAttemptState.LAUNCHED);
    rm1.waitForState(unmanagedAppId, RMAppState.ACCEPTED);
    // assert unmanaged attempt info is saved
    Assert.assertEquals(1, appState.getAttemptCount());
    Assert.assertEquals(appState.getApplicationSubmissionContext()
        .getApplicationId(), appUnmanaged.getApplicationSubmissionContext()
        .getApplicationId())
   
   
    // PHASE 2: create new RM and start from old state
   
    // create new RM to represent restart and recover state
    MockRM rm2 = new MockRM(conf, memStore);
   
    // start new RM
    rm2.start();
   
    // change NM to point to new RM
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    nm2.setResourceTrackerService(rm2.getResourceTrackerService());

    // verify load of old state
    // only 2 apps are loaded since unmanaged app is not loaded back since it
    // cannot be restarted by the RM this will change with work preserving RM
    // restart in which AMs/NMs are not rebooted
    Assert.assertEquals(2, rm2.getRMContext().getRMApps().size());
   
    // verify correct number of attempts and other data
    RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
    Assert.assertNotNull(loadedApp1);
    //Assert.assertEquals(1, loadedApp1.getAppAttempts().size());
    Assert.assertEquals(app1.getApplicationSubmissionContext()
        .getApplicationId(), loadedApp1.getApplicationSubmissionContext()
        .getApplicationId());
   
    RMApp loadedApp2 = rm2.getRMContext().getRMApps().get(app2.getApplicationId());
    Assert.assertNotNull(loadedApp2);
    //Assert.assertEquals(0, loadedApp2.getAppAttempts().size());
    Assert.assertEquals(app2.getApplicationSubmissionContext()
        .getApplicationId(), loadedApp2.getApplicationSubmissionContext()
        .getApplicationId());
   
    // verify state machine kicked into expected states
    rm2.waitForState(loadedApp1.getApplicationId(), RMAppState.ACCEPTED);
    rm2.waitForState(loadedApp2.getApplicationId(), RMAppState.ACCEPTED);
   
    // verify new attempts created
    Assert.assertEquals(2, loadedApp1.getAppAttempts().size());
    Assert.assertEquals(1, loadedApp2.getAppAttempts().size());
   
    // verify old AM is not accepted
    // change running AM to talk to new RM
    am1.setAMRMProtocol(rm2.getApplicationMasterService());
    AMResponse amResponse = am1.allocate(new ArrayList<ResourceRequest>(),
        new ArrayList<ContainerId>());
    Assert.assertTrue(amResponse.getReboot());
   
    // NM should be rebooted on heartbeat, even first heartbeat for nm2
    HeartbeatResponse hbResponse = nm1.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.REBOOT, hbResponse.getNodeAction());
    hbResponse = nm2.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.REBOOT, hbResponse.getNodeAction());
   
    // new NM to represent NM re-register
    nm1 = rm2.registerNode("h1:1234", 15120);
    nm2 = rm2.registerNode("h2:5678", 15120);

    // verify no more reboot response sent
    hbResponse = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.REBOOT != hbResponse.getNodeAction());
    hbResponse = nm2.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.REBOOT != hbResponse.getNodeAction());
   
    // assert app1 attempt is saved
    attempt1 = loadedApp1.getCurrentAppAttempt();
    attemptId1 = attempt1.getAppAttemptId();
    rm2.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    appState = rmAppState.get(loadedApp1.getApplicationId());
    attemptState = appState.getAttempt(attemptId1);
    Assert.assertNotNull(attemptState);
    Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1),
                        attemptState.getMasterContainer().getId());

    // Nodes on which the AM's run
    MockNM am1Node = nm1;
    if(attemptState.getMasterContainer().getNodeId().toString().contains("h2")){
      am1Node = nm2;
    }

    // assert app2 attempt is saved
    RMAppAttempt attempt2 = loadedApp2.getCurrentAppAttempt();
    ApplicationAttemptId attemptId2 = attempt2.getAppAttemptId();
    rm2.waitForState(attemptId2, RMAppAttemptState.ALLOCATED);
    appState = rmAppState.get(loadedApp2.getApplicationId());
    attemptState = appState.getAttempt(attemptId2);
    Assert.assertNotNull(attemptState);
    Assert.assertEquals(BuilderUtils.newContainerId(attemptId2, 1),
                        attemptState.getMasterContainer().getId());

    MockNM am2Node = nm1;
View Full Code Here

    }
  }
 
  @Override
  public void recover(RMState state) {
    ApplicationState appState = state.getApplicationState().get(getApplicationId());
    LOG.info("Recovering app: " + getApplicationId() + " with " +
            + appState.getAttemptCount() + " attempts");
    for(int i=0; i<appState.getAttemptCount(); ++i) {
      // create attempt
      createNewAttempt(false);
      // recover attempt
      ((RMAppAttemptImpl) currentAttempt).recover(state);
    }
View Full Code Here

    }
  }

  @Override
  public void recover(RMState state) {
    ApplicationState appState =
        state.getApplicationState().get(getAppAttemptId().getApplicationId());
    ApplicationAttemptState attemptState = appState.getAttempt(getAppAttemptId());
    assert attemptState != null;
    setMasterContainer(attemptState.getMasterContainer());
    LOG.info("Recovered attempt: AppId: " + getAppAttemptId().getApplicationId()
             + " AttemptId: " + getAppAttemptId()
             + " MasterContainer: " + masterContainer);
View Full Code Here

TOP

Related Classes of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.