Package org.globus.workspace.testing.suites.failure

Source Code of org.globus.workspace.testing.suites.failure.TerminateSuite

/*
* Copyright 1999-2011 University of Chicago
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy
* of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/

package org.globus.workspace.testing.suites.failure;

import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNotNull;
import static org.testng.AssertJUnit.assertTrue;

import org.globus.workspace.testing.NimbusTestBase;
import org.globus.workspace.testing.NimbusTestContextLoader;
import org.globus.workspace.xen.xenssh.MockShutdownTrash;
import org.nimbustools.api.repr.Caller;
import org.nimbustools.api.repr.CreateRequest;
import org.nimbustools.api.repr.CreateResult;
import org.nimbustools.api.repr.vm.State;
import org.nimbustools.api.repr.vm.VM;
import org.nimbustools.api.services.rm.Manager;
import org.springframework.test.context.ContextConfiguration;
import org.testng.annotations.AfterSuite;
import org.testng.annotations.Test;


/**
* Tests that exercise the termination system, in particular the retry feature that makes it
* possible for the service to continue killing the nodes til they're actually dead and gone.
* Transient termination failures have been an issue (e.g. the VMM cannot be contacted at
* that moment).
*
* Now there are two new internal states introduced: DestroySucceeded and DestroyFailed.
* Only when DestroySucceeded has been reached will the workspace service remove leases etc.
* If the VM is in the DestroyFailed state, the resource sweeper will try to kill it.
*
* The TestNG "singleThreaded" setting does not seem to be taking (this may be an IDEA
* config issue), so instead each test below just "depends" on the previous one.  If they
* run simultaneously, they will fail because a static mock task is used and also the global
* VM count is examined.
*
* This "depends" situation has the unfortunate side effect of requiring you to change the
* depends to "prereqs" if you just want to run a single method over and over vs. running
* the whole suite.
*/
@ContextConfiguration(
        locations={"file:./service/service/java/tests/suites/failure/home/services/etc/nimbus/workspace-service/other/main.xml"},
        loader=NimbusTestContextLoader.class)
public class TerminateSuite extends NimbusTestBase {

    // -----------------------------------------------------------------------------------------
    // extends NimbusTestBase
    // -----------------------------------------------------------------------------------------

    @AfterSuite(alwaysRun=true)
    @Override
    public void suiteTeardown() throws Exception {
        super.suiteTeardown();
    }

    /**
     * This is how coordinate your Java test suite code with the conf files to use.
     * @return absolute path to the value that should be set for $NIMBUS_HOME
     * @throws Exception if $NIMBUS_HOME cannot be determined
     */
    @Override
    protected String getNimbusHome() throws Exception {
        return this.determineSuitesPath() + "/failure/home";
    }


    // -----------------------------------------------------------------------------------------
    // PREREQ TESTS (if any of these fail, nothing else will work at all)
    // -----------------------------------------------------------------------------------------

    /**
     * Check if ModuleLocator can be retrieved and used at all.
     */
    @Test(groups="prereqs")
    public void retrieveModuleLocator() throws Exception {
        logger.debug("retrieveModuleLocator");
        final Manager rm = this.locator.getManager();
        final VM[] vms = rm.getGlobalAll();

        // we know there are zero so far because it is in group 'prereqs'
        assertEquals(0, vms.length);
    }


    // -----------------------------------------------------------------------------------------
    // NORMAL TERMINATE BEHAVIOR
    // -----------------------------------------------------------------------------------------

    /**
     * Lease a VM and then destroy it.
     */
    @Test(groups="basic1", dependsOnGroups="prereqs")
    public void leaseOne() throws Exception {
        logger.debug("leaseOne");
        final Manager rm = this.locator.getManager();

        VM[] allvms = rm.getGlobalAll();
        assertEquals(0, allvms.length);

        final Caller caller = this.populator().getCaller();
        final CreateResult result =
                rm.create(this.populator().getCreateRequest("suite:failure:leaseOne"),
                          caller);

        final VM[] vms = result.getVMs();
        assertEquals(1, vms.length);
        assertNotNull(vms[0]);
        logger.info("Leased vm '" + vms[0].getID() + '\'');

        assertTrue(rm.exists(vms[0].getID(), Manager.INSTANCE));

        Thread.sleep(4000L);
        rm.trash(vms[0].getID(), Manager.INSTANCE, caller);

        Thread.sleep(1000L);
       
        allvms = rm.getGlobalAll();
        assertEquals(0, allvms.length);

    }

    /**
     * Lease a number of VMs and then destroy them all at once.
     */
    @Test(groups="basic5", dependsOnGroups="basic1")
    public void leaseMany() throws Exception {
        logger.debug("leaseMany");
        final Manager rm = this.locator.getManager();

        VM[] allvms = rm.getGlobalAll();
        assertEquals(0, allvms.length);

        final int durationSecs = 240;
        final int memory = 64;
        final int numNodes = 5;
        final CreateRequest creq =
                this.populator().getCreateRequest("suite:failure:leaseMany",
                                                  durationSecs, memory, numNodes);
        final Caller caller = this.populator().getCaller();
        final CreateResult result = rm.create(creq, caller);

        final VM[] vms = result.getVMs();
        assertEquals(5, vms.length);
        for (int i = 0; i < vms.length; i++) {
            assertNotNull(vms[i]);
            logger.info("Leased vm '" + vms[i].getID() + '\'');
            assertTrue(rm.exists(vms[i].getID(), Manager.INSTANCE));
        }

        Thread.sleep(2000L);
       
        for (int i = 0; i < vms.length; i++) {
            assertTrue(rm.exists(vms[i].getID(), Manager.INSTANCE));
        }
        allvms = rm.getGlobalAll();
        assertEquals(5, allvms.length);

        Thread.sleep(2000L);

        for (int i = 0; i < vms.length; i++) {
            rm.trash(vms[i].getID(), Manager.INSTANCE, caller);
        }

        Thread.sleep(2000L);

        allvms = rm.getGlobalAll();
        assertEquals(0, allvms.length);

        for (int i = 0; i < vms.length; i++) {
            assertFalse(rm.exists(vms[i].getID(), Manager.INSTANCE));
        }
    }

    /**
     * Lease a VM and then wait for it to expire
     */
    @Test(groups="basicwait", dependsOnGroups="basic5")
    public void leaseOneWait() throws Exception {
        logger.debug("leaseOneWait");
        final Manager rm = this.locator.getManager();

        VM[] allvms = rm.getGlobalAll();
        assertEquals(0, allvms.length);

        final int durationSecs = 5;
        final int memory = 64;
        final int numNodes = 1;
        final CreateRequest creq =
                this.populator().getCreateRequest("suite:failure:leaseOneWait",
                                                  durationSecs, memory, numNodes);
        final Caller caller = this.populator().getCaller();
        final CreateResult result = rm.create(creq, caller);

        final VM[] vms = result.getVMs();
        assertEquals(1, vms.length);
        assertNotNull(vms[0]);
        logger.info("Leased vm '" + vms[0].getID() + '\'');
        assertTrue(rm.exists(vms[0].getID(), Manager.INSTANCE));

        Thread.sleep(10000L);

        assertFalse(rm.exists(vms[0].getID(), Manager.INSTANCE));
        allvms = rm.getGlobalAll();
        assertEquals(0, allvms.length);
    }

    /**
     * Lease a VM and then unpropagate it.  Then allow to expire
     */
    @Test(groups="unpropWait", dependsOnGroups="basicwait")
    public void unpropOneWait() throws Exception {
        logger.debug("unpropOneWait");
        final Manager rm = this.locator.getManager();

        VM[] allvms = rm.getGlobalAll();
        assertEquals(0, allvms.length);

        final int durationSecs = 5;
        final int memory = 64;
        final int numNodes = 1;
        final CreateRequest creq =
                this.populator().getCreateRequest("suite:failure:unpropOneWait",
                                                  durationSecs, memory, numNodes);
        final Caller caller = this.populator().getCaller();
        final CreateResult result = rm.create(creq, caller);

        final VM[] vms = result.getVMs();
        assertEquals(1, vms.length);
        assertNotNull(vms[0]);
        logger.info("Leased vm '" + vms[0].getID() + '\'');
        assertTrue(rm.exists(vms[0].getID(), Manager.INSTANCE));

        Thread.sleep(1500L);

        allvms = rm.getGlobalAll();
        assertEquals(State.STATE_Running, allvms[0].getState().getState());

        // Unpropagate:
        rm.shutdownSave(vms[0].getID(), Manager.INSTANCE,
                        this.populator().getShutdownTasks() , caller);

        allvms = rm.getGlobalAll();
        assertEquals(1, allvms.length);
       
        // Wait for complete termination:
        Thread.sleep(15000L);

        assertFalse(rm.exists(vms[0].getID(), Manager.INSTANCE));
        allvms = rm.getGlobalAll();
        assertEquals(0, allvms.length);
    }


    // -----------------------------------------------------------------------------------------
    // FAILURE BEHAVIOR
    // -----------------------------------------------------------------------------------------

    /**
     * Lease one VM, kill it but trigger shutdown task to fail.  System should keep trying.
     * After a wait, trigger shutdown task to succeed.  Ensure it finally gets killed.
     */
    @Test(groups="basicfail1", dependsOnGroups="unpropWait")
    public void basicFail() throws Exception {
        logger.debug("basicFail");
        final Manager rm = this.locator.getManager();

        VM[] allvms = rm.getGlobalAll();
        assertEquals(0, allvms.length);
       
        MockShutdownTrash.resetFailCount();

        final Caller caller = this.populator().getCaller();
        final CreateResult result =
                rm.create(this.populator().getCreateRequest("suite:failure:basicFail"),
                          caller);

        final VM[] vms = result.getVMs();
        assertEquals(1, vms.length);
        assertNotNull(vms[0]);
        logger.info("Leased vm '" + vms[0].getID() + '\'');

        assertTrue(rm.exists(vms[0].getID(), Manager.INSTANCE));

        Thread.sleep(2000L);
        assertEquals(0, MockShutdownTrash.getFailCount());

        // Fail at killing the instance:
        MockShutdownTrash.setFail(true);
        logger.warn("Set to fail.");

        rm.trash(vms[0].getID(), Manager.INSTANCE, caller);
        Thread.sleep(4000L);

        // Check that it is at least trying to terminate the node:
        int lastFailCount = MockShutdownTrash.getFailCount();
        assertFalse(0 == lastFailCount);
       
        assertTrue(rm.exists(vms[0].getID(), Manager.INSTANCE));
        allvms = rm.getGlobalAll();
        assertEquals(1, allvms.length);

        Thread.sleep(4000L);
        assertTrue(rm.exists(vms[0].getID(), Manager.INSTANCE));
        allvms = rm.getGlobalAll();
        assertEquals(1, allvms.length);

        // Start succeeding to kill the instance:
        MockShutdownTrash.setFail(false);
        logger.warn("Set to succeed.");

        Thread.sleep(12000L);
       
        // Should now be gone:
        assertFalse(rm.exists(vms[0].getID(), Manager.INSTANCE));
        allvms = rm.getGlobalAll();
        assertEquals(0, allvms.length);
    }

    /**
     * More of the same idea as in basicFail() but working with more instances and situations
     */
    @Test(groups="complicatedfail", dependsOnGroups="basicfail1")
    public void complicatedFail() throws Exception {
        logger.debug("complicatedFail");
        final Manager rm = this.locator.getManager();

        VM[] allvms = rm.getGlobalAll();
        assertEquals(0, allvms.length);
       
        MockShutdownTrash.resetFailCount();

        final int durationSecs = 240;
        final int memory = 64;
        final int numNodes = 5;
        final CreateRequest creq =
                this.populator().getCreateRequest("suite:failure:complicatedFail",
                                                  durationSecs, memory, numNodes);
        final Caller caller = this.populator().getCaller();
        final CreateResult result = rm.create(creq, caller);

        final VM[] vms = result.getVMs();
        assertEquals(5, vms.length);
        for (int i = 0; i < vms.length; i++) {
            assertNotNull(vms[i]);
            logger.info("Leased vm '" + vms[i].getID() + '\'');
            assertTrue(rm.exists(vms[i].getID(), Manager.INSTANCE));
        }

        Thread.sleep(2000L);
        assertEquals(0, MockShutdownTrash.getFailCount());

        // Fail at killing the instances:
        MockShutdownTrash.setFail(true);
        logger.warn("Set to fail.");

        rm.trash(vms[0].getID(), Manager.INSTANCE, caller);
        rm.trash(vms[1].getID(), Manager.INSTANCE, caller);
        Thread.sleep(4000L);

        int lastFailCount = MockShutdownTrash.getFailCount();
        assertFalse(0 == lastFailCount);
        assertTrue(rm.exists(vms[0].getID(), Manager.INSTANCE));
        assertTrue(rm.exists(vms[1].getID(), Manager.INSTANCE));
        allvms = rm.getGlobalAll();
        assertEquals(5, allvms.length);

        Thread.sleep(4000L);
        assertTrue(rm.exists(vms[0].getID(), Manager.INSTANCE));
        assertTrue(rm.exists(vms[1].getID(), Manager.INSTANCE));
        allvms = rm.getGlobalAll();
        assertEquals(5, allvms.length);

        // Start succeeding to kill the instances:
        MockShutdownTrash.setFail(false);
        logger.warn("Set to succeed.");

        Thread.sleep(15000L);

        // Should now be gone:
        assertFalse(rm.exists(vms[0].getID(), Manager.INSTANCE));
        assertFalse(rm.exists(vms[1].getID(), Manager.INSTANCE));
        allvms = rm.getGlobalAll();
        assertEquals(3, allvms.length);

        // Start counting from scratch
        MockShutdownTrash.resetFailCount();

        // Add two in a new group, with very short duration
        final int durationSecs2 = 5;
        final int memory2 = 64;
        final int numNodes2 = 2;
        final CreateRequest creq2 =
                this.populator().getCreateRequest("suite:failure:complicatedFail2",
                                                  durationSecs2, memory2, numNodes2);
        final CreateResult result2 = rm.create(creq2, caller);

        VM[] vms2 = result2.getVMs();
        assertEquals(2, vms2.length);
        for (int i = 0; i < vms2.length; i++) {
            assertNotNull(vms2[i]);
            logger.info("Leased vm '" + vms2[i].getID() + '\'');
            assertTrue(rm.exists(vms2[i].getID(), Manager.INSTANCE));
        }

        allvms = rm.getGlobalAll();
        assertEquals(5, allvms.length);
       
        // Wait for them to terminate successfully
        Thread.sleep(8000L);

        assertEquals(0, MockShutdownTrash.getFailCount());

        // Should now be gone:
        assertFalse(rm.exists(vms2[0].getID(), Manager.INSTANCE));
        assertFalse(rm.exists(vms2[1].getID(), Manager.INSTANCE));
        allvms = rm.getGlobalAll();
        assertEquals(3, allvms.length);

        // Old ones should remain:
        assertTrue(rm.exists(vms[2].getID(), Manager.INSTANCE));
        assertTrue(rm.exists(vms[3].getID(), Manager.INSTANCE));
        assertTrue(rm.exists(vms[4].getID(), Manager.INSTANCE));

        // Fail at killing the instances:
        MockShutdownTrash.setFail(true);
        logger.warn("Set to fail.");

        // Try to kill all but one
        rm.trash(vms[2].getID(), Manager.INSTANCE, caller);
        rm.trash(vms[3].getID(), Manager.INSTANCE, caller);
        Thread.sleep(4000L);

        // Shouldn't work
        allvms = rm.getGlobalAll();
        assertEquals(3, allvms.length);
        assertTrue(rm.exists(vms[2].getID(), Manager.INSTANCE));
        assertTrue(rm.exists(vms[3].getID(), Manager.INSTANCE));
        assertTrue(rm.exists(vms[4].getID(), Manager.INSTANCE));

        // Start succeeding to kill the instances:
        MockShutdownTrash.setFail(false);
        logger.warn("Set to succeed.");

        Thread.sleep(8000L);

        // Should now be gone:
        assertFalse(rm.exists(vms[2].getID(), Manager.INSTANCE));
        assertFalse(rm.exists(vms[3].getID(), Manager.INSTANCE));
        allvms = rm.getGlobalAll();
        assertEquals(1, allvms.length);

        // And this one should now die right away:
        rm.trash(vms[4].getID(), Manager.INSTANCE, caller);
        assertFalse(rm.exists(vms[4].getID(), Manager.INSTANCE));
        allvms = rm.getGlobalAll();
        assertEquals(0, allvms.length);
    }


    /**
     * Lease a VM and then shut it down (but not trash or unpropagate). Then play the
     * termination failed game
     */
    @Test(groups="unpropFail", dependsOnGroups="complicatedfail")
    public void unpropOneFail() throws Exception {
        logger.debug("unpropOneFail");
        final Manager rm = this.locator.getManager();

        VM[] allvms = rm.getGlobalAll();
        assertEquals(0, allvms.length);

        final int durationSecs = 5;
        final int memory = 64;
        final int numNodes = 1;
        final CreateRequest creq =
                this.populator().getCreateRequest("suite:failure:unpropOneFail",
                                                  durationSecs, memory, numNodes);
        final Caller caller = this.populator().getCaller();
        final CreateResult result = rm.create(creq, caller);

        final VM[] vms = result.getVMs();
        assertEquals(1, vms.length);
        assertNotNull(vms[0]);
        logger.info("Leased vm '" + vms[0].getID() + '\'');
        assertTrue(rm.exists(vms[0].getID(), Manager.INSTANCE));

        Thread.sleep(1500L);

        MockShutdownTrash.resetFailCount();

        allvms = rm.getGlobalAll();
        assertEquals(State.STATE_Running, allvms[0].getState().getState());

        // Shutdown (but don't unpropagate or trash it):
        rm.shutdown(vms[0].getID(), Manager.INSTANCE, null, caller);

        allvms = rm.getGlobalAll();
        assertEquals(1, allvms.length);

        // Fail at killing the instances:
        MockShutdownTrash.setFail(true);
        logger.warn("Set to fail.");

        // Wait for complete termination ... to fail.
        Thread.sleep(15000L);

        assertFalse(0 == MockShutdownTrash.getFailCount());

        assertTrue(rm.exists(vms[0].getID(), Manager.INSTANCE));
        allvms = rm.getGlobalAll();
        assertEquals(1, allvms.length);

        // Start succeeding to kill the instance:
        MockShutdownTrash.setFail(false);
        logger.warn("Set to succeed.");

        Thread.sleep(10000L);

        // Should now be gone:
        assertFalse(rm.exists(vms[0].getID(), Manager.INSTANCE));
        allvms = rm.getGlobalAll();
        assertEquals(0, allvms.length);
       
    }
}
TOP

Related Classes of org.globus.workspace.testing.suites.failure.TerminateSuite

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.