Package voldemort.cluster.failuredetector

Source Code of voldemort.cluster.failuredetector.AsyncRecoveryFailureDetector

/*
* Copyright 2009 Mustard Grain, Inc., 2009-2012 LinkedIn, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package voldemort.cluster.failuredetector;

import java.lang.Thread.UncaughtExceptionHandler;

import org.apache.log4j.Level;

import voldemort.annotations.jmx.JmxManaged;
import voldemort.cluster.Node;
import voldemort.store.UnreachableStoreException;

/**
* AsyncRecoveryFailureDetector detects failures and then attempts to contact
* the failing node's Store to determine availability.
*
* <p/>
*
* When a node does go down, attempts to access the remote Store for that node
* may take several seconds. Rather than cause the thread to block, we perform
* this check in a background thread.
*
*/

@JmxManaged(description = "Detects the availability of the nodes on which a Voldemort cluster runs")
public class AsyncRecoveryFailureDetector extends AbstractFailureDetector implements Runnable {

    private volatile boolean isRunning;

    public AsyncRecoveryFailureDetector(FailureDetectorConfig failureDetectorConfig) {
        super(failureDetectorConfig);

        isRunning = true;

        Thread recoveryThread = new Thread(this, "AsyncNodeRecoverer");
        recoveryThread.setDaemon(true);
        recoveryThread.setUncaughtExceptionHandler(new UncaughtExceptionHandler() {

            public void uncaughtException(Thread t, Throwable e) {
                if(logger.isEnabledFor(Level.ERROR))
                    logger.error("Uncaught exception in failure detector recovery thread:", e);
            }
        });

        recoveryThread.start();
    }

    public boolean isAvailable(Node node) {
        checkNodeArg(node);
        NodeStatus nodeStatus = getNodeStatus(node);

        synchronized(nodeStatus) {
            return nodeStatus.isAvailable();
        }
    }

    public void recordException(Node node, long requestTime, UnreachableStoreException e) {
        checkArgs(node, requestTime);
        setUnavailable(node, e);
    }

    public void recordSuccess(Node node, long requestTime) {
        // Nodes only become available in our thread, but let's sanity-check our
        // arguments...
        checkArgs(node, requestTime);
    }

    @Override
    public void destroy() {
        isRunning = false;
    }

    public void run() {
        long asyncRecoveryInterval = getConfig().getAsyncRecoveryInterval();

        while(!Thread.currentThread().isInterrupted() && isRunning) {
            try {
                if(logger.isDebugEnabled()) {
                    logger.debug("Sleeping for " + asyncRecoveryInterval
                                 + " ms before checking node availability");
                }

                getConfig().getTime().sleep(asyncRecoveryInterval);
            } catch(InterruptedException e) {
                if(logger.isDebugEnabled()) {
                    logger.debug("InterruptedException while sleeping " + asyncRecoveryInterval
                                 + " ms before checking node availability", e);
                }

                break;
            }

            for(Node node: getConfig().getCluster().getNodes()) {
                if(isAvailable(node))
                    continue;

                if(logger.isDebugEnabled())
                    logger.debug("Checking previously unavailable node " + node.getId());

                StoreVerifier storeVerifier = getConfig().getStoreVerifier();

                try {
                    // This is our test.
                    if(logger.isDebugEnabled())
                        logger.debug("Verifying previously unavailable node " + node.getId());

                    storeVerifier.verifyStore(node);

                    if(logger.isDebugEnabled())
                        logger.debug("Verified previously unavailable node " + node.getId()
                                     + "is now available.");

                    nodeRecovered(node);
                } catch(UnreachableStoreException e) {
                    if(logger.isDebugEnabled()) {
                        logger.debug("Node " + node.getId()
                                     + " still unavailable due to UnreachableStoreException", e);
                    }
                } catch(Exception e) {
                    if(logger.isEnabledFor(Level.ERROR))
                        logger.error("Node " + node.getId() + " unavailable due to error", e);
                }
            }
        }
    }

    protected void nodeRecovered(Node node) {
        setAvailable(node);
    }

}
TOP

Related Classes of voldemort.cluster.failuredetector.AsyncRecoveryFailureDetector

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.