A B C D E F G H I J K L M N O P Q R S T U V W X Y Z

package org.apache.cassandra.mutex

Source Code of org.apache.cassandra.mutex.ClusterMutex


/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.cassandra.mutex;

import java.util.List;
import java.io.IOException;

import org.apache.cassandra.config.DatabaseDescriptor;

import org.apache.log4j.Logger;

import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.ConnectionLossException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.Watcher.Event.KeeperState;

public class ClusterMutex implements Watcher
{
    private static Logger logger = Logger.getLogger(ClusterMutex.class);

    private static ClusterMutex instance;

    // Lazy on purpose. People who do not want mutex, should not need to have to worry about ZK
    private static class LazyHolder
    {
        private static final ClusterMutex clusterMutex = new ClusterMutex();
    }

    public static ClusterMutex instance()
    {
        return LazyHolder.clusterMutex;
    }

    // this must include hyphen (-) as the last character. substring search relies on it
    private final String LockPrefix = "lock-";

    // if we're disconnected from ZooKeeper server, how many times shall we try the lock
    // operation before giving up
    private final int OperationRetries = 3;

    // how long to sleep between retries. Actual time slept is RetryInterval multiplied by how
    // many times have we already tried.
    private final long RetryInterval = 500L;

    // Session timeout to ZooKeeper
    private final int SessionTimeout = 3000;

    private long lastConnect = 0;

    private ZooKeeper zk = null;
    private String root = "";
    private Integer mutex = null;

    private String hostString = new String();

    private ClusterMutex()
    {
        String zooKeeperRoot = DatabaseDescriptor.getZooKeeperRoot();
        if (zooKeeperRoot != null && !zooKeeperRoot.isEmpty())
            root = "/" + zooKeeperRoot;
        mutex = new Integer(1);

        String zooKeeperPort = DatabaseDescriptor.getZooKeeperPort();

        for (String zooKeeper : DatabaseDescriptor.getZooKeepers())
        {
            logger.warn(zooKeeper);
            hostString += (hostString.isEmpty()) ? "" : ",";
            hostString += zooKeeper + ":" + zooKeeperPort;
            logger.warn(hostString);
        }

        try
        {
            connectZooKeeper();
            if (!root.isEmpty() && zk.exists(root, false) == null)
            {
                logger.info("Mutex root " + root + " does not exists, creating");
                zk.create(root, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
            }
        }
        catch (Exception e)
        {
            throw new RuntimeException("ClusterMutex initialization failed: " + e.getMessage());
        }
    }

    /**
     * Connect to zookeeper server
     */
    private synchronized void connectZooKeeper() throws IOException
    {
        if (zk != null && zk.getState() != ZooKeeper.States.CLOSED)
            return;
        logger.info("Connecting to ZooKeepers: " + hostString);
        zk = new ZooKeeper(hostString, SessionTimeout, this);
    }

    /**
     * close current session and try to connect to zookeeper server
     */
    private synchronized void reestablishZooKeeperSession() throws IOException
    {
        long now = System.currentTimeMillis();

        // let's not flood zookeeper with connection requests
        if ((now - lastConnect) < SessionTimeout)
        {
            if (logger.isTraceEnabled())
                logger.trace("Only " + (now - lastConnect) + "ms passed since last reconnect, not trying again yet");
            return;
        }

        lastConnect = now;

        try
        {
            zk.close();
        }
        catch (Exception e)
        {
            // ignore all exceptions. we're calling this just to make sure ephemeral nodes are
            // deleted. zk might be in an inconsistent state and cause exception.
        }

        connectZooKeeper();
    }

    /**
     * process any events from ZooKeeper. We simply wake up any clients that are waiting for
     * file deletion. Number of clients is usually very small (most likely just one), so no need
     * for any complex logic.
     */
    public void process(WatchedEvent event)
    {
        if (logger.isTraceEnabled())
            logger.trace("Got event " + event.getType() + ", keeper state " + event.getState() + ", path " + event.getPath());

        synchronized (mutex)
        {
            mutex.notifyAll();
        }
    }

    private boolean isConnected()
    {
        return zk.getState() == ZooKeeper.States.CONNECTED;
    }

    /**
     * lock
     *
     * @param lockName lock to be locked for writing. name can be any string, but it must not
     * include slash (/) or any character disallowed by ZooKeeper (see
     * hadoop.apache.org/zookeeper/docs/current/zookeeperProgrammers.html#ch_zkDataModel).
     * @return name of the znode inside zookeeper holding this lock.
     */
    public String lock(String lockName) throws KeeperException, InterruptedException, IOException
    {
        for (int i=1; i<=OperationRetries; i++)
        {
            try
            {
                return lockInternal(lockName);
            }
            catch (KeeperException.SessionExpiredException e)
            {
                logger.warn("ZooKeeper session expired, reconnecting");
                reestablishZooKeeperSession();
            }
            catch (KeeperException.ConnectionLossException e)
            {
                // ZooKeeper handles lost connection automatically, but in order to reset all
                // ephemeral nodes, we close the whole thing.
                logger.warn("ZooKeeper connection lost, reconnecting");
                reestablishZooKeeperSession();
            }

            try
            {
                Thread.sleep(RetryInterval * i);
            }
            catch (InterruptedException ignore)
            {
                // Just fall through to retry
            }
        }

        throw new KeeperException.ConnectionLossException();
    }

    /**
     * creates lock znode in zookeeper under lockPath. Lock name is
     * LockPrefix plus ephemeral sequence number given by zookeeper
     * 
     * @param lockPath name of the lock (directory in zookeeper)
     */
    private String createLockZNode(String lockPath) throws KeeperException, InterruptedException
    {
	String lockZNode = null;

        try
        {
            lockZNode = zk.create(lockPath + "/" + LockPrefix, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL);
        }
        catch (NoNodeException e)
        {
            logger.info(lockPath + " does not exist, creating");
            zk.create(lockPath, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
            lockZNode = zk.create(lockPath + "/" + LockPrefix, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL);
        }

	return lockZNode;
    }

    /**
     * lockInteral does the actual locking.
     *
     * @param same as in lock
     */
    private String lockInternal(String lockName) throws KeeperException, InterruptedException
    {
        String lockZNode = null;
        String lockPath = root + "/" + lockName;

	lockZNode = createLockZNode(lockPath);

        if (logger.isTraceEnabled())
            logger.trace("lockZNode created " + lockZNode);

        while (true)
        {
	    // check what is our ID (sequence number at the end of file name added by ZK)
	    int mySeqNum = Integer.parseInt(lockZNode.substring(lockZNode.lastIndexOf('-') + 1));
	    int previousSeqNum = -1;
	    String predessor = null;

            // get all children of lock znode and find the one that is just before us, if
            // any. This must be inside loop, as children might get deleted out of order because
            // of client disconnects. We cannot assume that the file that is in front of us this
            // time, is there next time. It might have been deleted even though earlier files
            // are still there.
            List<String> children = zk.getChildren(lockPath, false);
	    if (children.isEmpty())
	    {
		logger.warn("No children in " + lockPath + " although one was just created. Going to try again");
		lockZNode = createLockZNode(lockPath);
		continue;
	    }
            for (String child : children)
            {
                if (logger.isTraceEnabled())
                    logger.trace("child: " + child);
                int otherSeqNum = Integer.parseInt(child.substring(child.lastIndexOf('-') + 1));
                if (otherSeqNum < mySeqNum && otherSeqNum > previousSeqNum)
                {
                    previousSeqNum = otherSeqNum;
                    predessor = child;
                }
            }

            // our sequence number is smallest, we have the lock
            if (previousSeqNum == -1)
            {
                if (logger.isTraceEnabled())
                    logger.trace("No smaller znode sequences, " + lockZNode + " acquired lock");
                return lockZNode;
            }

            // there is at least one znode before us. wait for it to be deleted.
            synchronized (mutex)
            {
                if (zk.exists(lockPath + "/" + predessor, true) == null)
                {
                    if (logger.isTraceEnabled())
                        logger.trace(predessor + " does not exists, " + lockZNode + " acquired lock");
                    break;
                }
                else if (logger.isTraceEnabled())
                    logger.trace(predessor + " is still here, " + lockZNode + " must wait");

                mutex.wait();

                if (isConnected() == false)
                {
                    logger.info("ZooKeeper disconnected while waiting for lock");
                    throw new KeeperException.ConnectionLossException();
                }
            }
        }

        return lockZNode;
    }

    /**
     * unlock
     *
     * @param lockZNode this MUST be the string returned by lock call. Otherwise there will be
     * chaos.
     */
    public void unlock(String lockZNode)
    {
        assert (lockZNode != null);

        if (logger.isTraceEnabled())
            logger.trace("deleting " + lockZNode);

        try
        {
            zk.delete(lockZNode, -1);
        }
        catch (Exception e)
        {
            // We do not do anything here. The idea is to check that everything goes OK when
            // locking and let unlock always succeed from client's point of view. Ephemeral
            // nodes should be taken care of by ZooKeeper, so ignoring any errors here should
            // not break anything.
        }
    }

}

TOP

A B C D E F G H I J K L M N O P Q R S T U V W X Y Z

TOP