/* This file is part of VoltDB.
* Copyright (C) 2008-2010 VoltDB Inc.
*
* This file contains original code and/or modifications of original code.
* Any modifications made by VoltDB Inc. are licensed under the following
* terms and conditions:
*
* VoltDB is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* VoltDB is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with VoltDB. If not, see <http://www.gnu.org/licenses/>.
*/
/* Copyright (C) 2008
* Evan Jones
* Massachusetts Institute of Technology
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/* This file is part of VoltDB.
* Copyright (C) 2008-2010 VoltDB Inc.
*
* VoltDB is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* VoltDB is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with VoltDB. If not, see <http://www.gnu.org/licenses/>.
*/
package org.voltdb.network;
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.nio.channels.CancelledKeyException;
import java.nio.channels.SelectionKey;
import java.nio.channels.Selector;
import java.nio.channels.SocketChannel;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.log4j.Logger;
import org.voltdb.utils.DBBPool;
import org.voltdb.utils.EstTimeUpdater;
import org.voltdb.utils.Pair;
import org.voltdb.utils.VoltLoggerFactory;
import edu.brown.hstore.HStoreConstants;
import edu.brown.hstore.HStoreSite;
import edu.brown.hstore.HStoreThreadManager;
/** Produces work for registered ports that are selected for read, write */
public class VoltNetwork implements Runnable {
private static final Logger m_logger = Logger.getLogger(VoltNetwork.class);
private static final Logger networkLog =
Logger.getLogger("NETWORK", VoltLoggerFactory.instance());
private final Selector m_selector;
private final ArrayDeque<Runnable> m_tasks = new ArrayDeque<Runnable>();
// keep two lists and swap them in and out to minimize contention
private final ArrayDeque<VoltPort> m_selectorUpdates_1 = new ArrayDeque<VoltPort>();//Used as the lock for swapping lists
private final ArrayDeque<VoltPort> m_selectorUpdates_2 = new ArrayDeque<VoltPort>();
private ArrayDeque<VoltPort> m_activeUpdateList = m_selectorUpdates_1;
private volatile boolean m_shouldStop = false;//volatile boolean is sufficient
private final Thread m_thread;
private final HashSet<VoltPort> m_ports = new HashSet<VoltPort>();
private final boolean m_useBlockingSelect;
private final boolean m_useExecutorService;
private final ArrayList<WeakReference<Thread>> m_networkThreads = new ArrayList<WeakReference<Thread>>();
private final ArrayList<DBBPool> m_poolsToClearOnShutdown = new ArrayList<DBBPool>();
/**
* Synchronizes registration and unregistration of channels
*/
private final ReentrantReadWriteLock m_registrationLock = new ReentrantReadWriteLock();
/**
* Start this VoltNetwork's thread;
*/
public void start() {
m_thread.start();
}
/** Used for test only! */
public VoltNetwork(Selector selector) {
m_thread = null;
m_selector = selector;
m_useBlockingSelect = true;
m_useExecutorService = false;
}
public VoltNetwork() {
this(true, true, null, null);
}
public VoltNetwork(HStoreSite hstore_site) {
this(true, true, null, hstore_site);
}
public VoltNetwork(boolean useExecutorService, boolean blockingSelect, Integer threads) {
this(useExecutorService, blockingSelect, threads, null);
}
/**
* Initialize a m_selector and become ready to perform real work
* If the network is not going to provide any threads provideOwnThread should be false
* and runOnce should be called periodically
**/
public VoltNetwork(boolean useExecutorService, boolean blockingSelect, Integer threads, final HStoreSite hstore_site) {
m_thread = new Thread(this, "Volt Network");
m_thread.setDaemon(true);
m_useBlockingSelect = blockingSelect;
try {
m_selector = Selector.open();
} catch (IOException ex) {
m_logger.fatal(null, ex);
throw new RuntimeException(ex);
}
final int availableProcessors = Runtime.getRuntime().availableProcessors();
//Single thread is plenty for 4 cores.
if (availableProcessors <= 4) {
m_useExecutorService = false;
} else {
m_useExecutorService = useExecutorService;
}
if (!m_useExecutorService) {
return;
}
int threadPoolSize = 1;
if (threads != null) {
threadPoolSize = threads.intValue();
} else if (availableProcessors <= 4) {
threadPoolSize = 1;
} else if (availableProcessors <= 8) {
threadPoolSize = 2;
} else if (availableProcessors <= 16) {
threadPoolSize = 3;
} else {
threadPoolSize = 3;
}
m_logger.debug("Network Thread Pool Size: " + threadPoolSize);
final ThreadFactory tf = new ThreadFactory() {
private ThreadGroup group = new ThreadGroup(Thread.currentThread().getThreadGroup(), "Network Threads");
private int threadIndex = 0;
@Override
public Thread newThread(final Runnable run) {
String threadName = String.format("%s-%02d", HStoreConstants.THREAD_NAME_INCOMINGNETWORK, this.threadIndex++);
if (hstore_site != null) {
threadName = HStoreThreadManager.getThreadName(hstore_site, threadName);
}
final Thread t = new Thread(this.group, run, threadName) {
@Override
public void run() {
// HACK: Register the thread if we have an HStoreThreadManager
if (hstore_site != null) {
hstore_site.getThreadManager().registerProcessingThread();
}
try {
run.run();
} finally {
synchronized (m_poolsToClearOnShutdown) {
m_poolsToClearOnShutdown.add(VoltPort.m_pool.get());
}
}
};
};
synchronized (m_networkThreads) {
m_networkThreads.add(new WeakReference<Thread>(t));
}
t.setDaemon(true);
return t;
}
};
for (int ii = 0; ii < threadPoolSize; ii++) {
tf.newThread(new Runnable() {
@Override
public void run() {
//final ArrayDeque<VoltPortFutureTask> nextTasks = new ArrayDeque<VoltPortFutureTask>(3);0
while (true) {
try {
Runnable nextTask = null;
synchronized (m_tasks) {
nextTask = m_tasks.poll();
while (nextTask == null && !m_shouldStop) {
m_tasks.wait();
nextTask = m_tasks.poll();
}
}
if (nextTask == null) {
return;
}
nextTask.run();
} catch (InterruptedException e) {
return;
} catch (Exception e) {
networkLog.error(e);
}
}
}
}).start();
}
//It is really handy to be able to uncomment this and print bandwidth usage. Hopefully
//management tools will replace it.
// new Thread() {
// @Override
// public void run() {
// long last = System.currentTimeMillis();
// while (true) {
// try {
// Thread.sleep(10000);
// } catch (InterruptedException e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
// final long now = System.currentTimeMillis();
// long totalRead = 0;
// long totalMessagesRead = 0;
// long totalWritten = 0;
// long totalMessagesWritten = 0;
// synchronized (m_ports) {
// for (VoltPort p : m_ports) {
// final long read = p.readStream().getBytesRead(true);
// final long writeInfo[] = p.writeStream().getBytesAndMessagesWritten(true);
// final long messagesRead = p.getMessagesRead(true);
// totalRead += read;
// totalMessagesRead += messagesRead;
// totalWritten += writeInfo[0];
// totalMessagesWritten += writeInfo[1];
// }
// }
// double delta = (now - last) / 1000.0;
// double mbRead = totalRead / (1024.0 * 1024.0);
// double mbWritten = totalWritten / (1024.0 * 1024.0);
// System.out.printf("Transferred %.2f/%.2f (IN/OUT)/sec\n", mbRead / delta, mbWritten / delta);
// last = now;
// }
// }
// }.start();
}
/**
* Lock that causes the selection thread to wait for all threads that
* are in the process of registering or unregistering channels to finish
*/
private void waitForRegistrationLock() {
m_registrationLock.writeLock().lock();
m_registrationLock.writeLock().unlock();
}
/**
* Acquire a lock that stops the selection thread while a channel is being registered/unregistered
*/
private void acquireRegistrationLock() {
m_registrationLock.readLock().lock();
m_selector.wakeup();
}
/**
* Release a lock that stops the selection thread while a channel is being registered/unregistered
*/
private void releaseRegistrationLock() {
m_registrationLock.readLock().unlock();
}
/** Instruct the network to stop after the current loop */
public void shutdown() throws InterruptedException {
if (m_thread != null) {
synchronized (this) {
m_shouldStop = true;
m_selector.wakeup();
}
m_thread.join();
} else {
m_shouldStop = true;
}
}
public Connection registerChannel(SocketChannel channel, InputHandler handler) throws IOException {
return registerChannel(channel, handler, SelectionKey.OP_READ);
}
/**
* Register a channel with the selector and create a Connection that will pass incoming events
* to the provided handler.
* @param channel
* @param handler
* @throws IOException
*/
public Connection registerChannel(
SocketChannel channel,
InputHandler handler,
int interestOps) throws IOException {
channel.configureBlocking (false);
channel.socket().setKeepAlive(true);
VoltPort port =
new VoltPort(
this,
handler,
handler.getExpectedOutgoingMessageSize(),
channel.socket().getInetAddress().getHostName());
port.registering();
acquireRegistrationLock();
try {
SelectionKey key = channel.register (m_selector, interestOps, port);
port.setKey (key);
port.registered();
return port;
} finally {
synchronized (m_ports) {
m_ports.add(port);
}
releaseRegistrationLock();
}
}
/**
* Unregister a channel. The connections streams are not drained before finishing.
* @param c
*/
void unregisterChannel (Connection c) {
VoltPort port = (VoltPort)c;
assert(c != null);
SelectionKey selectionKey = port.getKey();
acquireRegistrationLock();
try {
synchronized (m_ports) {
if (!m_ports.contains(port)) {
return;
}
}
port.unregistering();
selectionKey.cancel();
selectionKey.attach(null);
synchronized (m_ports) {
m_ports.remove(port);
}
} finally {
releaseRegistrationLock();
}
port.unregistered();
}
/** Set interest registrations for a port */
public void addToChangeList(VoltPort port) {
synchronized (m_selectorUpdates_1) {
m_activeUpdateList.add(port);
}
if (m_useBlockingSelect) {
m_selector.wakeup();
}
}
@Override
public void run() {
try {
while (m_shouldStop == false) {
try {
while (m_shouldStop == false) {
waitForRegistrationLock();
if (m_useBlockingSelect) {
m_selector.select(5);
} else {
m_selector.selectNow();
}
installInterests();
invokeCallbacks();
EstTimeUpdater.update(System.currentTimeMillis());
}
} catch (Exception ex) {
m_logger.error(null, ex);
}
}
} finally {
p_shutdown();
}
}
private synchronized void p_shutdown() {
//Synchronized so the interruption won't interrupt the network thread
//while it is waiting for the executor service to shutdown
try {
try {
synchronized (m_networkThreads) {
synchronized (m_tasks) {
m_tasks.notifyAll();
}
for (final WeakReference<Thread> r : m_networkThreads) {
final Thread t = r.get();
if (t != null) {
t.join();
}
}
}
} catch (InterruptedException e) {
m_logger.error(e);
}
Set<SelectionKey> keys = m_selector.keys();
for (SelectionKey key : keys) {
VoltPort port = (VoltPort) key.attachment();
if (port != null) {
try {
unregisterChannel (port);
} catch (Exception e) {
networkLog.error("Exception unregisering port " + port, e);
}
}
}
synchronized (m_poolsToClearOnShutdown) {
for (DBBPool p : m_poolsToClearOnShutdown) {
p.clear();
}
m_poolsToClearOnShutdown.clear();
}
try {
m_selector.close();
} catch (IOException e) {
m_logger.error(null, e);
}
} finally {
this.notifyAll();
}
}
protected void installInterests() {
// swap the update lists to avoid contention while
// draining the requested values. also guarantees
// that the end of the list will be reached if code
// appends to the update list without bound.
ArrayDeque<VoltPort> oldlist;
synchronized(m_selectorUpdates_1) {
if (m_activeUpdateList == m_selectorUpdates_1) {
oldlist = m_selectorUpdates_1;
m_activeUpdateList = m_selectorUpdates_2;
}
else {
oldlist = m_selectorUpdates_2;
m_activeUpdateList = m_selectorUpdates_1;
}
}
while (!oldlist.isEmpty()) {
final VoltPort port = oldlist.poll();
try {
if (port.isRunning()) {
continue;
}
if (port.isDead()) {
unregisterChannel(port);
try {
port.m_selectionKey.channel().close();
} catch (IOException e) {}
} else if (port.hasQueuedRunnables()) {
port.lockForHandlingWork();
port.getKey().interestOps(0);
m_selector.selectedKeys().remove(port.getKey());
synchronized (m_tasks) {
m_tasks.offer(getPortCallRunnable(port));
m_tasks.notify();
}
} else {
resumeSelection(port);
}
} catch (java.nio.channels.CancelledKeyException e) {
networkLog.warn(
"Had a cancelled key exception while processing queued runnables for port "
+ port.m_remoteHost, e);
}
}
}
private void resumeSelection( VoltPort port) {
SelectionKey key = port.getKey();
if (key.isValid()) {
key.interestOps (port.interestOps());
} else {
synchronized (m_ports) {
m_ports.remove(port);
}
}
}
private Runnable getPortCallRunnable(final VoltPort port) {
return new Runnable() {
@Override
public void run() {
try {
port.call();
} catch (Exception e) {
port.die();
if (e instanceof IOException) {
m_logger.trace( "VoltPort died, probably of natural causes", e);
} else {
networkLog.error( "VoltPort died due to an unexpected exception", e);
}
} finally {
addToChangeList (port);
}
}
};
}
/** Set the selected interest set on the port and run it. */
protected void invokeCallbacks() {
final Set<SelectionKey> selectedKeys = m_selector.selectedKeys();
ArrayList<Runnable> generatedTasks = null;
for(SelectionKey key : selectedKeys) {
final VoltPort port = (VoltPort) key.attachment();
if (port == null) {
continue;
}
try {
port.lockForHandlingWork();
key.interestOps(0);
final Runnable runner = getPortCallRunnable(port);
if (m_useExecutorService) {
if (generatedTasks == null) generatedTasks = new ArrayList<Runnable>();
generatedTasks.add(runner);
} else {
runner.run();
}
}
catch (CancelledKeyException e) {
e.printStackTrace();
// no need to do anything here until
// shutdown makes more sense
}
}
if (generatedTasks != null && !generatedTasks.isEmpty()) {
synchronized (m_tasks) {
m_tasks.addAll(generatedTasks);
if (m_tasks.size() > 1) {
m_tasks.notifyAll();
} else {
m_tasks.notify();
}
}
}
selectedKeys.clear();
}
public Map<Long, Pair<String, long[]>> getIOStats(boolean interval) {
final HashMap<Long, Pair<String, long[]>> retval =
new HashMap<Long, Pair<String, long[]>>();
long totalRead = 0;
long totalMessagesRead = 0;
long totalWritten = 0;
long totalMessagesWritten = 0;
synchronized (m_ports) {
for (VoltPort p : m_ports) {
final long read = p.readStream().getBytesRead(interval);
final long writeInfo[] = p.writeStream().getBytesAndMessagesWritten(interval);
final long messagesRead = p.getMessagesRead(interval);
totalRead += read;
totalMessagesRead += messagesRead;
totalWritten += writeInfo[0];
totalMessagesWritten += writeInfo[1];
retval.put(
p.connectionId(),
Pair.of(
p.m_remoteHost,
new long[] {
read,
messagesRead,
writeInfo[0],
writeInfo[1] }));
}
}
retval.put(
-1L,
Pair.of(
"GLOBAL",
new long[] {
totalRead,
totalMessagesRead,
totalWritten,
totalMessagesWritten }));
return retval;
}
public ArrayList<Long> getThreadIds() {
ArrayList<Long> ids = new ArrayList<Long>();
if (m_thread != null) {
ids.add(m_thread.getId());
}
for (WeakReference<Thread> ref : m_networkThreads) {
ids.add(ref.get().getId());
}
return ids;
}
}