/*******************************************************************************
* oltpbenchmark.com
*
* Project Info: http://oltpbenchmark.com
* Project Members: Carlo Curino <carlo.curino@gmail.com>
* Evan Jones <ej@evanjones.ca>
* DIFALLAH Djellel Eddine <djelleleddine.difallah@unifr.ch>
* Andy Pavlo <pavlo@cs.brown.edu>
* CUDRE-MAUROUX Philippe <philippe.cudre-mauroux@unifr.ch>
* Yang Zhang <yaaang@gmail.com>
*
* This library is free software; you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Foundation;
* either version 3.0 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
******************************************************************************/
package com.oltpbenchmark;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Set;
import org.apache.commons.collections15.map.ListOrderedMap;
import org.apache.log4j.Logger;
import com.oltpbenchmark.LatencyRecord.Sample;
import com.oltpbenchmark.api.TransactionType;
import com.oltpbenchmark.api.Worker;
import com.oltpbenchmark.types.State;
import com.oltpbenchmark.util.Histogram;
import com.oltpbenchmark.util.QueueLimitException;
import com.oltpbenchmark.util.StringUtil;
public class ThreadBench implements Thread.UncaughtExceptionHandler {
private static final Logger LOG = Logger.getLogger(ThreadBench.class);
private static final int RATE_QUEUE_LIMIT = 10000;
private BenchmarkState testState;
private final List<? extends Worker> workers;
private final ArrayList<Thread> workerThreads;
// private File profileFile;
private static WorkloadConfiguration workConf;
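    // Latency samples collected from all workers, merged and sorted after a run.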
ArrayList<LatencyRecord.Sample> samples = new ArrayList<LatencyRecord.Sample>();
private ThreadBench(List<? extends Worker> workers) {
this(workers, null);
}
public ThreadBench(List<? extends Worker> workers, File profileFile) {
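        // NOTE: profileFile is currently unused; it is kept around for the
        // commented-out runRateLimitedFromFile() code path below.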
this.workers = workers;
this.workerThreads = new ArrayList<Thread>(workers.size());
}
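    /**
     * Groups latency samples into fixed-size time windows and yields one
     * {@link DistributionStatistics} per window. The samples are expected to be
     * ordered by start time. A rough usage sketch, assuming a pre-collected,
     * sorted list of samples:
     *
     * <pre>{@code
     * for (DistributionStatistics windowStats : new TimeBucketIterable(samples, 5)) {
     *     // report per-window latency statistics here
     * }
     * }</pre>
     */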
public static final class TimeBucketIterable implements Iterable<DistributionStatistics> {
private final Iterable<Sample> samples;
private final int windowSizeSeconds;
public TimeBucketIterable(Iterable<Sample> samples, int windowSizeSeconds) {
this.samples = samples;
this.windowSizeSeconds = windowSizeSeconds;
}
@Override
public Iterator<DistributionStatistics> iterator() {
return new TimeBucketIterator(samples.iterator(), windowSizeSeconds);
}
}
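    /**
     * Walks a time-ordered iterator of latency samples and, for each window of
     * {@code windowSizeSeconds}, computes a {@link DistributionStatistics} over
     * the samples whose start time falls inside that window.
     */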
public static final class TimeBucketIterator implements Iterator<DistributionStatistics> {
private final Iterator<Sample> samples;
private final int windowSizeSeconds;
private Sample sample;
private long nextStartNs;
private DistributionStatistics next;
public TimeBucketIterator(Iterator<LatencyRecord.Sample> samples, int windowSizeSeconds) {
this.samples = samples;
this.windowSizeSeconds = windowSizeSeconds;
if (samples.hasNext()) {
sample = samples.next();
                // TODO: To be totally correct, this should be the timestamp of
                // the start of the measurement interval. In most cases this
                // won't matter.
nextStartNs = sample.startNs;
calculateNext();
}
}
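        /**
         * Consumes all samples that fall into the current window, computes their
         * latency distribution, and advances the window start to the end of the
         * current window.
         */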
private void calculateNext() {
assert next == null;
assert sample != null;
assert sample.startNs >= nextStartNs;
// Collect all samples in the time window
ArrayList<Integer> latencies = new ArrayList<Integer>();
long endNs = nextStartNs + windowSizeSeconds * 1000000000L;
while (sample != null && sample.startNs < endNs) {
latencies.add(sample.latencyUs);
if (samples.hasNext()) {
sample = samples.next();
} else {
sample = null;
}
}
// Set up the next time window
assert sample == null || endNs <= sample.startNs;
nextStartNs = endNs;
int[] l = new int[latencies.size()];
for (int i = 0; i < l.length; ++i) {
l[i] = latencies.get(i);
}
next = DistributionStatistics.computeStatistics(l);
}
@Override
public boolean hasNext() {
return next != null;
}
@Override
public DistributionStatistics next() {
if (next == null)
throw new NoSuchElementException();
DistributionStatistics out = next;
next = null;
if (sample != null) {
calculateNext();
}
return out;
}
@Override
public void remove() {
throw new UnsupportedOperationException("unsupported");
}
}
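    /**
     * Creates and starts one thread per worker, registering this object as the
     * uncaught-exception handler so that a dying worker aborts the benchmark.
     */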
private void createWorkerThreads(boolean isRateLimited) {
assert testState == null;
testState = new BenchmarkState(workers.size() + 1, isRateLimited, RATE_QUEUE_LIMIT);
for (Worker worker : workers) {
worker.setBenchmarkState(testState);
Thread thread = new Thread(worker);
thread.setUncaughtExceptionHandler(this);
thread.start();
this.workerThreads.add(thread);
}
return;
}
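    /**
     * Waits (up to 60 seconds per thread) for the worker threads to terminate,
     * tears the workers down, and returns the total number of completed requests.
     */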
private int finalizeWorkers(ArrayList<Thread> workerThreads) throws InterruptedException {
assert testState.getState() == State.DONE || testState.getState() == State.EXIT;
int requests = 0;
new WatchDogThread().start();
for (int i = 0; i < workerThreads.size(); ++i) {
            // FIXME: Not sure this is the best solution. The timeout ensures
            // that we don't hang forever, but it may also hide problems in
            // workers that never terminate.
            workerThreads.get(i).join(60000); // wait up to 60 seconds for the
                                              // thread to terminate; it may
                                              // hang otherwise
            /*
             * CARLO: Maybe we want to do this to kill threads that are hanging:
             * if (workerThreads.get(i).isAlive()) {
             *     workerThreads.get(i).kill();
             *     try { workerThreads.get(i).join(); } catch (InterruptedException e) { }
             * }
             */
requests += workers.get(i).getRequests();
workers.get(i).tearDown(false);
}
testState = null;
return requests;
}
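    /**
     * Daemon thread that periodically logs whether each worker thread is still
     * alive, so that stalled runs are visible in the log.
     */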
private class WatchDogThread extends Thread {
{
this.setDaemon(true);
}
@Override
public void run() {
Map<String, Object> m = new ListOrderedMap<String, Object>();
LOG.info("Starting WatchDogThread");
while (true) {
try {
Thread.sleep(20000);
} catch (InterruptedException ex) {
return;
}
if (testState == null) return;
m.clear();
for (Thread t : workerThreads) {
m.put(t.getName(), t.isAlive());
}
LOG.info("Worker Thread Status:\n" + StringUtil.formatMaps(m));
} // WHILE
}
} // CLASS
    /*
     * public static Results runRateLimitedBenchmark(List<Worker> workers, File profileFile)
     *         throws QueueLimitException, IOException {
     *     ThreadBench bench = new ThreadBench(workers, profileFile);
     *     return bench.runRateLimitedFromFile();
     * }
     */
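    /**
     * Convenience entry point: wraps the given workers in a {@link ThreadBench}
     * and runs the rate-limited, multi-phase benchmark. Note that
     * {@link #setWorkConf(WorkloadConfiguration)} must be called first, since the
     * phase schedule is read from the static workConf field. A rough calling
     * sketch (how the configuration and workers are built depends on the
     * benchmark module and is only assumed here):
     *
     * <pre>{@code
     * WorkloadConfiguration conf = ...;  // phases, target rates, transaction weights
     * List<Worker> workers = ...;        // one Worker per simulated terminal
     * ThreadBench.setWorkConf(conf);
     * Results results = ThreadBench.runRateLimitedBenchmark(workers);
     * }</pre>
     */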
public static Results runRateLimitedBenchmark(List<Worker> workers) throws QueueLimitException, IOException {
ThreadBench bench = new ThreadBench(workers);
return bench.runRateLimitedMultiPhase();
}
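    /**
     * Main control loop of the benchmark: blocks until all workers are ready,
     * then paces work submission according to each phase's target rate, switching
     * phases as their time expires. When the last phase ends it cools down the
     * workers and aggregates latencies and per-transaction histograms into a
     * {@link Results} object.
     */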
public Results runRateLimitedMultiPhase() throws QueueLimitException, IOException {
this.createWorkerThreads(true);
this.testState.blockForStart();
// long measureStart = start;
long start = System.nanoTime();
long measureEnd = -1;
Phase phase = workConf.getNextPhase();
testState.setCurrentPhase(phase);
LOG.info("[Starting Phase] [Time= " + phase.time + "] [Rate= " + phase.rate + "] [Ratios= " + phase.getWeights() + "]");
long intervalNs = (long) (1000000000. / (double) phase.rate + 0.5);
long nextInterval = start + intervalNs;
int nextToAdd = 1;
boolean resetQueues = true;
long delta = phase.time * 1000000000L;
boolean lastEntry = false;
while (true) {
            // Post new work, and reset the queues if we have moved on to a new
            // portion (phase) of the workload.
testState.addWork(nextToAdd, resetQueues);
resetQueues = false;
// Wait until the interval expires, which may be "don't wait"
long now = System.nanoTime();
long diff = nextInterval - now;
while (diff > 0) { // this can wake early: sleep multiple times to
// avoid that
long ms = diff / 1000000;
diff = diff % 1000000;
try {
Thread.sleep(ms, (int) diff);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
now = System.nanoTime();
diff = nextInterval - now;
}
assert diff <= 0;
if (start + delta < System.nanoTime() && !lastEntry) {
                // We enter here at the end of each phase: reset the queues so
                // that the new phase is not affected by the backlog left over
                // from the previous one.
resetQueues = true;
// Fetch a new Phase
phase = workConf.getNextPhase();
testState.setCurrentPhase(phase);
if (phase == null) {
// Last phase
lastEntry = true;
} else {
delta += phase.time * 1000000000L;
LOG.info("[Starting Phase] [Time= " + phase.time + "] [Rate= " + phase.rate + "] [Ratios= " + phase.getWeights() + "]");
                    // Update the pacing interval to match the new phase's
                    // target rate.
intervalNs = (long) (1000000000. / (double) phase.rate + 0.5);
}
}
// Compute how many messages to deliver
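            // If we overslept past one or more intervals, catch up by submitting
            // one unit of work for each interval that elapsed.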
nextToAdd = (int) (-diff / intervalNs + 1);
assert nextToAdd > 0;
nextInterval += intervalNs * nextToAdd;
// Update the test state appropriately
State state = testState.getState();
if (state == State.WARMUP && now >= start) {
testState.startMeasure();
start = now;
// measureEnd = measureStart + measureSeconds * 1000000000L;
} else if (state == State.MEASURE && lastEntry && now >= start + delta) {
testState.startCoolDown();
LOG.info("[Terminate] Waiting for all terminals to finish ..");
measureEnd = now;
} else if (state == State.EXIT) {
                // All threads have noticed that we are done, meaning all
                // measured requests have definitely finished. Time to quit.
break;
}
}
try {
int requests = finalizeWorkers(this.workerThreads);
// Combine all the latencies together in the most disgusting way
// possible: sorting!
for (Worker w : workers) {
for (LatencyRecord.Sample sample : w.getLatencyRecords()) {
samples.add(sample);
}
}
Collections.sort(samples);
// Compute stats on all the latencies
int[] latencies = new int[samples.size()];
for (int i = 0; i < samples.size(); ++i) {
latencies[i] = samples.get(i).latencyUs;
}
DistributionStatistics stats = DistributionStatistics.computeStatistics(latencies);
Results results = new Results(measureEnd - start, requests, stats, samples);
// Compute transaction histogram
Set<TransactionType> txnTypes = new HashSet<TransactionType>(workConf.getTransTypes());
txnTypes.remove(TransactionType.INVALID);
results.txnSuccess.putAll(txnTypes, 0);
results.txnRetry.putAll(txnTypes, 0);
results.txnAbort.putAll(txnTypes, 0);
results.txnErrors.putAll(txnTypes, 0);
for (Worker w : workers) {
results.txnSuccess.putHistogram(w.getTransactionSuccessHistogram());
results.txnRetry.putHistogram(w.getTransactionRetryHistogram());
results.txnAbort.putHistogram(w.getTransactionAbortHistogram());
results.txnErrors.putHistogram(w.getTransactionErrorHistogram());
for (Entry<TransactionType, Histogram<String>> e : w.getTransactionAbortMessageHistogram().entrySet()) {
Histogram<String> h = results.txnAbortMessages.get(e.getKey());
if (h == null) {
h = new Histogram<String>(true);
results.txnAbortMessages.put(e.getKey(), h);
}
h.putHistogram(e.getValue());
} // FOR
} // FOR
return (results);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
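    /**
     * Invoked when a worker thread dies with an unhandled exception: dump the
     * stack trace and abort the whole benchmark.
     */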
@Override
public void uncaughtException(Thread t, Throwable e) {
// Something bad happened! Tell all of our workers that the party is over!
// synchronized (this) {
// if (this.calledTearDown == false) {
// for (Worker w : this.workers) {
// w.tearDown(true);
// }
// }
// this.calledTearDown = true;
// } // SYNCH
//
        // HERE WE HANDLE THE CASE IN WHICH ONE OF OUR WORKER THREADS DIED
e.printStackTrace();
System.exit(-1);
        /*
         * Alternatively, we could keep a HashMap<Thread, Worker> storing the
         * runnable for each thread, so that we can get the latency numbers from
         * a thread that died, and either continue or at least report the current
         * status. (Remember to remove this thread from the list of threads to
         * wait for.)
         */
}
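    /**
     * Sets the (static) workload configuration from which the phase schedule and
     * transaction types are read. Must be called before running a benchmark.
     */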
public static void setWorkConf(WorkloadConfiguration workConfig) {
workConf = workConfig;
}
}