/* This file is part of VoltDB.
* Copyright (C) 2008-2014 VoltDB Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with VoltDB. If not, see <http://www.gnu.org/licenses/>.
*/
package org.voltdb.client;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.Date;
import java.util.Iterator;
import java.util.concurrent.TimeUnit;
import org.HdrHistogram_voltpatches.Histogram;
import org.HdrHistogram_voltpatches.HistogramData;
import com.google_voltpatches.common.base.Charsets;
import com.google_voltpatches.common.base.Throwables;
/**
* <p>Essentially a set of counters for a specific context with helper
* methods. The context has a time window and can apply to all connections
* and procedures, or to a single connection and/or procedure.</p>
*
* <p>The helper methods such as {@link #getTxnThroughput()} or
* {@link #kPercentileLatency(double)} perform common operations
* on the counters.</p>
*
* <p>This object is immutable outside of the package scope and does not
* directly reference any internal data structures.</p>
*
* <p>See also {@link ClientStatsContext}.</p>
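*
* <p>A minimal usage sketch (assumes a connected {@link Client} named
* <code>client</code>; the workload itself is illustrative):</p>
*
* <pre>{@code
* ClientStatsContext ctx = client.createStatsContext();
* // ... run a workload against the cluster ...
* ClientStats stats = ctx.fetch().getStats();
* System.out.printf("%d txns/sec, p99 latency %.2f ms%n",
*         stats.getTxnThroughput(),
*         stats.kPercentileLatencyAsDouble(0.99));
* }</pre>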
*/
public class ClientStats {
String m_procName;
long m_startTS; // java.util.Date compatible milliseconds since epoch
long m_endTS;
long m_connectionId;
String m_hostname;
int m_port;
long m_invocationsCompleted;
long m_invocationAborts;
long m_invocationErrors;
long m_invocationTimeouts;
// cumulative latency measured by client, used to calculate avg. lat.
long m_roundTripTimeNanos;
// cumulative latency measured by the cluster, used to calculate avg lat.
long m_clusterRoundTripTime; // milliseconds
/** The number of buckets tracking latency with 1ms granularity. */
final public static int ONE_MS_BUCKET_COUNT = 50;
/** The number of buckets tracking latency with 10ms granularity. */
final public static int TEN_MS_BUCKET_COUNT = 20;
/** The number of buckets tracking latency with 100ms granularity. */
final public static int HUNDRED_MS_BUCKET_COUNT = 10;
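// Client-measured round-trip latency histogram; values are recorded in microseconds.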
Histogram m_latencyHistogram;
long m_bytesSent;
long m_bytesReceived;
private static final long LOWEST_TRACKABLE = 50;
private static final long HIGHEST_TRACKABLE = 10L * (1000L * 1000L);
private static final int SIGNIFICANT_VALUE_DIGITS = 2;
/*
* Get a Histogram that tracks values from 50 microseconds to 10 seconds with
* 2 significant value digits.
*/
public static Histogram constructHistogram() {
return new Histogram( LOWEST_TRACKABLE, HIGHEST_TRACKABLE, SIGNIFICANT_VALUE_DIGITS);
}
ClientStats() {
m_procName = "";
m_connectionId = -1;
m_hostname = "";
m_port = -1;
m_startTS = Long.MAX_VALUE;
m_endTS = Long.MIN_VALUE;
m_invocationsCompleted = m_invocationAborts = m_invocationErrors = m_invocationTimeouts = 0;
m_roundTripTimeNanos = m_clusterRoundTripTime = 0;
m_bytesSent = m_bytesReceived = 0;
m_latencyHistogram = constructHistogram();
}
ClientStats(ClientStats other) {
m_procName = other.m_procName;
m_connectionId = other.m_connectionId;
m_hostname = other.m_hostname;
m_port = other.m_port;
m_startTS = other.m_startTS;
m_endTS = other.m_endTS;
m_invocationsCompleted = other.m_invocationsCompleted;
m_invocationAborts = other.m_invocationAborts;
m_invocationErrors = other.m_invocationErrors;
m_invocationTimeouts = other.m_invocationTimeouts;
m_roundTripTimeNanos = other.m_roundTripTimeNanos;
m_clusterRoundTripTime = other.m_clusterRoundTripTime;
m_latencyHistogram = other.m_latencyHistogram.copy();
m_latencyHistogram.reestablishTotalCount();
m_bytesSent = other.m_bytesSent;
m_bytesReceived = other.m_bytesReceived;
}
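// Compute the field-by-field difference between a newer and an older snapshot of the
// same procedure/connection; the caller is expected to fix up the time window afterwards.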
static ClientStats diff(ClientStats newer, ClientStats older) {
if (!newer.m_procName.equals(older.m_procName) || (newer.m_connectionId != older.m_connectionId)) {
throw new IllegalArgumentException("Can't diff these ClientStats instances.");
}
ClientStats retval = new ClientStats();
retval.m_procName = older.m_procName;
retval.m_connectionId = older.m_connectionId;
retval.m_hostname = older.m_hostname;
retval.m_port = older.m_port;
// the next two values are essentially useless after a diff, but will
// be overwritten by ClientStatsContext
retval.m_startTS = older.m_startTS;
retval.m_endTS = newer.m_endTS;
retval.m_invocationsCompleted = newer.m_invocationsCompleted - older.m_invocationsCompleted;
retval.m_invocationAborts = newer.m_invocationAborts - older.m_invocationAborts;
retval.m_invocationErrors = newer.m_invocationErrors - older.m_invocationErrors;
retval.m_invocationTimeouts = newer.m_invocationTimeouts - older.m_invocationTimeouts;
retval.m_roundTripTimeNanos = newer.m_roundTripTimeNanos - older.m_roundTripTimeNanos;
retval.m_clusterRoundTripTime = newer.m_clusterRoundTripTime - older.m_clusterRoundTripTime;
retval.m_latencyHistogram = Histogram.diff(newer.m_latencyHistogram, older.m_latencyHistogram);
retval.m_bytesSent = newer.m_bytesSent - older.m_bytesSent;
retval.m_bytesReceived = newer.m_bytesReceived - older.m_bytesReceived;
return retval;
}
static ClientStats merge(Iterable<ClientStats> statsIterable) {
return merge(statsIterable.iterator());
}
static ClientStats merge(Iterator<ClientStats> statsIter) {
// empty set
if (!statsIter.hasNext()) {
return new ClientStats();
}
// seed the grouping by the first element
ClientStats seed = statsIter.next();
assert(seed != null);
// non-destructive
seed = (ClientStats) seed.clone();
// add in all the other elements
while (statsIter.hasNext()) {
seed.add(statsIter.next());
}
return seed;
}
void add(ClientStats other) {
if (!m_procName.equals(other.m_procName)) m_procName = "";
if (m_connectionId != other.m_connectionId) m_connectionId = -1;
if (!m_hostname.equals(other.m_hostname)) m_hostname = "";
if (m_port != other.m_port) m_port = -1;
m_startTS = Math.min(other.m_startTS, m_startTS);
m_endTS = Math.max(other.m_endTS, m_endTS);
m_invocationsCompleted += other.m_invocationsCompleted;
m_invocationAborts += other.m_invocationAborts;
m_invocationErrors += other.m_invocationErrors;
m_invocationTimeouts += other.m_invocationTimeouts;
m_roundTripTimeNanos += other.m_roundTripTimeNanos;
m_clusterRoundTripTime += other.m_clusterRoundTripTime;
m_latencyHistogram.add(other.m_latencyHistogram);
m_latencyHistogram.reestablishTotalCount();
m_bytesSent += other.m_bytesSent;
m_bytesReceived += other.m_bytesReceived;
}
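// Record a single completed invocation: roundTripTimeNanos is the client-measured round trip,
// clusterRoundTripTime is the server-reported latency in milliseconds.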
void update(long roundTripTimeNanos, int clusterRoundTripTime, boolean abort, boolean error, boolean timeout) {
m_invocationsCompleted++;
if (abort) m_invocationAborts++;
if (error) m_invocationErrors++;
if (timeout) m_invocationTimeouts++;
m_roundTripTimeNanos += roundTripTimeNanos;
m_clusterRoundTripTime += clusterRoundTripTime;
//Round up to 50 microseconds. Average is still accurate and it doesn't change the percentile distribution
//above 50 micros
final long roundTripMicros = Math.max(LOWEST_TRACKABLE, TimeUnit.NANOSECONDS.toMicros(roundTripTimeNanos));
if (roundTripMicros > HIGHEST_TRACKABLE) {
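// Longer than the trackable range: record the remainder once, plus one entry at the
// 10 second cap for each full period, so very long calls still weight the histogram's tail.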
m_latencyHistogram.recordValue(roundTripMicros % HIGHEST_TRACKABLE);
int count = (int)(roundTripMicros / HIGHEST_TRACKABLE);
for (int ii = 0; ii < count; ii++) {
m_latencyHistogram.recordValue(HIGHEST_TRACKABLE);
}
} else {
m_latencyHistogram.recordValue(roundTripMicros);
}
}
/**
* Get the name of the procedure this statistics instance applies to.
*
* @return The name of the procedure or the empty string if this stats
* instance covers more than one procedure.
*/
public String getProcedureName() {
return m_procName;
}
/**
* Get the {@link Date}-compatible timestamp that describes the start of
* the range of time this stats instance covers.
*
* @return A timestamp in milliseconds since the epoch.
*/
public long getStartTimestamp() {
return m_startTS;
}
/**
* Get the {@link Date}-compatible timestamp that describes the end of
* the range of time this stats instance covers.
*
* @return A timestamp in milliseconds since the epoch.
*/
public long getEndTimestamp() {
return m_endTS;
}
/**
* Get the number of milliseconds this stats instance covers.
*
* @return The number of milliseconds this stats instance covers.
*/
public long getDuration() {
// this value should never be MIN_VALUE by the time a user can call this
assert(m_endTS != Long.MIN_VALUE);
return m_endTS - m_startTS;
}
/**
* Get the id of the individual socket connection this statistics instance
* applies to. Note that hostname and port combos might not be unique,
* but connection ids will be.
*
* @return The id of the connection or -1 if this stats instance covers more
* than one connection.
*/
public long getConnectionId() {
return m_connectionId;
}
/**
* The hostname or IP (as string) of the connection this stats instance
* covers.
*
* @return The hostname or ip as string, or the empty string if this stats
* instance covers more than one connection.
*/
public String getHostname() {
return m_hostname;
}
/**
* The port number of the connection this stats instance covers.
*
* @return The port number, or -1 if this stats instance covers more than
* one connection.
*/
public int getPort() {
return m_port;
}
/**
* Get the number of transactions acknowledged by the VoltDB server(s)
* during the time period covered by this stats instance.
*
* @return The number of transactions completed.
*/
public long getInvocationsCompleted() {
return m_invocationsCompleted;
}
/**
* Get the number of transactions aborted by the VoltDB server(s)
* during the time period covered by this stats instance.
*
* @return The number of transactions aborted.
*/
public long getInvocationAborts() {
return m_invocationAborts;
}
/**
* Get the number of transactions failed by the VoltDB server(s)
* during the time period covered by this stats instance.
*
* @return The number of transactions that failed.
*/
public long getInvocationErrors() {
return m_invocationErrors;
}
/**
* Get the number of transactions that timed out before being sent to, or before being
* acknowledged by, the VoltDB server(s) during the time period covered by this stats instance.
*
* @return The number of transactions that timed out.
*/
public long getInvocationTimeouts() {
return m_invocationTimeouts;
}
/**
* Get the average latency in milliseconds for the time period
* covered by this stats instance. This is computed by summing the client-measured
* round trip times of all transactions and dividing by the completed
* invocation count.
*
* @return Average latency in milliseconds.
*/
public double getAverageLatency() {
if (m_invocationsCompleted == 0) return 0;
return (m_roundTripTimeNanos / (double)m_invocationsCompleted) / 1000000.0;
}
/**
* <p>Get the server-side average latency in milliseconds for the time period
* covered by this stats instance. This is computed by summing the server-reported
* latency times of all transactions and dividing by the completed invocation count.</p>
*
* <p>The server reported latency number measures the time from when a transaction
* is accepted from the socket to when the response is written back. It will be higher
* for multi-node clusters, for clusters under heavy load, or for clusters with
* longer-running transactions.</p>
*
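* <p>As a rough sketch, <code>getAverageLatency() - getAverageInternalLatency()</code>
* approximates the average time each transaction spends on the network and queued in the client.</p>
*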
* @return Average latency in milliseconds.
*/
public double getAverageInternalLatency() {
if (m_invocationsCompleted == 0) return 0;
return (double)m_clusterRoundTripTime / (double)m_invocationsCompleted;
}
/**
* <p>Get the raw buckets used for latency tracking in 1ms increments. For example, if
* a transaction returns in 3.2ms, then the array at index 3 will be incremented by
* one. It can be thought of as a histogram of latencies. It has
* {@link #ONE_MS_BUCKET_COUNT} buckets, for a range of
* <code>ONE_MS_BUCKET_COUNT x 1ms</code>.</p>
*
* <p>This raw data, along with other bucket sets of different granularity, is used to
* support the {@link #kPercentileLatency(double)} method. This returns a copy of the
* internal array so it is thread-safe and mutable if you wish. Note that latencies
* outside the covered range are not counted in the returned array.</p>
*
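* <p>A short sketch of dumping the 1ms buckets as text (<code>stats</code> is any
* populated instance):</p>
*
* <pre>{@code
* long[] by1ms = stats.getLatencyBucketsBy1ms();
* for (int i = 0; i < by1ms.length; i++) {
*     System.out.printf("%2d-%2d ms: %d%n", i, i + 1, by1ms[i]);
* }
* }</pre>
*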
* @return An array containing counts for different latency values.
*/
public long[] getLatencyBucketsBy1ms() {
final long buckets[] = new long[ONE_MS_BUCKET_COUNT];
final HistogramData data = m_latencyHistogram.getHistogramData();
for (int ii = 0; ii < ONE_MS_BUCKET_COUNT; ii++) {
buckets[ii] = data.getCountBetweenValues(ii * 1000, (ii + 1) * 1000);
}
return buckets;
}
/**
* <p>Get the raw buckets used for latency tracking in 10ms increments. For example, if
* a transaction returns in 42ms, then the array at index 4 will be incremented by
* one. It can be thought of as a histogram of latencies. It has
* {@link #TEN_MS_BUCKET_COUNT} buckets, for a range of
* <code>TEN_MS_BUCKET_COUNT x 10ms</code>.</p>
*
* <p>This raw data, along with other bucket sets of different granularity, is used to
* support the {@link #kPercentileLatency(double)} method. This returns a copy of the
* internal array so it is thread-safe and mutable if you wish. Note that latencies
* outside the covered range are not counted in the returned array.</p>
*
* @return An array containing counts for different latency values.
*/
public long[] getLatencyBucketsBy10ms() {
final long buckets[] = new long[TEN_MS_BUCKET_COUNT];
final HistogramData data = m_latencyHistogram.getHistogramData();
for (int ii = 0; ii < TEN_MS_BUCKET_COUNT; ii++) {
buckets[ii] = data.getCountBetweenValues(ii * 10000, (ii + 1) * 10000);
}
return buckets;
}
/**
* <p>Get the raw buckets used for latency tracking in 100ms increments. For example, if
* a transaction returns in 342ms, then the array at index 3 will be incremented by
* one. It can be thought of as a histogram of latencies. It has
* {@link #HUNDRED_MS_BUCKET_COUNT} buckets, for a range of
* <code>HUNDRED_MS_BUCKET_COUNT x 100ms</code>.</p>
*
* <p>This raw data, along with other bucket sets of different granularity, is used to
* support the {@link #kPercentileLatency(double)} method. This returns a copy of the
* internal array so it is thread-safe and mutable if you wish. Note that latencies
* outside the covered range are not counted in the returned array.</p>
*
* @return An array containing counts for different latency values.
*/
public long[] getLatencyBucketsBy100ms() {
final long buckets[] = new long[HUNDRED_MS_BUCKET_COUNT];
final HistogramData data = m_latencyHistogram.getHistogramData();
for (int ii = 0; ii < HUNDRED_MS_BUCKET_COUNT; ii++) {
buckets[ii] = data.getCountBetweenValues(ii * 100000, (ii + 1) * 100000);
}
return buckets;
}
/**
* Return the number of bytes written over the network during the time period
* covered by this stats instance. This can be specific to a connection or global,
* but is not recorded for per-procedure statistics.
*
* @return The number of bytes written or 0 for per-procedure statistics.
*/
public long getBytesWritten() {
return m_bytesSent;
}
/**
* Return the number of bytes read from the network during the time period
* covered by this stats instance. This can be specific to a connection or global,
* but is not recorded for per-procedure statistics.
*
* @return The number of bytes read or 0 for per-procedure statistics.
*/
public long getBytesRead() {
return m_bytesReceived;
}
/**
* <p>Using the latency bucketing statistics gathered by the client, estimate
* the k-percentile latency value for the time period covered by this stats
* instance.</p>
*
* <p>For example, k=.5 returns an estimate of the median. k=0 returns the
* minimum. k=1.0 returns the maximum.</p>
*
* <p>Latencies longer than the highest trackable value (10 seconds) will be
* reported as multiple entries at the highest trackable value.</p>
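*
* <p>For example (a sketch, given any populated <code>stats</code> instance):</p>
*
* <pre>{@code
* int median = stats.kPercentileLatency(0.5);
* int p99    = stats.kPercentileLatency(0.99);
* }</pre>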
*
* @param percentile A floating point number between 0.0 and 1.0.
* @return An estimate of k-percentile latency in whole milliseconds.
*/
public int kPercentileLatency(double percentile) {
final HistogramData data = m_latencyHistogram.getHistogramData();
if (data.getTotalCount() == 0) return 0;
percentile = Math.max(0.0, percentile);
//Convert from micros to millis for the return value (fractional millis are truncated)
return (int) (Math.round(data.getValueAtPercentile(percentile * 100.0)) / 1000.0);
}
/**
* <p>Using the latency bucketing statistics gathered by the client, estimate
* the k-percentile latency value for the time period covered by this stats
* instance.</p>
*
* <p>For example, k=.5 returns an estimate of the median. k=0 returns the
* minimum. k=1.0 returns the maximum.</p>
*
* <p>Latencies longer than the highest trackable value (10 seconds) will be
* reported as multiple entries at the highest trackable value.</p>
*
* @param percentile A floating point number between 0.0 and 1.0.
* @return An estimate of k-percentile latency in whole milliseconds.
*/
public double kPercentileLatencyAsDouble(double percentile) {
final HistogramData data = m_latencyHistogram.getHistogramData();
if (data.getTotalCount() == 0) return 0.0;
percentile = Math.max(0.0, percentile);
//Convert from micros to millis for the return value, keeping fractional precision
return data.getValueAtPercentile(percentile * 100.0) / 1000.0;
}
/**
* Generate a human-readable report of latencies in the form of a histogram. Latencies are
* reported in milliseconds.
*
* @return String containing human-readable report.
*/
public String latencyHistoReport() {
ByteArrayOutputStream baos= new ByteArrayOutputStream();
PrintStream pw = null;
try {
pw = new PrintStream(baos, false, Charsets.UTF_8.name());
} catch (UnsupportedEncodingException e) {
Throwables.propagate(e);
}
//Get a latency report in milliseconds
m_latencyHistogram.getHistogramData().outputPercentileDistributionVolt(pw, 1, 1000.0);
return new String(baos.toByteArray(), Charsets.UTF_8);
}
/**
* <p>Return an average throughput of transactions acknowledged per
* second for the duration covered by this stats instance.</p>
*
* <p>Essentially <code>{@link #getInvocationsCompleted()} divided by
* (({@link #getEndTimestamp()} - {@link #getStartTimestamp()}) / 1000.0)</code>,
* but with additional safety checks.</p>
*
* @return Throughput in transactions acknowledged per second.
*/
public long getTxnThroughput() {
assert(m_startTS != Long.MAX_VALUE);
assert(m_endTS != Long.MIN_VALUE);
if (m_invocationsCompleted == 0) return 0;
if (m_endTS < m_startTS) {
m_endTS = m_startTS + 1; // 1 ms duration is sorta cheatin'
}
long durationMs = m_endTS - m_startTS;
return (long) (m_invocationsCompleted / (durationMs / 1000.0));
}
/**
* <p>Return an average throughput of bytes sent per second over the
* network for the duration covered by this stats instance.</p>
*
* <p>Essentially <code>{@link #getBytesWritten()} divided by
* (({@link #getEndTimestamp()} - {@link #getStartTimestamp()}) / 1000.0)</code>,
* but with additional safety checks.</p>
*
* @return Throughput in bytes sent per second.
*/
public long getIOWriteThroughput() {
assert(m_startTS != Long.MAX_VALUE);
assert(m_endTS != Long.MIN_VALUE);
if (m_bytesSent == 0) return 0;
if (m_endTS < m_startTS) {
m_endTS = m_startTS + 1; // 1 ms duration is sorta cheatin'
}
long durationMs = m_endTS - m_startTS;
return (long) (m_bytesSent / (durationMs / 1000.0));
}
/**
* <p>Return an average throughput of bytes read per second from the
* network for the duration covered by this stats instance.</p>
*
* <p>Essentially <code>{@link #getBytesRead()} divided by
* (({@link #getEndTimestamp()} - {@link #getStartTimestamp()}) / 1000.0)</code>,
* but with additional safety checks.</p>
*
* @return Throughput in bytes read per second.
*/
public long getIOReadThroughput() {
assert(m_startTS != Long.MAX_VALUE);
assert(m_endTS != Long.MIN_VALUE);
if (m_bytesReceived == 0) return 0;
if (m_endTS < m_startTS) {
m_endTS = m_startTS + 1; // 1 ms duration is sorta cheatin'
}
long durationMs = m_endTS - m_startTS;
return (long) (m_bytesReceived / (durationMs / 1000.0));
}
/* (non-Javadoc)
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append(String.format("Start %s - End %s - Procedure: %s - ConnectionId: %d {\n",
new Date(m_startTS).toString(), new Date(m_endTS).toString(), m_procName, m_connectionId));
sb.append(String.format(" hostname: %s:%d\n",
m_hostname, m_port));
sb.append(String.format(" invocations completed/aborted/errors/timeouts: %d/%d/%d/%d\n",
m_invocationsCompleted, m_invocationAborts, m_invocationErrors, m_invocationTimeouts));
if (m_invocationsCompleted > 0) {
sb.append(String.format(" avg latency client/internal: %.2f/%d\n",
(m_roundTripTimeNanos / (double)m_invocationsCompleted) / 1000000.0, m_clusterRoundTripTime / m_invocationsCompleted));
sb.append(latencyHistoReport()).append("\n");
}
return sb.toString();
}
/* (non-Javadoc)
* @see java.lang.Object#clone()
*/
@Override
protected Object clone() {
return new ClientStats(this);
}
}