/**
* Copyright 2011-2012 Akiban Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.persistit;
import java.lang.ref.WeakReference;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import com.persistit.exception.PersistitException;
import com.persistit.exception.PersistitInterruptedException;
/**
* <p>
* An Accumulator accumulates statistical information in the MVCC Transaction
* environment without creating write-write dependency conflicts. Subclasses
* include <code>SumAccumulator</code>, <code>MinAccumulator</code>,
* <code>MaxAccumulator</code> and <code>SeqAccumulator</code> which compute the
* sum, minimum and maximum values of contributions by individual transactions.
* (See <a href="#_SeqAccumulator">below</a> for semantics of the
* <code>SeqAccumulator</code>.) Each contribution is accounted for separately
* as a <code>Delta</code> instance until the transaction is either committed or
* aborted and there are no other concurrently executing transactions that
* started before the commit timestamp. This mechanism is designed to provide a
* "snapshot" view of the Accumulator that is consistent with the snapshot view
* of the database.
* </p>
* <p>
* In more detail: each type of Accumulator has an update method that may only
* be invoked within the scope of a transaction T. That update is not visible to
* any other transaction until T commits. Moreover, any other concurrently
* executing transaction having a start timestamp less than T's commit timestamp
* does not see the results of the update. To accomplish this, the state of the
* Accumulator visible within a transaction is computed by determining which
* updates are visible and aggregating them on demand.
* </p>
* <p>
* The updates invoked on committed Transactions are recorded in the Journal and
* reapplied during recovery to reproduce an accurate version of the Accumulator
* state when Persistit starts up. There can be at most 64 Accumulators per
* Tree. A snapshot value of each Accumulator is stored once per checkpoint.
* Checkpoint snapshot values are held in the directory tree of the volume
* containing the Tree.
* </p>
* <p>
* <h3>Types of Accumulators</h3>
* The following defines intended use cases for the various types of
* accumulators:
* <dl>
* <dt>{@link com.persistit.Accumulator.Type#SUM}</dt>
* <dd>Row count, total size, sums of various other characteristics</dd>
* <dt>{@link com.persistit.Accumulator.Type#MAX}</dt>
* <dd>Maximum value</dd>
* <dt>{@link com.persistit.Accumulator.Type#MIN}</dt>
* <dd>Minimum value</dd>
* <dt>{@link com.persistit.Accumulator.Type#SEQ}</dt>
* <dd>Sequence number generation, e.g., auto-increment or internal primary key
* assignment</dd>
* </dl>
* </p>
* <p>
* <a name="_SnapshotValue" />
* <h3>Snapshot and Live Values</h3>
* Each Accumulator type supplies both a "snapshot" value and a "live" value.
* The snapshot value is computed as described above by selectively applying
* only those updates visible to the transaction. The live value, however, is
* simply the result of applying each update operation atomically to a long
* value maintained by the Accumulator. For example, if ten transactions
* increment a SUM accumulator by one, and then five of them commit and five
* roll back, the live value is nonetheless increased by ten. Thus the live
* value is only an estimate. Its value is cheap to acquire but not
* transactionally accurate.
* </p>
* <p>
* <a name="_SeqAccumulator" />
* <h3>SeqAccumulator</h3>
* The <code>SeqAccumulator</code> is a combination of
* <code>SumAccumulator</code> and <code>MaxAccumulator</code>. When the
* {@link com.persistit.Accumulator.SeqAccumulator#allocate()} method is called,
* the Accumulator's <code>live</code> value is atomically incremented and the
* resulting value is returned. In addition, a <code>Delta</code> holding the
* resulting sum as a proposed maximum value is added to the transaction. These
* semantics guarantee that every value returned by a SeqAccumulator (within a
* transaction that actually commits) is unique, and that upon recovery after a
* normal shutdown or crash, the first value returned will be larger than the
* maximum value assigned by any transaction that committed successfully before
* the shutdown. Note that a transaction that allocates a value and then aborts
* leaves a gap in the numerical sequence.
* </p>
*
* @author peter
*/
public abstract class Accumulator {
/**
 * The kinds of <code>Accumulator</code> that the
 * <code>accumulator</code> factory method of this class can create. Each
 * constant corresponds to one nested subclass.
 */
public static enum Type {
// SUM -> SumAccumulator, MAX -> MaxAccumulator, MIN -> MinAccumulator,
// SEQ -> SeqAccumulator
SUM, MAX, MIN, SEQ
};
/**
 * Orders Accumulators for checkpoint writing: primarily by the name of the
 * owning Tree (an Accumulator without a Tree sorts as though its tree name
 * were the empty string) and secondarily by Accumulator index.
 */
final static Comparator<Accumulator> SORT_COMPARATOR = new Comparator<Accumulator>() {
    @Override
    public int compare(final Accumulator a, final Accumulator b) {
        final int byTreeName = treeNameOf(a).compareTo(treeNameOf(b));
        // Plain subtraction cannot overflow here: the Accumulator constructor
        // restricts every index to the range 0..MAX_INDEX.
        return byTreeName != 0 ? byTreeName : a.getIndex() - b.getIndex();
    }

    /* Name of the Accumulator's Tree, or "" when it has none. */
    private String treeNameOf(final Accumulator accumulator) {
        final Tree tree = accumulator.getTree();
        return tree == null ? "" : tree.getName();
    }
};
// Largest permitted Accumulator index; the constructor rejects any index
// outside 0..MAX_INDEX.
// Note: architectural limit of 255 for JournalRecord encoding
final static int MAX_INDEX = 63;
// NOTE(review): presumably an upper bound on the serialized form of an
// Accumulator (tree part plus 24 bytes) -- confirm against the serializer.
final static int MAX_SERIALIZED_SIZE = Tree.MAX_SERIALIZED_SIZE + 24;
/*
 * The Tree this Accumulator belongs to. Several methods in this class
 * tolerate a null value here (toString, store, SORT_COMPARATOR).
 */
protected final Tree _tree;
/*
 * Index of this Accumulator, in the range 0..MAX_INDEX.
 */
private final int _index;
/*
 * Component that holds Deltas and computes snapshot values for this
 * Accumulator.
 */
private final TransactionIndex _transactionIndex;
/*
 * The non-transactional "live" value. It is modified with compare-and-set
 * by update() and overwritten by updateBaseValue().
 */
private final AtomicLong _liveValue = new AtomicLong();
/*
 * Check-pointed value read during recovery.
 */
private volatile long _baseValue;
/*
 * Snapshot value at the most recent checkpoint
 */
private volatile long _checkpointValue;
/*
 * Timestamp of the most recent checkpoint for which the checkpointValue has
 * been computed
 */
private volatile long _checkpointTimestamp;
/*
 * Temporary used only during the computation of checkpoint values.
 */
private long _checkpointTemp;
/*
 * Accumulated value per TransactionIndex bucket. This number represents the
 * accumulation of all delta values that have been coalesced and are no
 * longer present in live TransactionStatus objects. This array has one
 * element per TransactionIndexBucket.
 */
private final long[] _bucketValues;
/*
 * Object held on the accumulators list in {@link Persistit}. An
 * AccumulatorRef is carefully crafted to keep a strong reference to the
 * Accumulator when one is needed, plus a WeakReference that is used to
 * detect that there are no other references to the Accumulator so that the
 * AccumulatorRef can be removed from the accumulators list.
 */
final AccumulatorRef _accumulatorRef;
/**
 * An Accumulator whose value is the sum of the values contributed to it.
 */
public final static class SumAccumulator extends Accumulator {

    private SumAccumulator(final Tree tree, final int index, final long baseValue,
            final TransactionIndex transactionIndex) {
        super(tree, index, baseValue, transactionIndex);
    }

    /**
     * @return <code>a + b</code>, with no overflow checking
     */
    @Override
    long applyValue(final long a, final long b) {
        return a + b;
    }

    /**
     * Overflow-checked addition used when updating the live value.
     *
     * @return <code>a + b</code>
     * @throws IllegalArgumentException
     *             if the addition wraps around the range of a long
     */
    @Override
    long updateValue(final long a, final long b) {
        final long sum = a + b;
        // Adding a positive number must not decrease the value, and adding
        // a negative number must not increase it.
        final boolean overflow = b > 0 ? sum < a : (b < 0 && sum > a);
        if (overflow) {
            throw new IllegalArgumentException("Accumulator value overflow: (" + a + "+" + b + ")");
        }
        return sum;
    }

    /**
     * @return <code>value</code>: the Delta records the contribution
     *         itself, not the updated live value
     */
    @Override
    long selectValue(final long value, final long updated) {
        return value;
    }

    @Override
    Type getType() {
        return Type.SUM;
    }

    /**
     * <p>
     * Add the supplied value to this <code>SumAccumulator</code>. The
     * contribution is applied to the live value immediately and is also
     * recorded as a <code>Delta</code> on the {@link Transaction} obtained
     * from the Persistit instance that owns the Tree. That transaction
     * must be active.
     * </p>
     *
     * @param value
     *            The delta value
     */
    public void add(final long value) {
        final Transaction transaction = _tree.getPersistit().getTransaction();
        transaction.checkActive();
        final TransactionStatus status = transaction.getTransactionStatus();
        final int step = transaction.getStep();
        update(value, status, step);
    }
}
/**
 * An Accumulator whose value is the minimum of the values contributed to it.
 */
public final static class MinAccumulator extends Accumulator {

    private MinAccumulator(final Tree tree, final int index, final long baseValue,
            final TransactionIndex transactionIndex) {
        super(tree, index, baseValue, transactionIndex);
    }

    /**
     * @return the smaller of <code>a</code> and <code>b</code>
     */
    @Override
    long applyValue(final long a, final long b) {
        return a <= b ? a : b;
    }

    /**
     * Same operation as {@link #applyValue(long, long)}.
     */
    @Override
    long updateValue(final long a, final long b) {
        return applyValue(a, b);
    }

    /**
     * @return <code>value</code>: the Delta records the candidate minimum
     *         that was supplied
     */
    @Override
    long selectValue(final long value, final long updated) {
        return value;
    }

    @Override
    Type getType() {
        return Type.MIN;
    }

    /**
     * <p>
     * Modify the Accumulator so that its value is no greater than the
     * supplied value. The contribution is applied to the live value
     * immediately and is also recorded as a <code>Delta</code> on the
     * {@link Transaction} obtained from the Persistit instance that owns
     * the Tree. That transaction must be active.
     * </p>
     *
     * @param min
     *            The candidate minimum value
     */
    public void minimum(final long min) {
        final Transaction transaction = _tree.getPersistit().getTransaction();
        transaction.checkActive();
        final TransactionStatus status = transaction.getTransactionStatus();
        final int step = transaction.getStep();
        update(min, status, step);
    }
}
/**
 * An Accumulator whose value is the maximum of the values contributed to it.
 */
public final static class MaxAccumulator extends Accumulator {

    private MaxAccumulator(final Tree tree, final int index, final long baseValue,
            final TransactionIndex transactionIndex) {
        super(tree, index, baseValue, transactionIndex);
    }

    /**
     * @return the larger of <code>a</code> and <code>b</code>
     */
    @Override
    long applyValue(final long a, final long b) {
        return a >= b ? a : b;
    }

    /**
     * Same operation as {@link #applyValue(long, long)}.
     */
    @Override
    long updateValue(final long a, final long b) {
        return applyValue(a, b);
    }

    /**
     * @return <code>value</code>: the Delta records the candidate maximum
     *         that was supplied
     */
    @Override
    long selectValue(final long value, final long updated) {
        return value;
    }

    @Override
    Type getType() {
        return Type.MAX;
    }

    /**
     * <p>
     * Modify the Accumulator so that its value is no less than the
     * supplied value. The contribution is applied to the live value
     * immediately and is also recorded as a <code>Delta</code> on the
     * {@link Transaction} obtained from the Persistit instance that owns
     * the Tree. That transaction must be active.
     * </p>
     *
     * @param max
     *            The candidate maximum value
     */
    public void maximum(final long max) {
        final Transaction transaction = _tree.getPersistit().getTransaction();
        transaction.checkActive();
        final TransactionStatus status = transaction.getTransactionStatus();
        final int step = transaction.getStep();
        update(max, status, step);
    }
}
/**
 * An Accumulator used to generate unique ID values. The
 * {@link com.persistit.Accumulator.SeqAccumulator#allocate()} method
 * generates a new, unique long value. The transaction records this value as
 * a candidate for maximum value of the Accumulator. On recovery, the
 * highest such value ever allocated by a committed transaction is
 * recovered, and so after recovery the next allocated ID value will be
 * larger than any previously consumed.
 */
public final static class SeqAccumulator extends Accumulator {

    private SeqAccumulator(final Tree tree, final int index, final long baseValue,
            final TransactionIndex transactionIndex) {
        super(tree, index, baseValue, transactionIndex);
    }

    /**
     * Deltas of a SeqAccumulator hold allocated values, so combining two of
     * them keeps the larger one.
     *
     * @return the larger of <code>a</code> and <code>b</code>
     */
    @Override
    long applyValue(final long a, final long b) {
        return Math.max(a, b);
    }

    /**
     * Advance the live value by a positive increment.
     *
     * @return <code>a + b</code>
     * @throws IllegalArgumentException
     *             if <code>b</code> is not positive or if the addition
     *             overflows
     */
    @Override
    long updateValue(final long a, final long b) {
        if (b <= 0) {
            throw new IllegalArgumentException("Update value must be positive");
        }
        if (a + b < a) {
            throw new IllegalArgumentException("Accumulator value overflow: (" + a + "+" + b + ")");
        }
        return a + b;
    }

    /**
     * @return <code>updated</code>: the Delta records the newly allocated
     *         value rather than the increment
     */
    @Override
    long selectValue(final long value, final long updated) {
        return updated;
    }

    @Override
    Type getType() {
        return Type.SEQ;
    }

    /**
     * <p>
     * Allocate a sequence number. The value returned is guaranteed to be
     * unique for the lifetime of the database. Values are usually assigned
     * as consecutive integers, but in some cases there may be gaps in the
     * sequence. This method may be called only within the scope of an
     * active <code>Transaction</code>.
     * </p>
     * <p>
     * The value returned is equal to the <a href="#_SnapshotValue">live
     * value</a> the instant it is updated. However, note that the following
     * code is <em>not</em> guaranteed to generate a unique value:
     * </p>
     *
     * <pre>
     * <code>
     * seqAccumulator.allocate();
     * long id = seqAccumulator.getLiveValue();
     * </code>
     * </pre>
     * <p>
     * while the following is:
     * </p>
     *
     * <pre>
     * <code>
     * long id = seqAccumulator.allocate();
     * </code>
     * </pre>
     *
     * @return the updated live value
     */
    public long allocate() {
        final Transaction txn = _tree.getPersistit().getTransaction();
        // Same guard as SumAccumulator.add, MinAccumulator.minimum and
        // MaxAccumulator.maximum: reject the call up front when there is no
        // active transaction instead of failing later inside update().
        txn.checkActive();
        return update(1, txn.getTransactionStatus(), txn.getStep());
    }
}
/**
 * One contribution to an Accumulator: the Accumulator it applies to, the
 * step at which it was made, its value, and a link to the next Delta in a
 * singly-linked chain.
 */
final static class Delta {
    Accumulator _accumulator;
    int _step;
    long _value;
    Delta _next;

    // ---- accumulator ----

    Accumulator getAccumulator() {
        return _accumulator;
    }

    void setAccumulator(final Accumulator accumulator) {
        _accumulator = accumulator;
    }

    // ---- step ----

    int getStep() {
        return _step;
    }

    void setStep(final int step) {
        _step = step;
    }

    // ---- value ----

    long getValue() {
        return _value;
    }

    void setValue(final long newValue) {
        _value = newValue;
    }

    // ---- chain link ----

    Delta getNext() {
        return _next;
    }

    void setNext(final Delta delta) {
        _next = delta;
    }

    /**
     * @return <code>true</code> if this Delta belongs to the same
     *         Accumulator instance and the same step, so that another
     *         contribution can be folded into it with {@link #merge(long)}
     */
    boolean canMerge(final Accumulator accumulator, final int step) {
        if (_accumulator != accumulator) {
            return false;
        }
        return _step == step;
    }

    /**
     * Fold another contribution into this Delta using the owning
     * Accumulator's <code>applyValue</code> operation.
     */
    void merge(final long value) {
        final long combined = _accumulator.applyValue(_value, value);
        _value = combined;
    }

    @Override
    public String toString() {
        final String typeName = _accumulator == null ? "Null" : _accumulator.getType().toString();
        // A trailing "*" marks a Delta that has a successor in the chain.
        final String chainMarker = _next == null ? "" : "*";
        return String.format("Delta(type=%s value=%,d%s)", typeName, _value, chainMarker);
    }
}
/**
 * <p>
 * Device that maintains a strong reference to the Accumulator when it
 * contains updates and needs to be checkpointed, and a weak reference
 * otherwise. The Persistit instance contains a collection of
 * <code>AccumulatorRef</code> instances; these are used when determining
 * which accumulators to include in the checkpoint operation. Once an
 * <code>Accumulator</code>'s checkpoint has been written, the strong
 * reference is removed until there is a subsequent update.
 * </p>
 * <p>
 * Scenario: a process creates a new Tree, creates an Accumulator and then
 * releases all references to the Tree. Eventually the Tree and the new
 * Accumulator should be garbage collected; however, the Accumulator must be
 * retained until any values it has accumulated have been written as part of
 * a checkpoint. The dual references in this class are intended to support
 * this behavior; the _checkpointRef field is null whenever there are no
 * changes to checkpoint; the _weakRef is used to detect when it is
 * permissible to remove the <code>AccumulatorRef</code> from the Persistit
 * instance's accumulator set.
 * </p>
 */
final static class AccumulatorRef {
// Always refers to the Accumulator, but does not prevent its collection.
final WeakReference<Accumulator> _weakRef;
// Largest timestamp passed to checkpointNeeded so far.
final AtomicLong _latestUpdate = new AtomicLong();
// Strong reference; non-null while there are changes to checkpoint.
volatile Accumulator _checkpointRef;
AccumulatorRef(final Accumulator acc) {
_weakRef = new WeakReference<Accumulator>(acc);
}
/**
 * Return the current strong reference (possibly null) and, if no update
 * has been registered at or after <code>timestamp</code>, clear it.
 *
 * @param timestamp
 *            the timestamp compared against the latest registered
 *            update
 * @return the value of the strong reference as read on entry, or
 *         <code>null</code> if there was none
 */
Accumulator takeCheckpointRef(final long timestamp) {
final Accumulator result = _checkpointRef;
if (timestamp > _latestUpdate.get()) {
_checkpointRef = null;
// Re-check after clearing: if an update was registered in the
// meantime with a timestamp at or beyond this one, put the strong
// reference back so that update is not lost.
if (timestamp <= _latestUpdate.get()) {
_checkpointRef = result;
}
}
return result;
}
/**
 * Register that the Accumulator was updated at <code>timestamp</code>
 * and therefore must be kept strongly reachable.
 *
 * @param acc
 *            the Accumulator to hold strongly
 * @param timestamp
 *            timestamp of the update
 */
void checkpointNeeded(final Accumulator acc, final long timestamp) {
// Raise _latestUpdate to timestamp with compare-and-set, retrying on
// contention. If a later timestamp has already been recorded there is
// nothing to do and the method returns without touching
// _checkpointRef.
while (true) {
final long latest = _latestUpdate.get();
if (latest > timestamp) {
return;
}
if (_latestUpdate.compareAndSet(latest, timestamp)) {
break;
}
}
_checkpointRef = acc;
}
/**
 * @return <code>true</code> while the Accumulator is still reachable
 *         through either the weak or the strong reference
 */
boolean isLive() {
return _weakRef.get() != null || _checkpointRef != null;
}
}
private Accumulator(final Tree tree, final int index, final long baseValue, final TransactionIndex transactionIndex) {
if (index < 0 || index > MAX_INDEX) {
throw new IllegalArgumentException("Index out of bounds: " + index);
}
_tree = tree;
_index = index;
_baseValue = baseValue;
_checkpointValue = baseValue;
_liveValue.set(baseValue);
_transactionIndex = transactionIndex;
_bucketValues = new long[transactionIndex.getHashTableSize()];
_accumulatorRef = new AccumulatorRef(this);
}
/**
 * Apply the value from a <code>Delta</code> to an aggregate. This method
 * must be commutative, that is, apply(x, y) must be equal to apply(y, x).
 * This method is called when computing a snapshot value and when
 * aggregating <code>Delta</code> instances.
 *
 * @param a
 *            one operand, for example the aggregate computed so far
 * @param b
 *            the other operand, for example the value of a
 *            <code>Delta</code>
 * @return the result of the commutative operation on a and b
 */
abstract long applyValue(long a, long b);
/**
 * Compute a live updated value. For <code>MaxAccumulator</code> and
 * <code>MinAccumulator</code> this method returns the same value as
 * {@link #applyValue(long, long)}; <code>SumAccumulator</code> does too,
 * but additionally rejects an addition that overflows. For
 * <code>SeqAccumulator</code> update(a, b) returns a + b (computed
 * atomically) whereas apply(a, b) returns Math.max(a, b).
 *
 * @param a
 *            the current live value
 * @param b
 *            the value being contributed
 * @return the new live value
 */
abstract long updateValue(long a, long b);
/**
 * Choose which number is recorded in the <code>Delta</code> for an update.
 *
 * @param value
 *            the value contributed by the caller
 * @param updated
 *            the live value that resulted from the update
 * @return One of the supplied parameters as the value to be held in a
 *         <code>Delta</code>.
 *
 */
abstract long selectValue(long value, long updated);
/**
 * @return the {@link Type} constant identifying the concrete kind of this
 *         Accumulator
 */
abstract Type getType();
/**
 * Fold the value of a <code>Delta</code> into the accumulated value of one
 * bucket, using {@link #applyValue(long, long)}.
 *
 * @param hashIndex
 *            index of the bucket whose accumulated value is modified
 * @param delta
 *            the Delta whose value is folded in
 */
void aggregate(final int hashIndex, final Delta delta) {
    final long accumulated = _bucketValues[hashIndex];
    _bucketValues[hashIndex] = applyValue(accumulated, delta.getValue());
}
/**
 * @return the <code>AccumulatorRef</code> created for this Accumulator by
 *         its constructor
 */
AccumulatorRef getAccumulatorRef() {
return _accumulatorRef;
}
/**
 * Tell this Accumulator's <code>AccumulatorRef</code> that an update was
 * made at the supplied timestamp, so that it keeps a strong reference to
 * this Accumulator.
 *
 * @param timestamp
 *            timestamp of the update
 */
void checkpointNeeded(final long timestamp) {
_accumulatorRef.checkpointNeeded(this, timestamp);
}
/**
 * @param hashIndex
 *            index of a bucket
 * @return the value accumulated so far for that bucket
 */
long getBucketValue(final int hashIndex) {
return _bucketValues[hashIndex];
}
/**
 * Record a checkpoint value together with the timestamp it was computed
 * for. These are two separate volatile writes, value first; they are not
 * applied as a single atomic step.
 *
 * @param value
 *            the checkpoint value
 * @param timestamp
 *            the checkpoint timestamp
 */
void setCheckpointValueAndTimestamp(final long value, final long timestamp) {
_checkpointValue = value;
_checkpointTimestamp = timestamp;
}
/**
 * @return the most recently recorded checkpoint value; initially the base
 *         value supplied to the constructor
 */
long getCheckpointValue() {
return _checkpointValue;
}
/**
 * @return the timestamp recorded with the most recent checkpoint value
 */
long getCheckpointTimestamp() {
return _checkpointTimestamp;
}
/**
 * Store a temporary value used while checkpoint values are being computed.
 * The backing field is neither volatile nor otherwise synchronized here.
 *
 * @param value
 *            the temporary value
 */
void setCheckpointTemp(final long value) {
_checkpointTemp = value;
}
/**
 * @return the temporary value most recently stored by
 *         {@link #setCheckpointTemp(long)}
 */
long getCheckpointTemp() {
return _checkpointTemp;
}
/**
 * Factory for the concrete Accumulator subclasses.
 *
 * @param type
 *            Indicates which kind of <code>Accumulator</code> to return
 * @param tree
 *            The {@link Tree} to which this Accumulator will belong
 * @param index
 *            An index number by which this Accumulator can be accessed.
 * @param baseValue
 *            the initial base value; the checkpoint value and live value
 *            of the new Accumulator also start out equal to it
 * @param transactionIndex
 *            the <code>TransactionIndex</code> component
 * @return an Accumulator of the specified type
 * @throws IllegalArgumentException
 *             if <code>type</code> is not one of the known constants or
 *             <code>index</code> is out of range
 */
static Accumulator accumulator(final Type type, final Tree tree, final int index, final long baseValue,
        final TransactionIndex transactionIndex) {
    final Accumulator created;
    switch (type) {
    case SUM:
        created = new SumAccumulator(tree, index, baseValue, transactionIndex);
        break;
    case MAX:
        created = new MaxAccumulator(tree, index, baseValue, transactionIndex);
        break;
    case MIN:
        created = new MinAccumulator(tree, index, baseValue, transactionIndex);
        break;
    case SEQ:
        created = new SeqAccumulator(tree, index, baseValue, transactionIndex);
        break;
    default:
        throw new IllegalArgumentException("No such type " + type);
    }
    return created;
}
/**
 * @return the base value: the value supplied at construction, combined
 *         with every value since passed to <code>updateBaseValue</code>
 */
long getBaseValue() {
return _baseValue;
}
/**
 * Non-transactional view aggregating all updates applied to this
 * Accumulator, whether committed or not. See <a
 * href="#_SnapshotValue">Snapshot and Live Values</a>. The value is read
 * from an <code>AtomicLong</code> and does not require an active
 * transaction.
 *
 * @return the live value
 */
public long getLiveValue() {
return _liveValue.get();
}
/**
 * Compute the snapshot value seen by the current transaction: the result of
 * accumulating values contributed by (a) all transactions having commit
 * timestamps less than or equal to the current transaction's start
 * timestamp, and (b) operations performed by the current transaction up to
 * its current step. See <a href="#_SnapshotValue">Snapshot and Live
 * Values</a>. The transaction obtained from the Persistit instance that
 * owns the Tree must be active.
 *
 * @return the computed snapshot value
 * @throws PersistitInterruptedException
 *             if the computation is interrupted
 */
public long getSnapshotValue() throws PersistitInterruptedException {
    final Transaction transaction = _tree.getPersistit().getTransaction();
    transaction.checkActive();
    final long startTimestamp = transaction.getStartTimestamp();
    final int currentStep = transaction.getStep();
    return getSnapshotValue(startTimestamp, currentStep);
}
/**
 * Ask the <code>TransactionIndex</code> for the snapshot value of this
 * Accumulator, starting from the current base value.
 *
 * @param timestamp
 *            transactions with commit timestamps less than or equal to
 *            this value contribute to the result
 * @param step
 *            bound on the step numbers of the current transaction's own
 *            operations that contribute to the result. NOTE(review):
 *            whether the bound is inclusive is decided by
 *            <code>TransactionIndex</code> -- confirm there.
 * @return the computed snapshot value
 * @throws PersistitInterruptedException
 *             wrapping the <code>InterruptedException</code> raised if
 *             the computation is interrupted
 */
long getSnapshotValue(final long timestamp, final int step) throws PersistitInterruptedException {
    long snapshot;
    try {
        snapshot = _transactionIndex.getAccumulatorSnapshot(this, timestamp, step, _baseValue);
    } catch (final InterruptedException interrupted) {
        throw new PersistitInterruptedException(interrupted);
    }
    return snapshot;
}
/**
 * Apply an update to the base value and reset the live value to the new
 * base value. This method is used only during recovery processing to apply
 * Deltas from recovered committed transactions.
 *
 * @param value
 *            the Delta value to combine into the base value
 * @param commitTimestamp
 *            commit timestamp of the recovered transaction; registered as
 *            the time of an update that needs checkpointing
 */
void updateBaseValue(final long value, final long commitTimestamp) {
    final long currentBase = _baseValue;
    _baseValue = applyValue(currentBase, value);
    _liveValue.set(_baseValue);

    // The delta being replayed belongs to a transaction that committed
    // after the keystone checkpoint, so this Accumulator has to be written
    // again by the next checkpoint.
    checkpointNeeded(commitTimestamp);
}
/**
 * Update the Accumulator by contributing a value. The contribution is
 * immediately accumulated into the live value, and it is also posted as a
 * {@link Delta} to the supplied <code>TransactionStatus</code>.
 *
 * @param value
 *            The delta value
 * @param status
 *            The TransactionStatus of the transaction it applies to
 * @param step
 *            The step at which the value is applied
 * @return the live value produced by this update
 * @throws IllegalStateException
 *             if the transaction is no longer uncommitted
 */
long update(final long value, final TransactionStatus status, final int step) {
    final boolean uncommitted = status.getTc() == TransactionStatus.UNCOMMITTED;
    if (!uncommitted) {
        throw new IllegalStateException("Transaction has already committed or aborted");
    }

    // Optimistic update of the live value: recompute and retry until the
    // compare-and-set succeeds against an unchanged previous value.
    long before;
    long after;
    do {
        before = _liveValue.get();
        after = updateValue(before, value);
    } while (!_liveValue.compareAndSet(before, after));

    // Record the contribution as a Delta on the TransactionStatus. The
    // subclass decides whether the Delta holds the contribution or the
    // resulting live value.
    _transactionIndex.addOrCombineDelta(status, this, step, selectValue(value, after));
    return after;
}
/**
 * @return the Tree supplied when this Accumulator was constructed; other
 *         code in this class allows for it being <code>null</code>
 */
Tree getTree() {
return _tree;
}
/**
 * @return the index of this Accumulator, between 0 and MAX_INDEX inclusive
 */
int getIndex() {
return _index;
}
/**
 * @return a formatted report showing the Tree, index, type, base value and
 *         live value of this <code>Accumulator</code>.
 */
@Override
public String toString() {
    final String treeName = _tree == null ? "null" : _tree.getName();
    return String.format("Accumulator(tree=%s index=%d type=%s base=%,d live=%,d)", treeName, _index, getType(),
            _baseValue, _liveValue.get());
}
/**
 * Write this Accumulator into the supplied <code>Value</code> as four
 * consecutive items: tree name (empty string when there is no Tree), index,
 * type name and checkpoint value.
 *
 * @param value
 *            the Value that receives the items
 */
void store(final Value value) {
    final String treeName = _tree == null ? "" : _tree.getName();
    value.put(treeName);
    value.put(_index);
    final String typeName = getType().toString();
    value.put(typeName);
    value.put(getCheckpointValue());
}
/**
 * Fetch the stored state of one Accumulator from the directory of the
 * volume that contains the Tree.
 *
 * @param tree
 *            the Tree that owns the Accumulator
 * @param index
 *            index of the Accumulator within the Tree
 * @return the stored <code>AccumulatorState</code>, or <code>null</code>
 *         if no value is defined under the Accumulator's key
 * @throws PersistitException
 */
static AccumulatorState getAccumulatorState(final Tree tree, final int index) throws PersistitException {
    final Exchange exchange = tree.getVolume().getStructure().directoryExchange();
    // Key: directory tree name / accumulator marker / tree name / index
    exchange.clear().append(VolumeStructure.DIRECTORY_TREE_NAME).append(VolumeStructure.TREE_ACCUMULATOR)
            .append(tree.getName()).append(index).fetch();
    if (!exchange.getValue().isDefined()) {
        return null;
    }
    return (AccumulatorState) exchange.getValue().get();
}
/**
 * Store each Accumulator in the list under its key (directory tree name /
 * accumulator marker / tree name / index) in the volume that contains its
 * Tree. An Exchange is reused for consecutive Accumulators that belong to
 * the same volume.
 *
 * @param list
 *            the Accumulators to store; each must have a Tree
 * @throws PersistitException
 */
static void saveAccumulatorCheckpointValues(final List<Accumulator> list) throws PersistitException {
    Exchange exchange = null;
    for (final Accumulator accumulator : list) {
        final Tree tree = accumulator.getTree();
        final Volume volume = tree.getVolume();
        final boolean sameVolume = exchange != null && exchange.getVolume().equals(volume);
        if (!sameVolume) {
            exchange = volume.getStructure().accumulatorExchange();
        }
        exchange.clear().append(VolumeStructure.DIRECTORY_TREE_NAME).append(VolumeStructure.TREE_ACCUMULATOR)
                .append(tree.getName()).append(accumulator.getIndex());
        exchange.getValue().put(accumulator);
        exchange.store();
    }
}
}