/**
* Copyright 2013 David Rusek <dave dot rusek at gmail dot com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.robotninjas.barge.log;
import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.base.Optional;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.SettableFuture;
import com.google.inject.Inject;
import journal.io.api.Journal;
import org.robotninjas.barge.ClusterConfig;
import org.robotninjas.barge.Replica;
import org.robotninjas.barge.api.AppendEntries;
import org.robotninjas.barge.api.Entry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;
import javax.annotation.Nonnegative;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentMap;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Throwables.propagate;
import static com.google.common.collect.Lists.newArrayList;
import static java.util.Collections.unmodifiableList;
/**
 * The replicated log of a single Raft node together with the node's durable
 * bookkeeping (current term, last vote and commit index).
 *
 * <p>Entries themselves live in a {@link RaftJournal}; this class keeps an
 * in-memory, index-ordered map from log index to the journal mark of the
 * corresponding entry, and mirrors every state change into the journal so it
 * can be rebuilt by {@link #load()}.
 *
 * <p>Instances are not thread-safe (see {@link NotThreadSafe}).
 */
@NotThreadSafe
public class RaftLog {

  private static final Logger LOGGER = LoggerFactory.getLogger(RaftLog.class);
  private static final byte[] EMPTY = new byte[0];

  /** Stand-in for the "previous entry" when there is none: term 0, empty command. */
  private static final Entry SENTINEL = Entry.newBuilder().setCommand(EMPTY).setTerm(0).build();

  /** Log index -> journal mark of the entry stored at that index. */
  private final TreeMap<Long, RaftJournal.Mark> log = Maps.newTreeMap();
  private final ClusterConfig config;
  private final StateMachineProxy stateMachine;
  private final RaftJournal journal;

  /** Log index -> future that is completed once the entry at that index has been applied. */
  private final ConcurrentMap<Object, SettableFuture<Object>> operationResults = Maps.newConcurrentMap();

  private volatile long lastLogIndex = 0;
  private volatile long lastLogTerm = 0;
  private volatile long currentTerm = 0;
  private volatile Optional<Replica> votedFor = Optional.absent();
  private volatile long commitIndex = 0;
  private volatile long lastApplied = 0;

  /**
   * Creates a log backed by the given journal.
   *
   * @param journal      underlying journal, wrapped in a {@link RaftJournal}
   * @param config       cluster configuration (local and remote replicas)
   * @param stateMachine receiver of committed operations
   * @throws NullPointerException if any argument is {@code null}
   */
  @Inject
  RaftLog(@Nonnull Journal journal, @Nonnull ClusterConfig config,
          @Nonnull StateMachineProxy stateMachine) {
    this.journal = new RaftJournal(checkNotNull(journal), checkNotNull(config));
    this.config = checkNotNull(config);
    this.stateMachine = checkNotNull(stateMachine);
  }

  /**
   * Rebuilds the in-memory state by replaying the journal, then applies every
   * entry that is already committed to the state machine.
   */
  public void load() {

    LOGGER.info("Replaying log");

    journal.replay(new RaftJournal.Visitor() {
      @Override
      public void term(RaftJournal.Mark mark, long term) {
        // NOTE(review): unlike currentTerm(long), replaying a term record does not
        // clear votedFor - confirm this matches what RaftJournal writes on appendTerm.
        currentTerm = Math.max(currentTerm, term);
      }

      @Override
      public void vote(RaftJournal.Mark mark, Optional<Replica> vote) {
        votedFor = vote;
      }

      @Override
      public void commit(RaftJournal.Mark mark, long commit) {
        commitIndex = Math.max(commitIndex, commit);
      }

      @Override
      public void append(RaftJournal.Mark mark, Entry entry, long index) {
        lastLogIndex = Math.max(index, lastLogIndex);
        lastLogTerm = Math.max(entry.getTerm(), lastLogTerm);
        log.put(index, mark);
      }
    });

    fireComitted();

    LOGGER.info("Finished replaying log lastIndex {}, currentTerm {}, commitIndex {}, lastVotedFor {}",
        lastLogIndex, currentTerm, commitIndex, votedFor.orNull());
  }

  /**
   * Writes {@code entry} to the journal at {@code index}, records its mark in
   * the in-memory index and registers a future for the entry's result.
   *
   * <p>NOTE(review): a future already registered under the same index is
   * replaced by the map {@code put} and is never completed by this class.
   *
   * @return the future registered for {@code index}
   */
  private SettableFuture<Object> storeEntry(final long index, @Nonnull Entry entry) {
    LOGGER.debug("{} storing {}", config.local(), entry);
    RaftJournal.Mark mark = journal.appendEntry(entry, index);
    log.put(index, mark);
    SettableFuture<Object> result = SettableFuture.create();
    operationResults.put(index, result);
    return result;
  }

  /**
   * Appends a new local operation at the next log index, tagged with the
   * current term.
   *
   * @param operation raw command bytes
   * @return a future that is bridged to the state machine's result when the
   *         entry is applied by {@link #fireComitted()}
   */
  public ListenableFuture<Object> append(@Nonnull byte[] operation) {

    long index = ++lastLogIndex;
    lastLogTerm = currentTerm;

    Entry entry =
        Entry.newBuilder()
            .setCommand(operation)
            .setTerm(currentTerm)
            .build();

    return storeEntry(index, entry);
  }

  /**
   * Handles an AppendEntries request: verifies that the local log agrees with
   * the sender at {@code prevLogIndex}, discards everything after that index
   * and stores the supplied entries.
   *
   * <p>The request is rejected when
   * <ul>
   *   <li>{@code prevLogIndex > 0} and the local log has no entry at that
   *       index (the log is too short / has a gap), or</li>
   *   <li>the entry at {@code prevLogIndex} has a term other than
   *       {@code prevLogTerm}.</li>
   * </ul>
   * In both cases the log is left untouched.
   *
   * @param appendEntries the request; supplies prevLogIndex, prevLogTerm and the entries
   * @return {@code true} if the entries were accepted, {@code false} if the
   *         consistency check failed
   */
  public boolean append(@Nonnull AppendEntries appendEntries) {

    final long prevLogIndex = appendEntries.getPrevLogIndex();
    final long prevLogTerm = appendEntries.getPrevLogTerm();
    final List<Entry> entries = appendEntries.getEntriesList();

    if (log.containsKey(prevLogIndex)) {

      RaftJournal.Mark previousMark = log.get(prevLogIndex);
      Entry previousEntry = journal.get(previousMark);

      if ((prevLogIndex > 0) && previousEntry.getTerm() != prevLogTerm) {
        LOGGER.debug("Append prevLogIndex {} prevLogTerm {}", prevLogIndex, prevLogTerm);
        return false;
      }

      // Terms match: drop every entry after prevLogIndex, on disk and in memory.
      journal.truncateTail(previousMark);
      log.tailMap(prevLogIndex, false).clear();

    } else if (prevLogIndex > 0) {
      // The sender assumes an entry this log does not have. Accepting would
      // move lastLogIndex past the real end of the log and leave a gap that
      // fireComitted()/getEntriesFrom() cannot read, so reject and let the
      // sender retry from an earlier index.
      LOGGER.debug("Append prevLogIndex {} prevLogTerm {}", prevLogIndex, prevLogTerm);
      return false;
    }
    // NOTE(review): when prevLogIndex == 0 nothing is truncated; entries already
    // stored beyond the ones supplied here stay in the journal and in 'log'.
    // Truncating them needs a RaftJournal operation not visible from this class.

    lastLogIndex = prevLogIndex;
    for (Entry entry : entries) {
      storeEntry(++lastLogIndex, entry);
      lastLogTerm = entry.getTerm();
    }

    return true;
  }

  /**
   * Returns up to {@code max} entries starting at {@code beginningIndex}
   * (inclusive), together with the index and term of the entry just before
   * them.
   *
   * <p>When the preceding index is {@code <= 0} the sentinel's term (0) is
   * reported. The entries are a lazily transformed view: each one is read
   * from the journal when the iterable is traversed.
   *
   * @param beginningIndex first index to return, must be {@code >= 0}
   * @param max            maximum number of entries to return
   * @throws IllegalArgumentException if {@code beginningIndex} is negative
   */
  @Nonnull
  public GetEntriesResult getEntriesFrom(@Nonnegative long beginningIndex, @Nonnegative int max) {

    checkArgument(beginningIndex >= 0);

    long previousIndex = beginningIndex - 1;
    Entry previous = previousIndex <= 0 ? SENTINEL : journal.get(log.get(previousIndex));

    Iterable<Entry> entries = FluentIterable
        .from(log.tailMap(beginningIndex).values())
        .limit(max)
        .transform(new Function<RaftJournal.Mark, Entry>() {
          @Nullable
          @Override
          public Entry apply(@Nullable RaftJournal.Mark input) {
            return journal.get(input);
          }
        });

    return new GetEntriesResult(previous.getTerm(), previousIndex, entries);
  }

  /**
   * Dispatches every entry after {@code lastApplied}, up to the smaller of
   * {@code commitIndex} and {@code lastLogIndex}, to the state machine in
   * index order. {@code lastApplied} advances by one after each dispatched
   * entry.
   *
   * <p>If a future was registered for an entry's index it is removed and
   * completed (or failed) with the outcome of the state machine call.
   */
  void fireComitted() {
    try {
      for (long i = lastApplied + 1; i <= Math.min(commitIndex, lastLogIndex); ++i, ++lastApplied) {
        Entry entry = journal.get(log.get(i));
        byte[] rawCommand = entry.getCommand();
        // Hand the state machine a read-only view of the command bytes.
        final ByteBuffer operation = ByteBuffer.wrap(rawCommand).asReadOnlyBuffer();
        ListenableFuture<Object> result = stateMachine.dispatchOperation(operation);

        final SettableFuture<Object> returnedResult = operationResults.remove(i);
        // returnedResult may be null on log replay
        if (returnedResult != null) {
          Futures.addCallback(result, new PromiseBridge<Object>(returnedResult));
        }
      }
    } catch (Exception e) {
      throw propagate(e);
    }
  }

  /** @return index of the last entry in the log, 0 if none has been stored */
  public long lastLogIndex() {
    return lastLogIndex;
  }

  /** @return the last log term recorded by this instance */
  public long lastLogTerm() {
    return lastLogTerm;
  }

  /** @return the current commit index */
  public long commitIndex() {
    return commitIndex;
  }

  /** @return the cluster configuration this log was created with */
  public ClusterConfig config() {
    return config;
  }

  /**
   * Sets the commit index, records it in the journal and applies any entries
   * that became eligible.
   *
   * @param index new commit index
   */
  public void commitIndex(long index) {
    commitIndex = index;
    journal.appendCommit(index);
    fireComitted();
  }

  /** @return the current term */
  public long currentTerm() {
    return currentTerm;
  }

  /**
   * Moves to {@code term}: publishes it in the logging MDC under
   * {@code "term"}, clears the in-memory vote and records the term in the
   * journal.
   *
   * @param term the new term, must be {@code >= 0}
   * @throws IllegalArgumentException if {@code term} is negative
   */
  public void currentTerm(@Nonnegative long term) {
    checkArgument(term >= 0);
    MDC.put("term", Long.toString(term));
    LOGGER.debug("New term {}", term);
    currentTerm = term;
    votedFor = Optional.absent();
    journal.appendTerm(term);
  }

  /** @return the replica voted for, or absent if no vote is recorded */
  @Nonnull
  public Optional<Replica> votedFor() {
    return votedFor;
  }

  /**
   * Records a vote in memory and in the journal.
   *
   * @param vote the replica voted for, or absent to record "no vote"
   */
  public void votedFor(@Nonnull Optional<Replica> vote) {
    LOGGER.debug("Voting for {}", vote.orNull());
    votedFor = vote;
    journal.appendVote(vote);
  }

  /** @return the local replica as given by the cluster configuration */
  @Nonnull
  public Replica self() {
    return config.local();
  }

  /** @return an unmodifiable snapshot of the remote replicas */
  @Nonnull
  public List<Replica> members() {
    return unmodifiableList(newArrayList(config.remote()));
  }

  /**
   * Looks a replica up through the cluster configuration.
   *
   * @param info replica description understood by {@link ClusterConfig#getReplica}
   */
  @Nonnull
  public Replica getReplica(String info) {
    return config.getReplica(info);
  }

  @Override
  public String toString() {
    return Objects.toStringHelper(getClass())
        .add("lastLogIndex", lastLogIndex)
        .add("lastApplied", lastApplied)
        .add("commitIndex", commitIndex)
        .add("lastVotedFor", votedFor)
        .toString();
  }

  /**
   * Forwards the outcome of one future to a {@link SettableFuture}: success
   * sets the value, failure sets the exception.
   */
  private static class PromiseBridge<V> implements FutureCallback<V> {

    private final SettableFuture<V> promise;

    private PromiseBridge(SettableFuture<V> promise) {
      this.promise = promise;
    }

    @Override
    public void onSuccess(@Nullable V result) {
      promise.set(result);
    }

    @Override
    public void onFailure(Throwable t) {
      promise.setException(t);
    }
  }
}