Package net.kuujo.copycat.internal.state

Source Code of net.kuujo.copycat.internal.state.StateController

/*
* Copyright 2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.kuujo.copycat.internal.state;

import net.kuujo.copycat.CopycatException;
import net.kuujo.copycat.CopycatState;
import net.kuujo.copycat.event.MembershipChangeEvent;
import net.kuujo.copycat.event.VoteCastEvent;
import net.kuujo.copycat.internal.StateMachineExecutor;
import net.kuujo.copycat.internal.cluster.RemoteNode;
import net.kuujo.copycat.internal.log.ConfigurationEntry;
import net.kuujo.copycat.internal.log.CopycatEntry;
import net.kuujo.copycat.internal.log.OperationEntry;
import net.kuujo.copycat.internal.log.SnapshotEntry;
import net.kuujo.copycat.log.Entry;
import net.kuujo.copycat.protocol.*;
import org.slf4j.Logger;

import java.io.IOException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicBoolean;

/**
* Base replica state controller.
*
* @author <a href="http://github.com/kuujo">Jordan Halterman</a>
*/
abstract class StateController implements RequestHandler {
  protected StateContext context;
  private final AtomicBoolean transition = new AtomicBoolean();

  /**
   * Returns the controller state.
   *
   * @return The controller state.
   */
  abstract CopycatState state();

  /**
   * Returns the state logger.
   */
  abstract Logger logger();

  /**
   * Logs a request.
   */
  protected final <R extends Request> R logRequest(R request) {
    logger().debug("{} - Received {}", context.clusterManager().localNode(), request);
    return request;
  }

  /**
   * Logs a response.
   */
  protected final <R extends Response> R logResponse(R response) {
    logger().debug("{} - Sent {}", context.clusterManager().localNode(), response);
    return response;
  }

  /**
   * Initializes the controller.
   *
   * @param context The state context.
   */
  void init(StateContext context) {
    this.context = context;
    context.clusterManager().localNode().server().requestHandler(this);
  }

  @Override
  public synchronized CompletableFuture<PingResponse> ping(final PingRequest request) {
    CompletableFuture<PingResponse> future = CompletableFuture.completedFuture(logResponse(handlePing(logRequest(request))));
    // If a transition is required then transition back to the follower state.
    // If the node is already a follower then the transition will be ignored.
    if (transition.get()) {
      context.transition(FollowerController.class);
    }
    return future;
  }

  /**
   * Handles a ping request.
   */
  private synchronized PingResponse handlePing(PingRequest request) {
    // If the request indicates a term that is greater than the current term then
    // assign that term and leader to the current context and step down as leader.
    if (request.term() > context.currentTerm() || (request.term() == context.currentTerm() && context.currentLeader() == null)) {
      context.currentTerm(request.term());
      context.currentLeader(request.leader());
      transition.set(true);
    }

    // If the request term is less than the current term then immediately
    // reply false and return our current term. The leader will receive
    // the updated term and step down.
    if (request.term() < context.currentTerm()) {
      logger().warn("{} - Rejected {}: request term is less than the current term ({})", context.clusterManager()
        .localNode(), request, context.currentTerm());
      return new PingResponse(request.id(), context.currentTerm(), false);
    } else if (request.logIndex() > 0 && request.logTerm() > 0) {
      return doCheckPingEntry(request);
    }
    return new PingResponse(request.id(), context.currentTerm(), true);
  }

  /**
   * Checks the ping log entry for consistency.
   */
  private synchronized PingResponse doCheckPingEntry(PingRequest request) {
    if (request.logIndex() > context.log().lastIndex()) {
      logger().warn("{} - Rejected {}: previous index ({}) is greater than the local log's last index ({})", context.clusterManager().localNode(), request, request.logIndex(), context.log().lastIndex());
      return new PingResponse(request.id(), context.currentTerm(), false);
    }

    // If the log entry exists then load the entry.
    // If the last log entry's term is not the same as the given
    // prevLogTerm then return false. This will cause the leader to
    // decrement this node's nextIndex and ultimately retry with the
    // leader's previous log entry so that the inconsistent entry
    // can be overwritten.
    CopycatEntry entry = context.log().getEntry(request.logIndex());
    if (entry == null) {
      logger().warn("{} - Rejected {}: request entry not found in local log", context.clusterManager().localNode(), request);
      return new PingResponse(request.id(), context.currentTerm(), false);
    } else if (entry.term() != request.logTerm()) {
      logger().warn("{} - Rejected {}: request entry term does not match local log", context.clusterManager().localNode(), request);
      return new PingResponse(request.id(), context.currentTerm(), false);
    } else {
      doApplyCommits(request.commitIndex());
      return new PingResponse(request.id(), context.currentTerm(), true);
    }
  }

  @Override
  public synchronized CompletableFuture<SyncResponse> sync(final SyncRequest request) {
    CompletableFuture<SyncResponse> future = CompletableFuture.completedFuture(logResponse(handleSync(logRequest(request))));
    // If a transition is required then transition back to the follower state.
    // If the node is already a follower then the transition will be ignored.
    if (transition.get()) {
      context.transition(FollowerController.class);
    }
    return future;
  }

  /**
   * Starts the sync process.
   */
  private synchronized SyncResponse handleSync(SyncRequest request) {
    // If the request indicates a term that is greater than the current term then
    // assign that term and leader to the current context and step down as leader.
    if (request.term() > context.currentTerm() || (request.term() == context.currentTerm() && context.currentLeader() == null)) {
      context.currentTerm(request.term());
      context.currentLeader(request.leader());
      transition.set(true);
    }

    // If the request term is less than the current term then immediately
    // reply false and return our current term. The leader will receive
    // the updated term and step down.
    if (request.term() < context.currentTerm()) {
      logger().warn("{} - Rejected {}: request term is less than the current term ({})", context.clusterManager().localNode(), request, context.currentTerm());
      return new SyncResponse(request.id(), context.currentTerm(), false, context.log().lastIndex());
    } else if (request.prevLogIndex() > 0 && request.prevLogTerm() > 0) {
      return doCheckPreviousEntry(request);
    } else {
      return doAppendEntries(request);
    }
  }

  /**
   * Checks the previous log entry for consistency.
   */
  private synchronized SyncResponse doCheckPreviousEntry(SyncRequest request) {
    if (request.prevLogIndex() > context.log().lastIndex()) {
      logger().warn("{} - Rejected {}: previous index ({}) is greater than the local log's last index ({})", context.clusterManager().localNode(), request, request.prevLogIndex(), context.log().lastIndex());
      return new SyncResponse(request.id(), context.currentTerm(), false, context.log().lastIndex());
    }

    // If the log entry exists then load the entry.
    // If the last log entry's term is not the same as the given
    // prevLogTerm then return false. This will cause the leader to
    // decrement this node's nextIndex and ultimately retry with the
    // leader's previous log entry so that the inconsistent entry
    // can be overwritten.
    CopycatEntry entry = context.log().getEntry(request.prevLogIndex());
    if (entry == null) {
      logger().warn("{} - Rejected {}: request entry not found in local log", context.clusterManager().localNode(), request);
      return new SyncResponse(request.id(), context.currentTerm(), false, context.log().lastIndex());
    } else if (entry.term() != request.prevLogTerm()) {
      logger().warn("{} - Rejected {}: request entry term does not match local log", context.clusterManager().localNode(), request);
      return new SyncResponse(request.id(), context.currentTerm(), false, context.log().lastIndex());
    } else {
      return doAppendEntries(request);
    }
  }

  /**
   * Appends entries to the local log.
   */
  private synchronized SyncResponse doAppendEntries(SyncRequest request) {
    // If the log contains entries after the request's previous log index
    // then remove those entries to be replaced by the request entries.
    if (!request.entries().isEmpty()) {
      synchronized (context.log()) {
        long index = request.prevLogIndex();
        for (CopycatEntry entry : request.<CopycatEntry>entries()) {
          index++;
          // Replicated snapshot entries are *always* immediately logged and applied to the state machine
          // since snapshots are only taken of committed state machine state. This will cause all previous
          // entries to be removed from the log.
          if (entry instanceof SnapshotEntry) {
            installSnapshot(index, (SnapshotEntry) entry);
          } else {
            CopycatEntry match = context.log().getEntry(index);
            if (match != null) {
              if (entry.term() != match.term()) {
                logger().warn("{} - Synced entry does not match local log, removing incorrect entries", context.clusterManager().localNode());
                context.log().removeAfter(index - 1);
                context.log().appendEntry(entry);
                logger().debug("{} - Appended {} to log at index {}", context.clusterManager().localNode(), entry, index);
              }
            } else {
              context.log().appendEntry(entry);
              logger().debug("{} - Appended {} to log at index {}", context.clusterManager().localNode(), entry, index);
            }
          }
        }
      }
    }
    doApplyCommits(request.commitIndex());
    return new SyncResponse(request.id(), context.currentTerm(), true, context.log().lastIndex());
  }

  /**
   * Applies commits to the local state machine.
   */
  private synchronized void doApplyCommits(long commitIndex) {
    // If the synced commit index is greater than the local commit index then
    // apply commits to the local state machine.
    // Also, it's possible that one of the previous command applications failed
    // due to asynchronous communication errors, so alternatively check if the
    // local commit index is greater than last applied. If all the state machine
    // commands have not yet been applied then we want to re-attempt to apply them.
    if (commitIndex > context.commitIndex() || context.commitIndex() > context.lastApplied()) {
      // Update the local commit index with min(request commit, last log // index)
      long lastIndex = context.log().lastIndex();
      context.commitIndex(Math.min(Math.max(commitIndex, context.commitIndex()), lastIndex));

      // If the updated commit index indicates that commits remain to be
      // applied to the state machine, iterate entries and apply them.
      if (context.commitIndex() > context.lastApplied()) {
        // Starting after the last applied entry, iterate through new entries
        // and apply them to the state machine up to the commit index.
        for (long i = context.lastApplied() + 1; i <= Math.min(context.commitIndex(), lastIndex); i++) {
          // Apply the entry to the state machine.
          applyEntry(i);
        }

        // Once entries have been applied check whether we need to compact the log.
        compactLog();
      }
    }
  }

  /**
   * Installs a replicated snapshot.
   */
  private synchronized void installSnapshot(long index, SnapshotEntry entry) {
    logger().info("{} - Installing snapshot {}", context.clusterManager().localNode(), entry);
    logger().info("{} - Compacting log", context.clusterManager().localNode());
    try {
      context.log().compact(index, entry);
    } catch (IOException e) {
      logger().error(e.getMessage());
    }

    applySnapshot(index, (SnapshotEntry) entry);
    context.commitIndex(index);
    context.lastApplied(index);
  }

  /**
   * Applies the entry at the given index.
   *
   * @param index The index of the entry to apply.
   */
  protected void applyEntry(long index) {
    applyEntry(index, context.log().getEntry(index));
  }

  /**
   * Applies the entry at the given index.
   *
   * @param index The index of the entry to apply.
   * @param entry The entry to apply.
   */
  protected void applyEntry(long index, Entry entry) {
    // Validate that the entry being applied is the next entry in the log.
    if (context.lastApplied() == index-1) {
      // Ensure that the entry exists.
      if (entry == null) {
        throw new IllegalStateException("null entry cannot be applied to state machine");
      }
 
      // If the entry is a command entry, apply the command to the state machine.
      if (entry instanceof OperationEntry) {
        applyCommand(index, (OperationEntry) entry);
      }
      // If the entry is a configuration entry, update the local cluster configuration.
      else if (entry instanceof ConfigurationEntry) {
        applyConfig(index, (ConfigurationEntry) entry);
      }
      // If the entry is a snapshot entry, apply the snapshot to the local state machine.
      else if (entry instanceof SnapshotEntry) {
        applySnapshot(index, (SnapshotEntry) entry);
      }
      // If the entry is of another type, e.g. a no-op entry, simply set last applied.
      else {
        context.lastApplied(index);
      }
    }
  }

  /**
   * Applies a command entry to the state machine.
   *
   * @param index The index of the entry being applied.
   * @param entry The entry to apply.
   */
  protected void applyCommand(long index, OperationEntry entry) {
    try {
      logger().debug("{} - Apply operation: {}", context.clusterManager().localNode(), entry.operation());
      StateMachineExecutor.Operation operation = context.stateMachineExecutor().getOperation(entry.operation());
      if (operation != null) {
        operation.apply(entry.args());
      }
    } catch (Exception e) {
    } finally {
      context.lastApplied(index);
    }
  }

  /**
   * Applies a configuration entry to the replica.
   *
   * @param index The index of the entry being applied.
   * @param entry The entry to apply.
   */
  protected void applyConfig(long index, ConfigurationEntry entry) {
    try {
      logger().debug("{} - Apply configuration change: {}", context.clusterManager().localNode(), entry.cluster());
      context.clusterManager().cluster().update(entry.cluster(), null);
      context.events().membershipChange().handle(new MembershipChangeEvent(entry.cluster().getMembers()));
    } catch (Exception e) {
    } finally {
      context.lastApplied(index);
    }
  }

  /**
   * Applies a single snapshot entry to the state machine.
   *
   * @param index The index of the entry to apply.
   * @param entry The entry to apply.
   */
  protected void applySnapshot(long index, SnapshotEntry entry) {
    logger().debug("{} - Apply snapshot: {}", context.clusterManager().localNode(), entry.cluster());
    synchronized (context.log()) {
      // Apply the snapshot to the local state machine.
      context.stateMachineExecutor().stateMachine().installSnapshot(entry.data());

      // If the log is compactable then compact it at the snapshot index.
      try {
        context.log().compact(index, entry);
      } catch (IOException e) {
        throw new CopycatException(e, "Failed to compact log.");
      }

      // Set the local cluster configuration according to the snapshot cluster membership.
      context.clusterManager().cluster().update(entry.cluster(), null);
      context.events().membershipChange().handle(new MembershipChangeEvent(entry.cluster().getMembers()));

      // Finally, if necessary, increment the current term.
      context.currentTerm(Math.max(context.currentTerm(), entry.term()));
      context.lastApplied(index);
    }
  }

  /**
   * Creates a snapshot of the state machine state.
   *
   * @return A snapshot of the state machine state.
   */
  protected SnapshotEntry createSnapshot() {
    byte[] snapshot = context.stateMachineExecutor().stateMachine().takeSnapshot();
    if (snapshot != null) {
      return new SnapshotEntry(context.currentTerm(), context.clusterManager().cluster().config().copy(), snapshot);
    }
    return null;
  }

  /**
   * Compacts the local log.
   */
  protected CompletableFuture<Void> compactLog() {
    // If the log has now grown past the maximum local log size, create a
    // snapshot of the current state machine state and compact the log
    // using the state machine snapshot as the first entry. The snapshot
    // is stored as a log entry in order to simplify replication to
    // out-of-sync followers.
    if (context.log().size() > context.config().getMaxLogSize()) {
      synchronized (context.log()) {
        logger().info("{} - Compacting log", context.clusterManager().localNode());
        final long lastApplied = context.lastApplied();
        final SnapshotEntry snapshot = createSnapshot();
        if (snapshot != null) {
          try {
            context.log().compact(lastApplied, snapshot);
          } catch (IOException e) {
            throw new CopycatException(e, "Failed to compact log.");
          }
        } else {
          logger().warn("{} - Failed to compact log: no snapshot data provided", context.clusterManager().localNode());
        }
      }
    }
    return CompletableFuture.completedFuture(null);
  }

  @Override
  public CompletableFuture<PollResponse> poll(PollRequest request) {
    logger().debug("{} - Received {}", context.clusterManager().localNode(), request);
    return CompletableFuture.completedFuture(logResponse(handlePoll(logRequest(request))));
  }

  /**
   * Handles a vote request.
   */
  private PollResponse handlePoll(PollRequest request) {
    // If the request indicates a term that is greater than the current term then
    // assign that term and leader to the current context and step down as leader.
    if (request.term() > context.currentTerm()) {
      context.currentTerm(request.term());
      context.currentLeader(null);
      context.lastVotedFor(null);
    }

    // If the request term is not as great as the current context term then don't
    // vote for the candidate. We want to vote for candidates that are at least
    // as up to date as us.
    if (request.term() < context.currentTerm()) {
      logger().debug("{} - Rejected {}: candidate's term is less than the current term", context.clusterManager().localNode(), request);
      return new PollResponse(request.id(), context.currentTerm(), false);
    }
    // If the requesting candidate is ourself then always vote for ourself. Votes
    // for self are done by calling the local node. Note that this obviously
    // doesn't make sense for a leader.
    else if (request.candidate().equals(context.clusterManager().localNode().member().id())) {
      context.lastVotedFor(context.clusterManager().localNode().member().id());
      context.events().voteCast().handle(new VoteCastEvent(context.currentTerm(), context.clusterManager().localNode().member()));
      logger().debug("{} - Accepted {}: candidate is the local node", context.clusterManager().localNode(), request);
      return new PollResponse(request.id(), context.currentTerm(), true);
    }
    // If the requesting candidate is not a known member of the cluster (to this
    // node) then don't vote for it. Only vote for candidates that we know about.
    else if (context.clusterManager().node(request.candidate()) == null) {
      logger().debug("{} - Rejected {}: candidate is not known do the local node", context.clusterManager().localNode(), request);
      return new PollResponse(request.id(), context.currentTerm(), false);
    }
    // If we've already voted for someone else then don't vote again.
    else if (context.lastVotedFor() == null || context.lastVotedFor().equals(request.candidate())) {
      // If the log is empty then vote for the candidate.
      if (context.log().isEmpty()) {
        context.lastVotedFor(request.candidate());
        context.events().voteCast().handle(new VoteCastEvent(context.currentTerm(), context.clusterManager().node(request.candidate()).member()));
        logger().debug("{} - Accepted {}: candidate's log is up-to-date", context.clusterManager().localNode(), request);
        return new PollResponse(request.id(), context.currentTerm(), true);
      } else {
        // Otherwise, load the last entry in the log. The last entry should be
        // at least as up to date as the candidates entry and term.
        synchronized (context.log()) {
          long lastIndex = context.log().lastIndex();
          CopycatEntry entry = context.log().getEntry(lastIndex);
          if (entry == null) {
            context.lastVotedFor(request.candidate());
            context.events()
              .voteCast()
              .handle(new VoteCastEvent(context.currentTerm(), context.clusterManager()
                .node(request.candidate())
                .member()));
            logger().debug("{} - Accepted {}: candidate's log is up-to-date", context.clusterManager().localNode(), request);
            return new PollResponse(request.id(), context.currentTerm(), true);
          }

          long lastTerm = entry.term();
          if (request.lastLogIndex() >= lastIndex) {
            if (request.lastLogTerm() >= lastTerm) {
              context.lastVotedFor(request.candidate());
              context.events()
                .voteCast()
                .handle(new VoteCastEvent(context.currentTerm(), context.clusterManager()
                  .node(request.candidate())
                  .member()));
              logger().debug("{} - Accepted {}: candidate's log is up-to-date", context.clusterManager().localNode(), request);
              return new PollResponse(request.id(), context.currentTerm(), true);
            } else {
              logger().debug("{} - Rejected {}: candidate's last log term ({}) is in conflict with local log ({})", context.clusterManager().localNode(), request, request.lastLogTerm(), lastTerm);
              return new PollResponse(request.id(), context.currentTerm(), false);
            }
          } else {
            logger().debug("{} - Rejected {}: candidate's last log entry ({}) is at a lower index than the local log ({})", context.clusterManager().localNode(), request, request.lastLogIndex(), lastIndex);
            return new PollResponse(request.id(), context.currentTerm(), false);
          }
        }
      }
    }
    // In this case, we've already voted for someone else.
    else {
      logger().debug("{} - Rejected {}: already voted for {}", context.clusterManager().localNode(), request, context.lastVotedFor());
      return new PollResponse(request.id(), context.currentTerm(), false);
    }
  }

  @Override
  public CompletableFuture<SubmitResponse> submit(SubmitRequest request) {
    logRequest(request);

    // If consistent queries are disabled then this node can accept reads regardless of the
    // current state of the system. Note that this only applies to read-only operations.
    CompletableFuture<SubmitResponse> future = new CompletableFuture<>();
    if (!context.config().isConsistentQueryExecution()) {
      StateMachineExecutor.Operation operation = context.stateMachineExecutor().getOperation(request.operation());
      if (operation != null && operation.isReadOnly()) {
        try {
          future.complete(logResponse(new SubmitResponse(request.id(), operation.apply(request.args()))));
        } catch (Exception e) {
          future.completeExceptionally(e);
        }
        return future;
      }
    }

    // If the cluster has a leader then locate the leader and forward the operation.
    if (context.currentLeader() != null) {
      @SuppressWarnings("rawtypes")
      RemoteNode leader = context.clusterManager().remoteNode(context.currentLeader());
      if (leader != null) {
        logger().debug("{} - Forwarding {} to leader {}", context.clusterManager().localNode(), request, leader);
        leader.client().connect().whenComplete((r, e) -> {
          if (e != null) {
            future.completeExceptionally(e);
          } else {
            leader.client().submit(request).whenComplete((result, error) -> {
              if (error != null) {
                future.completeExceptionally(error);
              } else {
                future.complete(result);
              }
            });
          }
        });
        return future;
      }
    }
    return CompletableFuture.completedFuture(logResponse(new SubmitResponse(request.id(), "Not the leader")));
  }

  /**
   * Destroys the state controller.
   */
  void destroy() {
    context.clusterManager().localNode().server().requestHandler(null);
  }

  @Override
  public String toString() {
    return String.format("%s[context=%s]", getClass().getSimpleName(), context);
  }

}
TOP

Related Classes of net.kuujo.copycat.internal.state.StateController

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.