Package voldemort.client.rebalance

Source Code of voldemort.client.rebalance.RebalanceBatchPlan

/*
* Copyright 2013 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package voldemort.client.rebalance;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import voldemort.cluster.Cluster;
import voldemort.cluster.Node;
import voldemort.routing.StoreRoutingPlan;
import voldemort.store.StoreDefinition;
import voldemort.store.readonly.ReadOnlyStorageConfiguration;
import voldemort.utils.MoveMap;
import voldemort.utils.Pair;
import voldemort.utils.RebalanceUtils;
import voldemort.utils.Utils;

import com.google.common.collect.Maps;

/**
* Constructs a batch plan that goes from currentCluster to finalCluster. The
* partition-stores included in the move are based on those listed in storeDefs.
* This batch plan is execution-agnostic, i.e., a plan is generated and later
* stealer- versus donor-based execution of that plan is decided.
*
* Long term, its unclear if the notion of RebalanceBatchPlan separate from
* RebalancePlan is needed. Batching tends to increase the overall cost of
* rebalancing and has historically been error prone. (I.e., the transition
* between batches has had intermittent failures.) Its value, if any, lies in
* allowing long-running (days or weeks) rebalancing jobs to have interim
* checkpoints such that single node failures don't force a restart from initial
* state. Should consider deprecating batching after zone expansion and zone
* shrinking have been done successfully as short (less than a day or two),
* single-batch rebalances.
*/
public class RebalanceBatchPlan {

    private final Cluster currentCluster;
    private final List<StoreDefinition> currentStoreDefs;
    private final Cluster finalCluster;
    private final List<StoreDefinition> finalStoreDefs;

    protected final List<RebalanceTaskInfo> batchPlan;

    /**
     * Develops a batch plan to go from current cluster/stores to final
     * cluster/stores.
     *
     * @param currentCluster
     * @param currentStoreDefs
     * @param finalCluster
     * @param finalStoreDefs
     */
    public RebalanceBatchPlan(final Cluster currentCluster,
                              final List<StoreDefinition> currentStoreDefs,
                              final Cluster finalCluster,
                              final List<StoreDefinition> finalStoreDefs) {
        this.currentCluster = currentCluster;
        this.currentStoreDefs = currentStoreDefs;
        this.finalCluster = finalCluster;
        this.finalStoreDefs = finalStoreDefs;
        RebalanceUtils.validateCurrentFinalCluster(currentCluster, finalCluster);
        RebalanceUtils.validateClusterStores(currentCluster, currentStoreDefs);
        RebalanceUtils.validateClusterStores(finalCluster, finalStoreDefs);

        this.batchPlan = constructBatchPlan();

    }

    /**
     * Develops a batch plan to go from current cluster to final cluster for
     * given stores. (Stores is common for current and final cluster.)
     *
     * @param currentCluster
     * @param finalCluster
     * @param commonStoreDefs
     */
    public RebalanceBatchPlan(final Cluster currentCluster,
                              final Cluster finalCluster,
                              final List<StoreDefinition> commonStoreDefs) {
        this(currentCluster, commonStoreDefs, finalCluster, commonStoreDefs);
    }

    public Cluster getCurrentCluster() {
        return currentCluster;
    }

    public List<StoreDefinition> getCurrentStoreDefs() {
        return currentStoreDefs;
    }

    public Cluster getFinalCluster() {
        return finalCluster;
    }

    public List<StoreDefinition> getFinalStoreDefs() {
        return finalStoreDefs;
    }

    public List<RebalanceTaskInfo> getBatchPlan() {
        return batchPlan;
    }

    public RebalanceBatchPlanProgressBar getProgressBar(int batchId) {
        return new RebalanceBatchPlanProgressBar(batchId, getTaskCount(), getPartitionStoreMoves());
    }

    public MoveMap getZoneMoveMap() {
        MoveMap moveMap = new MoveMap(finalCluster.getZoneIds());

        for (RebalanceTaskInfo info : batchPlan) {
            int fromZoneId = finalCluster.getNodeById(info.getDonorId()).getZoneId();
            int toZoneId = finalCluster.getNodeById(info.getStealerId()).getZoneId();
            moveMap.add(fromZoneId, toZoneId, info.getPartitionStoreMoves());
        }

        return moveMap;
    }

    public MoveMap getNodeMoveMap() {
        MoveMap moveMap = new MoveMap(finalCluster.getNodeIds());

        for (RebalanceTaskInfo info : batchPlan) {
            moveMap.add(info.getDonorId(), info.getStealerId(), info.getPartitionStoreMoves());
        }

        return moveMap;
    }

    /**
     * Determines total number of partition-stores moved across zones.
     *
     * @return number of cross zone partition-store moves
     */
    public int getCrossZonePartitionStoreMoves() {
        int xzonePartitionStoreMoves = 0;
        for (RebalanceTaskInfo info : batchPlan) {
            Node donorNode = finalCluster.getNodeById(info.getDonorId());
            Node stealerNode = finalCluster.getNodeById(info.getStealerId());

            if(donorNode.getZoneId() != stealerNode.getZoneId()) {
                xzonePartitionStoreMoves += info.getPartitionStoreMoves();
            }
        }

        return xzonePartitionStoreMoves;
    }

    /**
     * Return the total number of partition-store moves
     *
     * @return Number of moves
     */
    public int getPartitionStoreMoves() {
        int partitionStoreMoves = 0;

        for (RebalanceTaskInfo info : batchPlan) {
            partitionStoreMoves += info.getPartitionStoreMoves();
        }

        return partitionStoreMoves;
    }

    /**
     * Returns the number of rebalance tasks in this batch.
     *
     * @return number of rebalance tasks in this batch
     */
    public int getTaskCount() {
        return batchPlan.size();
    }

    /**
     * Gathers all of the state necessary to build a
     * List<RebalanceTaskInfo> which is effectively a (batch) plan.
     */
    private class RebalanceTaskInfoBuilder {

        final HashMap<Pair<Integer, Integer>, HashMap<String, List<Integer>>> stealerDonorToStoreToStealPartition;

        RebalanceTaskInfoBuilder() {
            stealerDonorToStoreToStealPartition = Maps.newHashMap();
        }

        public void addPartitionStoreMove(int stealerNodeId,
                                          int donorNodeId,
                                          String storeName,
                                          int partitionId) {
            Pair<Integer, Integer> stealerDonor = new Pair<Integer, Integer>(stealerNodeId,
                                                                             donorNodeId);
            if (!stealerDonorToStoreToStealPartition.containsKey(stealerDonor)) {
                stealerDonorToStoreToStealPartition.put(stealerDonor,
                                                        new HashMap<String, List<Integer>>());
            }

            HashMap<String, List<Integer>> storeToStealPartition = stealerDonorToStoreToStealPartition.get(stealerDonor);
            if (!storeToStealPartition.containsKey(storeName)) {
                storeToStealPartition.put(storeName, new ArrayList<Integer>());
            }
            List<Integer> partitionIds = storeToStealPartition.get(storeName);
            partitionIds.add(partitionId);
        }

        public List<RebalanceTaskInfo> buildRebalanceTaskInfos() {
            final List<RebalanceTaskInfo> result = new ArrayList<RebalanceTaskInfo>();

            for(Pair<Integer, Integer> stealerDonor: stealerDonorToStoreToStealPartition.keySet()) {
                result.add(new RebalanceTaskInfo(stealerDonor.getFirst(),
                                                 stealerDonor.getSecond(),
                                                 stealerDonorToStoreToStealPartition.get(stealerDonor),
                                                 currentCluster));
            }
            return result;
        }
    }

    /**
     * Determine the batch plan and return it. The batch plan has the following
     * properties:
     *
     * 1) A stealer node does not steal any partition-stores it already hosts.
     *
     * 2) Use current policy to decide which node to steal from: see getDonorId
     * method.
     *
     * Currently, this batch plan avoids all unnecessary cross zone moves,
     * distributes cross zone moves into new zones evenly across existing zones,
     * and copies replicaFactor partition-stores into any new zone.
     *
     * @return the batch plan
     */
    private List<RebalanceTaskInfo> constructBatchPlan() {
        // Construct all store routing plans once.
        HashMap<String, StoreRoutingPlan> currentStoreRoutingPlans = new HashMap<String, StoreRoutingPlan>();
        for(StoreDefinition storeDef: currentStoreDefs) {
            currentStoreRoutingPlans.put(storeDef.getName(), new StoreRoutingPlan(currentCluster,
                                                                                  storeDef));
        }
        HashMap<String, StoreRoutingPlan> finalStoreRoutingPlans = new HashMap<String, StoreRoutingPlan>();
        for(StoreDefinition storeDef: finalStoreDefs) {
            finalStoreRoutingPlans.put(storeDef.getName(), new StoreRoutingPlan(finalCluster,
                                                                                storeDef));
        }

        RebalanceTaskInfoBuilder rpiBuilder = new RebalanceTaskInfoBuilder();
        // For every node in the final cluster ...
        for(Node stealerNode: finalCluster.getNodes()) {
            int stealerZoneId = stealerNode.getZoneId();
            int stealerNodeId = stealerNode.getId();

            // Consider all store definitions ...
            for(StoreDefinition storeDef: finalStoreDefs) {
                StoreRoutingPlan currentSRP = currentStoreRoutingPlans.get(storeDef.getName());
                StoreRoutingPlan finalSRP = finalStoreRoutingPlans.get(storeDef.getName());
                for(int stealerPartitionId: finalSRP.getZoneNAryPartitionIds(stealerNodeId)) {
                    // ... and all nary partition-stores,
                    // now steal what is needed

                    // Optimization for RW stores: Do not steal a partition-store you already host!
                    if (!storeDef.getType().equalsIgnoreCase(ReadOnlyStorageConfiguration.TYPE_NAME)) {
                        if (currentSRP.getReplicationNodeList(stealerPartitionId)
                                      .contains(stealerNodeId)) {
                            continue;
                        }
                    }
                    // Determine which node to steal from. This will find node that hosts the appropriate zone-nary
                    int donorNodeId = getDonorId(currentSRP,
                                                 finalSRP,
                                                 stealerZoneId,
                                                 stealerNodeId,
                                                 stealerPartitionId);
                    // Make sure that both donor and stealer aren't the same node
                    // Since optimization on line 275 wouldn't apply for RO stores, there is a chance that donorNodeId
                    // and stealerNodeId are the same, hence this check is needed for RO stores
                    if (donorNodeId != stealerNodeId) {
                        rpiBuilder.addPartitionStoreMove(stealerNodeId,
                                                         donorNodeId,
                                                         storeDef.getName(),
                                                         stealerPartitionId);
                    }
                }
            }
        }

        return rpiBuilder.buildRebalanceTaskInfos();
    }

    /**
     * Decide which donor node to steal from. This is a policy implementation.
     * I.e., in the future, additional policies could be considered. At that
     * time, this method should be overridden in a sub-class, or a policy object
     * ought to implement this algorithm.
     *
     * Current policy:
     *
     * 1) If possible, a stealer node that is the zone n-ary in the finalCluster
     * steals from the zone n-ary in the currentCluster in the same zone.
     *
     * 2) If there are no partition-stores to steal in the same zone (i.e., this
     * is the "zone expansion" use case), then a differnt policy must be used.
     * The stealer node that is the zone n-ary in the finalCluster determines
     * which pre-existing zone in the currentCluster hosts the primary partition
     * id for the partition-store. The stealer then steals the zone n-ary from
     * that pre-existing zone.
     *
     * This policy avoids unnecessary cross-zone moves and distributes the load
     * of cross-zone moves approximately-uniformly across pre-existing zones.
     *
     * Other policies to consider:
     *
     * - For zone expansion, steal all partition-stores from one specific
     * pre-existing zone.
     *
     * - Replace heuristic to approximately uniformly distribute load among
     * existing zones to something more concrete (i.e. track steals from each
     * pre-existing zone and forcibly balance them).
     *
     * - Select a single donor for all replicas in a new zone. This will require
     * donor-based rebalancing to be run (at least for this specific part of the
     * plan). This would reduce the number of donor-side scans of data. (But
     * still send replication factor copies over the WAN.) This would require
     * apparatus in the RebalanceController to work.
     *
     * - Set up some sort of chain-replication in which a single stealer in the
     * new zone steals some replica from a pre-exising zone, and then other
     * n-aries in the new zone steal from the single cross-zone stealer in the
     * zone. This would require apparatus in the RebalanceController to work.
     *
     * @param currentSRP
     * @param finalSRP
     * @param stealerZoneId
     * @param stealerNodeId
     * @param stealerPartitionId
     * @return the node id of the donor for this partition Id.
     */
    protected int getDonorId(StoreRoutingPlan currentSRP,
                             StoreRoutingPlan finalSRP,
                             int stealerZoneId,
                             int stealerNodeId,
                             int stealerPartitionId) {
        int stealerZoneNAry = finalSRP.getZoneNaryForNodesPartition(stealerZoneId,
                                                                    stealerNodeId,
                                                                    stealerPartitionId);

        int donorZoneId;
        if(currentSRP.zoneNAryExists(stealerZoneId, stealerZoneNAry, stealerPartitionId)) {
            // Steal from local n-ary (since one exists).
            donorZoneId = stealerZoneId;
        } else {
            // Steal from zone that hosts primary partition Id.
            int currentMasterNodeId = currentSRP.getNodeIdForPartitionId(stealerPartitionId);
            donorZoneId = currentCluster.getNodeById(currentMasterNodeId).getZoneId();
        }

        return currentSRP.getNodeIdForZoneNary(donorZoneId, stealerZoneNAry, stealerPartitionId);

    }

    @Override
    public String toString() {
        if(batchPlan == null || batchPlan.isEmpty()) {
            return "No rebalancing required since batch plan is empty";
        }

        StringBuilder builder = new StringBuilder();
        builder.append("Rebalancing Batch Plan : ").append(Utils.NEWLINE);
        builder.append(RebalanceTaskInfo.taskListToString(batchPlan));

        return builder.toString();
    }
}
TOP

Related Classes of voldemort.client.rebalance.RebalanceBatchPlan

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.