Source Code of com.splout.db.qnode.Deployer$ManageDeploy

package com.splout.db.qnode;

/*
* #%L
* Splout SQL Server
* %%
* Copyright (C) 2012 Datasalt Systems S.L.
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
* #L%
*/

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import com.splout.db.common.JSONSerDe;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.thrift.transport.TTransportException;

import com.hazelcast.core.ICountDownLatch;
import com.hazelcast.core.IMap;
import com.splout.db.common.PartitionEntry;
import com.splout.db.common.ReplicationEntry;
import com.splout.db.common.Tablespace;
import com.splout.db.hazelcast.CoordinationStructures;
import com.splout.db.hazelcast.TablespaceVersion;
import com.splout.db.qnode.beans.DeployInfo;
import com.splout.db.qnode.beans.DeployRequest;
import com.splout.db.qnode.beans.DeployStatus;
import com.splout.db.qnode.beans.QueryStatus;
import com.splout.db.qnode.beans.SwitchVersionRequest;
import com.splout.db.thrift.DNodeService;
import com.splout.db.thrift.DeployAction;
import com.splout.db.thrift.PartitionMetadata;

/**
* The Deployer is a specialized module ({@link com.splout.db.qnode.QNodeHandlerModule}) of the
* {@link com.splout.db.qnode.QNode} that performs the business logic associated with a distributed deployment. It is
* used by the {@link com.splout.db.qnode.QNodeHandler}.
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
public class Deployer extends QNodeHandlerModule {

  private final static Log log = LogFactory.getLog(Deployer.class);
  private ExecutorService deployThread;

  @SuppressWarnings("serial")
  public static class UnexistingVersion extends Exception {

    public UnexistingVersion() {
      super();
    }

    public UnexistingVersion(String message) {
      super(message);
    }
  }

  /**
   * Runnable that deals with the asynchronous part of the deployment. In particular, it waits until all DNodes finish
   * their work and then performs the version switch.
   */
  public class ManageDeploy implements Runnable {

    // Number of seconds to wait between checks
    // to see if the timeout was reached or
    // if a DNode failed.
    private long secondsToCheckFailureOrTimeout = 60L;

    private long version;
    private List<String> dnodes;
    private long timeoutSeconds;
    private List<DeployRequest> deployRequests;
    private long dnodesSpreadMetadataTimeout;
    private boolean isReplicaBalancingEnabled;

    public ManageDeploy(List<String> dnodes, List<DeployRequest> deployRequests, long version,
        long timeoutSeconds, long secondsToCheckFailureOrTimeout, long dnodesSpreadMetadataTimeout, boolean isReplicaBalancingEnabled) {
      this.dnodes = dnodes;
      this.deployRequests = deployRequests;
      this.version = version;
      this.timeoutSeconds = timeoutSeconds;
      this.secondsToCheckFailureOrTimeout = secondsToCheckFailureOrTimeout;
      this.dnodesSpreadMetadataTimeout = Math.max(dnodesSpreadMetadataTimeout, 1);
      this.isReplicaBalancingEnabled = isReplicaBalancingEnabled;
    }

    @Override
    public void run() {
      log.info(context.getConfig().getProperty(QNodeProperties.PORT) + " Executing deploy for version ["
          + version + "]");
      CoordinationStructures.DEPLOY_IN_PROGRESS.incrementAndGet();

      try {
        long waitSeconds = 0;
        ICountDownLatch countDownLatchForDeploy = context.getCoordinationStructures()
            .getCountDownLatchForDeploy(version);
        boolean finished;
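        // Each DNode involved in the deploy counts this latch down when it finishes its part (the count
        // is set to the number of involved DNodes in deploy()). We wake up every
        // secondsToCheckFailureOrTimeout seconds so that failures and the deploy timeout can be detected
        // while waiting.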
        do {
          finished = countDownLatchForDeploy.await(secondsToCheckFailureOrTimeout, TimeUnit.SECONDS);
          waitSeconds += secondsToCheckFailureOrTimeout;
          if(!finished) {
            // If any of the DNodes failed, then we cancel the deployment.
            if(checkForFailure()) {
              explainErrors();
              abortDeploy(dnodes, "One or more DNodes failed", version);
              return;
            }
            // Let's see if we reached the timeout.
            // Negative timeoutSeconds => waits forever
            if(waitSeconds > timeoutSeconds && timeoutSeconds >= 0) {
              log.warn("Deploy of version [" + version + "] timed out. Reached [" + waitSeconds
                  + "] seconds.");
              abortDeploy(dnodes, "Timeout reached", version);
              return;
            }
          }
        } while(!finished);

        // It's still possible that the deploy failed so let's check it again
        if(checkForFailure()) {
          explainErrors();
          abortDeploy(dnodes, "One or more DNodes failed.", version);
          return;
        }

        // Check, after the wait, that the complete tablespaces are available to this QNode. If that is the
        // case for this QNode it will probably be the case for the rest of the QNodes.
        long millisToWait = 50;
        double acumulatedMillis = 0.;
        List<SwitchVersionRequest> versionsToCheck = switchActions();
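        // versionsToCheck starts with one entry per deployed tablespace; entries are removed as soon as
        // the corresponding TablespaceVersion looks complete in this QNode's in-memory map, and we can
        // proceed to the version switch once the list is empty.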
        do {
          Thread.sleep(millisToWait);
          acumulatedMillis += millisToWait;

          // Let's see if we reached the metadata-spread timeout
          // (dnodesSpreadMetadataTimeout, in seconds).
          if((acumulatedMillis/1000) > dnodesSpreadMetadataTimeout) {
            log.warn("Deploy of version [" + version + "] timed out when waiting DNodes to spread the metadata. Reached [" + (acumulatedMillis/1000)
                + "] seconds.");
            abortDeploy(dnodes, "Timeout reached", version);
            return;
          }

          Iterator<SwitchVersionRequest> it = versionsToCheck.iterator();
          while(it.hasNext()) {
            SwitchVersionRequest req = it.next();
            Tablespace t = context.getTablespaceVersionsMap().get(
                new TablespaceVersion(req.getTablespace(), req.getVersion()));
            // Check that this TablespaceVersion has been reported by some node through Hazelcast
            if(t != null && t.getReplicationMap() != null && t.getPartitionMap() != null
                && t.getPartitionMap().getPartitionEntries() != null
                && t.getReplicationMap().getReplicationEntries() != null
                && t.getReplicationMap().getReplicationEntries().size() > 0) {
              if(t.getPartitionMap().getPartitionEntries().size() == t.getReplicationMap()
                  .getReplicationEntries().size()) {
                log.info("Ok, TablespaceVersion [" + req.getTablespace() + ", " + req.getVersion()
                    + "] being handled by enough DNodes as reported by Hazelcast. ("
                    + t.getReplicationMap().getReplicationEntries() + ")");
                it.remove();
              }
            }
          }
        } while(versionsToCheck.size() > 0);

        log.info("All DNodes performed the deploy of version [" + version
            + "]. Publishing tablespaces...");

        // We finish by publishing the versions table with the new versions.
        try {
          switchVersions(switchActions());
        } catch(UnexistingVersion e) {
          throw new RuntimeException(
              "Version not found right after deploying it. This looks like a bug.", e);
        }

        // If some replicas are under-replicated, start a balancing process
        context.maybeBalance();

        log.info("Deploy of version [" + version + "] Finished PROPERLY. :-)");
        context.getCoordinationStructures().logDeployMessage(version,
            "Deploy of version [" + version + "] finished properly.");
        context.getCoordinationStructures().getDeploymentsStatusPanel()
            .put(version, DeployStatus.FINISHED);
      } catch(InterruptedException e) {
        log.error("Error while deploying version [" + version + "]", e);
        abortDeploy(dnodes, e.getMessage(), version);
      } catch(Throwable t) {
        log.error("Unexpected error while deploying version [" + version + "]", t);
        throw new RuntimeException(t);
      } finally {
        CoordinationStructures.DEPLOY_IN_PROGRESS.decrementAndGet();
      }
    }

    /**
     * Composes the list of switch actions: one {@link SwitchVersionRequest} per deployed tablespace, all pointing to
     * the newly deployed version.
     *
     * @return the switch actions for this deployment
     */
    private List<SwitchVersionRequest> switchActions() {
      ArrayList<SwitchVersionRequest> actions = new ArrayList<SwitchVersionRequest>();
      for(DeployRequest req : deployRequests) {
        actions.add(new SwitchVersionRequest(req.getTablespace(), version));
      }
      return actions;
    }

    /**
     * Log DNodes errors in deployment. We log both to the QNode logger and to Hazelcast so the info is persisted in the
     * session.
     */
    private void explainErrors() {
      IMap<String, String> deployErrorPanel = context.getCoordinationStructures().getDeployErrorPanel(
          version);
      String msg = "Deployment of version [" + version + "] failed in DNode[";
      for(Entry<String, String> entry : deployErrorPanel.entrySet()) {
        String fMsg = msg + entry.getKey() + "] - it failed with the error [" + entry.getValue() + "]";
        log.error(fMsg);
        context.getCoordinationStructures().logDeployMessage(version, fMsg);
      }
    }

    /**
     * Returns true if the deploy must be aborted because of DNode errors. If replica balancing is disabled, any DNode
     * failure aborts the deploy; otherwise the deploy only fails when all the replicas of some partition are on failed
     * DNodes.
     */
    private boolean checkForFailure() {
      IMap<String, String> deployErrorPanel = context.getCoordinationStructures().getDeployErrorPanel(
          version);
      if(!isReplicaBalancingEnabled) {
        return !deployErrorPanel.isEmpty();
      }
      // If replica balancing is enabled we check whether we could survive after the failed DNodes
      Set<String> failedDNodes = new HashSet<String>(deployErrorPanel.keySet());
      // Check if deploy needs to be canceled or if the system could auto-rebalance itself afterwards
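      // Example: if partition 0 is replicated on [dnode1, dnode2], the deploy can survive the failure of
      // dnode1 alone (the remaining replica can be rebalanced later), but it must be aborted if both fail.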
      for(DeployRequest deployRequest : deployRequests) {
        for(ReplicationEntry repEntry : deployRequest.getReplicationMap()) {
          if(failedDNodes.containsAll(repEntry.getNodes())) {
            // There is AT LEAST one partition that depends on the failed DNodes so the deploy must fail!
            return true;
          }
        }
      }
      return false;
    }
  } /* End ManageDeploy */

  /**
   * The Deployer deals with deploy and switch version requests.
   */
  public Deployer(QNodeHandlerContext context) {
    super(context);
    deployThread = Executors.newFixedThreadPool(1);
  }

  /**
   * Call this method to start an asynchronous deployment given a proper deploy request - proxy method for
   * {@link QNodeHandler}. Returns a {@link DeployInfo} with the status of the request.
   *
   * @throws InterruptedException
   */
  public DeployInfo deploy(List<DeployRequest> deployRequests) throws InterruptedException {
    DeployInfo deployInfo = new DeployInfo();

    // A new unique version number is generated.
    long version = context.getCoordinationStructures().uniqueVersionId();
    deployInfo.setVersion(version);

    List<String> tablespaces = new ArrayList<String>();
    List<String> dataURIs = new ArrayList<String>();

    for(DeployRequest request : deployRequests) {
      tablespaces.add(request.getTablespace());
      dataURIs.add(request.getData_uri());
    }

    deployInfo.setTablespacesDeployed(tablespaces);
    deployInfo.setDataURIs(dataURIs);

    Date startTime = new Date();
    deployInfo.setStartedAt(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(startTime));

    context.getCoordinationStructures().logDeployMessage(version,
        "Deploy [" + version + "] for tablespaces " + tablespaces + " started.");
    context.getCoordinationStructures().getDeploymentsStatusPanel().put(version, DeployStatus.ONGOING);

    // Generate the list of actions per DNode
    Map<String, List<DeployAction>> actionsPerDNode = generateDeployActionsPerDNode(deployRequests,
        version);

    // Starting the countdown latch.
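    // Each involved DNode counts the latch down when it finishes its part of the deploy;
    // ManageDeploy (scheduled below) awaits it.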
    ICountDownLatch countDownLatchForDeploy = context.getCoordinationStructures()
        .getCountDownLatchForDeploy(version);
    Set<String> dnodesInvolved = actionsPerDNode.keySet();
    countDownLatchForDeploy.trySetCount(dnodesInvolved.size());

    // Sending deploy signals to each DNode
    for(Map.Entry<String, List<DeployAction>> actionPerDNode : actionsPerDNode.entrySet()) {
      DNodeService.Client client = null;
      boolean renew = false;
      try {
        try {
          client = context.getDNodeClientFromPool(actionPerDNode.getKey());
        } catch(TTransportException e) {
          renew = true;
          throw e;
        }
        client.deploy(actionPerDNode.getValue(), version);
      } catch(Exception e) {
        String errorMsg = "Error sending deploy actions to DNode [" + actionPerDNode.getKey() + "]";
        log.error(errorMsg, e);
        abortDeploy(new ArrayList<String>(actionsPerDNode.keySet()), errorMsg, version);
        deployInfo.setError("Error connecting to DNode " + actionPerDNode.getKey());
        context.getCoordinationStructures().getDeployInfoPanel().put(version, deployInfo);
        return deployInfo;
      } finally {
        if(client != null) {
          context.returnDNodeClientToPool(actionPerDNode.getKey(), client, renew);
        }
      }
    }

    // Initiating an asynchronous process to manage the deployment
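    // DEPLOY_TIMEOUT defaults to -1 here, which means "wait forever"; the metadata spread timeout
    // defaults to 180 seconds.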
    deployThread.execute(new ManageDeploy(new ArrayList<String>(actionsPerDNode.keySet()), deployRequests,
        version, context.getConfig().getLong(QNodeProperties.DEPLOY_TIMEOUT, -1), context.getConfig()
            .getLong(QNodeProperties.DEPLOY_SECONDS_TO_CHECK_ERROR),
        context.getConfig().getLong(QNodeProperties.DEPLOY_DNODES_SPREAD_METADATA_TIMEOUT, 180),
        context.getConfig().getBoolean(QNodeProperties.REPLICA_BALANCE_ENABLE)));

    context.getCoordinationStructures().getDeployInfoPanel().put(version, deployInfo);
    return deployInfo;
  }

  /**
   * Informs the given DNodes that they must abort the deployment of the given version, as something failed.
   */
  public void abortDeploy(List<String> dnodes, String deployerErrorMessage, long version) {
    for(String dnode : dnodes) {
      DNodeService.Client client = null;
      boolean renew = false;
      try {
        try {
          client = context.getDNodeClientFromPool(dnode);
        } catch(TTransportException e) {
          renew = true;
          throw e;
        }
        client.abortDeploy(version);
      } catch(Exception e) {
        log.error("Error sending abort deploy flag to DNode [" + dnode + "]", e);
      } finally {
        if(client != null) {
          context.returnDNodeClientToPool(dnode, client, renew);
        }
      }
    }
    context.getCoordinationStructures().logDeployMessage(version,
        "Deploy failed due to: " + deployerErrorMessage);
    context.getCoordinationStructures().getDeploymentsStatusPanel().put(version, DeployStatus.FAILED);
    CoordinationStructures.DEPLOY_IN_PROGRESS.decrementAndGet();
  }

  /**
   * Switches current versions being served for some tablespaces, in an atomic way.
   */
  public void switchVersions(List<SwitchVersionRequest> switchRequest) throws UnexistingVersion {
    // We compute the new versions table, and then try to update it
    // We use optimistic locking: we read the original
    // map and try to update it. If the original has changed during
    // this process, we retry: reload the original map, ...
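    // updateVersionsBeingServed() is expected to behave like a compare-and-swap: it only applies
    // newVersionsTable if the stored map still equals versionsTable, returning false otherwise so
    // that we retry with a fresh copy.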
    Map<String, Long> versionsTable;
    Map<String, Long> newVersionsTable;
    do {
      versionsTable = context.getCoordinationStructures().getCopyVersionsBeingServed();
      newVersionsTable = new HashMap<String, Long>();
      if(versionsTable != null) {
        newVersionsTable.putAll(versionsTable);
      }

      for(SwitchVersionRequest req : switchRequest) {
        TablespaceVersion tsv = new TablespaceVersion(req.getTablespace(), req.getVersion());
        newVersionsTable.put(tsv.getTablespace(), tsv.getVersion());
      }
    } while(!context.getCoordinationStructures().updateVersionsBeingServed(versionsTable,
        newVersionsTable));
  }

  /**
   * Generates the list of individual deploy actions that have to be sent to each DNode.
   */
  private static Map<String, List<DeployAction>> generateDeployActionsPerDNode(
      List<DeployRequest> deployRequests, long version) {
    HashMap<String, List<DeployAction>> actions = new HashMap<String, List<DeployAction>>();

    long deployDate = System.currentTimeMillis(); // Here is where we decide the date of the deployment for all deployed
                                                  // tablespaces
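    // The same timestamp is stored in the PartitionMetadata of every deploy action generated below.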

    for(DeployRequest req : deployRequests) {
      for(Object obj : req.getReplicationMap()) {
        ReplicationEntry rEntry = (ReplicationEntry) obj;
        PartitionEntry pEntry = null;
        for(PartitionEntry partEntry : req.getPartitionMap()) {
          if(partEntry.getShard().equals(rEntry.getShard())) {
            pEntry = partEntry;
          }
        }
        if(pEntry == null) {
          String msg = "No Partition metadata for shard: " + rEntry.getShard()
              + " this is very likely to be a software bug.";
          log.error(msg);
          try {
            log.error("Partition map: " + JSONSerDe.ser(req.getPartitionMap()));
            log.error("Replication map: " + JSONSerDe.ser(req.getReplicationMap()));
          } catch (JSONSerDe.JSONSerDeException e) {
            log.error("JSON error", e);
          }
          throw new RuntimeException(msg);
        }
        // Normalize DNode ids -> The convention is that DNodes are identified by host:port, so we need to strip the
        // protocol, if any.
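        // Example: "tcp://dnode1:4000" becomes "dnode1:4000".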
        for(int i = 0; i < rEntry.getNodes().size(); i++) {
          String dnodeId = rEntry.getNodes().get(i);
          if(dnodeId.startsWith("tcp://")) {
            dnodeId = dnodeId.substring("tcp://".length(), dnodeId.length());
          }
          rEntry.getNodes().set(i, dnodeId);
        }
        for(String dNode : rEntry.getNodes()) {
          List<DeployAction> actionsSoFar = (List<DeployAction>) MapUtils.getObject(actions, dNode,
              new ArrayList<DeployAction>());
          actions.put(dNode, actionsSoFar);
          DeployAction deployAction = new DeployAction();
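          // e.g. a data URI of "hdfs://host/path/mytablespace" with shard 0 yields "hdfs://host/path/mytablespace/0.db"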
          deployAction.setDataURI(req.getData_uri() + "/" + rEntry.getShard() + ".db");
          deployAction.setTablespace(req.getTablespace());
          deployAction.setVersion(version);
          deployAction.setPartition(rEntry.getShard());

          // Add partition metadata to the deploy action for DNodes to save it
          PartitionMetadata metadata = new PartitionMetadata();
          metadata.setMinKey(pEntry.getMin());
          metadata.setMaxKey(pEntry.getMax());
          metadata.setNReplicas(rEntry.getNodes().size());
          metadata.setDeploymentDate(deployDate);
          metadata.setInitStatements(req.getInitStatements());
          metadata.setEngineId(req.getEngine());

          deployAction.setMetadata(metadata);
          actionsSoFar.add(deployAction);
        }
      }
    }
    return actions;
  }
}