Source Code of org.apache.helix.task.TaskDriver

package org.apache.helix.task;

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.I0Itec.zkclient.DataUpdater;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.OptionGroup;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.helix.AccessOption;
import org.apache.helix.HelixAdmin;
import org.apache.helix.HelixDataAccessor;
import org.apache.helix.HelixManager;
import org.apache.helix.HelixManagerFactory;
import org.apache.helix.HelixProperty;
import org.apache.helix.InstanceType;
import org.apache.helix.PropertyKey;
import org.apache.helix.ZNRecord;
import org.apache.helix.model.IdealState;
import org.apache.helix.model.builder.CustomModeISBuilder;
import org.apache.log4j.Logger;

import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

/**
 * CLI and client API for scheduling, stopping, resuming, deleting, and
 * inspecting task workflows
 */
public class TaskDriver {
  /** For logging */
  private static final Logger LOG = Logger.getLogger(TaskDriver.class);

  /** Required option name for Helix endpoint */
  private static final String ZK_ADDRESS = "zk";

  /** Required option name for cluster against which to run task */
  private static final String CLUSTER_NAME_OPTION = "cluster";

  /** Required option name for task resource within target cluster */
  private static final String RESOURCE_OPTION = "resource";

  /** Field for specifying a workflow file when starting a job */
  private static final String WORKFLOW_FILE_OPTION = "file";

  private final HelixManager _manager;
  private final HelixAdmin _admin;
  private final String _clusterName;

  /** Commands which may be parsed from the first argument to main */
  private enum DriverCommand {
    start,
    stop,
    delete,
    resume,
    list
  }

  public TaskDriver(HelixManager manager) {
    _manager = manager;
    _clusterName = manager.getClusterName();
    _admin = manager.getClusterManagmentTool();
  }

  /**
   * Parses the first argument as a driver command, parses the remaining
   * arguments based on that command, and executes the corresponding action
   * against the cluster
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 1) {
      printUsage(constructOptions(), "[cmd]");
      throw new IllegalArgumentException("A driver command is required as the first argument");
    }
    String[] cmdArgs = Arrays.copyOfRange(args, 1, args.length);
    CommandLine cl = parseOptions(cmdArgs, constructOptions(), args[0]);
    String zkAddr = cl.getOptionValue(ZK_ADDRESS);
    String clusterName = cl.getOptionValue(CLUSTER_NAME_OPTION);
    String resource = cl.getOptionValue(RESOURCE_OPTION);

    if (zkAddr == null || clusterName == null || resource == null) {
      printUsage(constructOptions(), "[cmd]");
      throw new IllegalArgumentException(
          "zk, cluster, and resource must all be non-null for all commands");
    }

    HelixManager helixMgr =
        HelixManagerFactory.getZKHelixManager(clusterName, "Admin", InstanceType.ADMINISTRATOR,
            zkAddr);
    helixMgr.connect();
    TaskDriver driver = new TaskDriver(helixMgr);
    try {
      DriverCommand cmd = DriverCommand.valueOf(args[0]);
      switch (cmd) {
      case start:
        if (cl.hasOption(WORKFLOW_FILE_OPTION)) {
          driver.start(Workflow.parse(new File(cl.getOptionValue(WORKFLOW_FILE_OPTION))));
        } else {
          throw new IllegalArgumentException("Workflow file is required to start flow.");
        }
        break;
      case stop:
        driver.setTaskTargetState(resource, TargetState.STOP);
        break;
      case resume:
        driver.setTaskTargetState(resource, TargetState.START);
        break;
      case delete:
        driver.setTaskTargetState(resource, TargetState.DELETE);
        break;
      case list:
        driver.list(resource);
        break;
      default:
        throw new IllegalArgumentException("Unknown command " + args[0]);
      }
    } catch (IllegalArgumentException e) {
      LOG.error("Unknown driver command " + args[0]);
      throw e;
    }

    helixMgr.disconnect();
  }
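
  /*
   * Example invocations (a sketch only; the ZK address, cluster, resource, and
   * file names below are hypothetical placeholders):
   *
   *   java org.apache.helix.task.TaskDriver start --zk localhost:2181 \
   *       --cluster MyCluster --resource MyWorkflow --file /tmp/workflow.yaml
   *   java org.apache.helix.task.TaskDriver stop --zk localhost:2181 \
   *       --cluster MyCluster --resource MyWorkflow
   */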

  /** Schedules a new workflow */
  public void start(Workflow flow) throws Exception {
    // TODO: check that namespace for workflow is available
    LOG.info("Starting workflow " + flow.getName());
    flow.validate();

    String flowName = flow.getName();

    // first, add workflow config to ZK
    _admin.setConfig(TaskUtil.getResourceConfigScope(_clusterName, flowName), flow
        .getWorkflowConfig().getResourceConfigMap());

    // then schedule jobs
    for (String job : flow.getJobConfigs().keySet()) {
      JobConfig.Builder builder = JobConfig.Builder.fromMap(flow.getJobConfigs().get(job));
      if (flow.getTaskConfigs() != null && flow.getTaskConfigs().containsKey(job)) {
        builder.addTaskConfigs(flow.getTaskConfigs().get(job));
      }
      scheduleJob(job, builder.build());
    }
  }
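
  /*
   * Programmatic scheduling sketch (mirrors what main() does for the start
   * command; the cluster, ZK address, and workflow file path are placeholders):
   *
   *   HelixManager manager = HelixManagerFactory.getZKHelixManager(
   *       "MyCluster", "Admin", InstanceType.ADMINISTRATOR, "localhost:2181");
   *   manager.connect();
   *   new TaskDriver(manager).start(Workflow.parse(new File("/tmp/workflow.yaml")));
   *   manager.disconnect();
   */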

  /** Creates a new named job queue (workflow) */
  public void createQueue(JobQueue queue) throws Exception {
    String queueName = queue.getName();
    HelixDataAccessor accessor = _manager.getHelixDataAccessor();
    HelixProperty property = new HelixProperty(queueName);
    property.getRecord().getSimpleFields().putAll(queue.getResourceConfigMap());
    boolean created =
        accessor.createProperty(accessor.keyBuilder().resourceConfig(queueName), property);
    if (!created) {
      throw new IllegalArgumentException("Queue " + queueName + " already exists!");
    }
  }
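
  /*
   * Queue creation sketch (assumes this version's JobQueue.Builder takes the
   * queue name, and that a connected TaskDriver named driver is in scope):
   *
   *   JobQueue queue = new JobQueue.Builder("MyQueue").build();
   *   driver.createQueue(queue);
   */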

  /** Flushes a named job queue */
  public void flushQueue(String queueName) throws Exception {
    WorkflowConfig config = TaskUtil.getWorkflowCfg(_manager, queueName);
    if (config == null) {
      throw new IllegalArgumentException("Queue does not exist!");
    }

    // Remove all ideal states and resource configs to trigger a drop event
    HelixDataAccessor accessor = _manager.getHelixDataAccessor();
    PropertyKey.Builder keyBuilder = accessor.keyBuilder();
    final Set<String> toRemove = Sets.newHashSet(config.getJobDag().getAllNodes());
    for (String resourceName : toRemove) {
      accessor.removeProperty(keyBuilder.idealStates(resourceName));
      accessor.removeProperty(keyBuilder.resourceConfig(resourceName));
      // Delete context
      String contextKey = Joiner.on("/").join(TaskConstants.REBALANCER_CONTEXT_ROOT, resourceName);
      _manager.getHelixPropertyStore().remove(contextKey, AccessOption.PERSISTENT);
    }

    // Now atomically clear the DAG
    String path = keyBuilder.resourceConfig(queueName).getPath();
    DataUpdater<ZNRecord> updater = new DataUpdater<ZNRecord>() {
      @Override
      public ZNRecord update(ZNRecord currentData) {
        JobDag jobDag = JobDag.fromJson(currentData.getSimpleField(WorkflowConfig.DAG));
        for (String resourceName : toRemove) {
          for (String child : jobDag.getDirectChildren(resourceName)) {
            jobDag.getChildrenToParents().get(child).remove(resourceName);
          }
          for (String parent : jobDag.getDirectParents(resourceName)) {
            jobDag.getParentsToChildren().get(parent).remove(resourceName);
          }
          jobDag.getChildrenToParents().remove(resourceName);
          jobDag.getParentsToChildren().remove(resourceName);
          jobDag.getAllNodes().remove(resourceName);
        }
        try {
          currentData.setSimpleField(WorkflowConfig.DAG, jobDag.toJson());
        } catch (Exception e) {
          throw new IllegalArgumentException(e);
        }
        return currentData;
      }
    };
    accessor.getBaseDataAccessor().update(path, updater, AccessOption.PERSISTENT);

    // Now atomically clear the results
    path =
        Joiner.on("/")
            .join(TaskConstants.REBALANCER_CONTEXT_ROOT, queueName, TaskUtil.CONTEXT_NODE);
    updater = new DataUpdater<ZNRecord>() {
      @Override
      public ZNRecord update(ZNRecord currentData) {
        Map<String, String> states = currentData.getMapField(WorkflowContext.JOB_STATES);
        if (states != null) {
          states.keySet().removeAll(toRemove);
        }
        return currentData;
      }
    };
    _manager.getHelixPropertyStore().update(path, updater, AccessOption.PERSISTENT);
  }

  /** Adds a new job to the end of an existing named queue */
  public void enqueueJob(final String queueName, final String jobName, JobConfig.Builder jobBuilder)
      throws Exception {
    // Get the job queue config and capacity
    HelixDataAccessor accessor = _manager.getHelixDataAccessor();
    HelixProperty workflowConfig =
        accessor.getProperty(accessor.keyBuilder().resourceConfig(queueName));
    if (workflowConfig == null) {
      throw new IllegalArgumentException("Queue " + queueName + " does not yet exist!");
    }
    boolean isTerminable =
        workflowConfig.getRecord().getBooleanField(WorkflowConfig.TERMINABLE, true);
    if (isTerminable) {
      throw new IllegalArgumentException(queueName + " is not a queue!");
    }
    final int capacity =
        workflowConfig.getRecord().getIntField(JobQueue.CAPACITY, Integer.MAX_VALUE);

    // Create the job to ensure that it validates
    JobConfig jobConfig = jobBuilder.setWorkflow(queueName).build();

    // Add the job to the end of the queue in the DAG
    final String namespacedJobName = TaskUtil.getNamespacedJobName(queueName, jobName);
    DataUpdater<ZNRecord> updater = new DataUpdater<ZNRecord>() {
      @Override
      public ZNRecord update(ZNRecord currentData) {
        // Add the node to the existing DAG
        JobDag jobDag = JobDag.fromJson(currentData.getSimpleField(WorkflowConfig.DAG));
        Set<String> allNodes = jobDag.getAllNodes();
        if (allNodes.size() >= capacity) {
          throw new IllegalStateException("Queue " + queueName + " is at capacity, will not add "
              + jobName);
        }
        if (allNodes.contains(namespacedJobName)) {
          throw new IllegalStateException("Could not add to queue " + queueName + ", job "
              + jobName + " already exists");
        }
        jobDag.addNode(namespacedJobName);

        // Add the node to the end of the queue
        String candidate = null;
        for (String node : allNodes) {
          if (!node.equals(namespacedJobName) && jobDag.getDirectChildren(node).isEmpty()) {
            candidate = node;
            break;
          }
        }
        if (candidate != null) {
          jobDag.addParentToChild(candidate, namespacedJobName);
        }

        // Save the updated DAG
        try {
          currentData.setSimpleField(WorkflowConfig.DAG, jobDag.toJson());
        } catch (Exception e) {
          throw new IllegalStateException(
              "Could not add job " + jobName + " to queue " + queueName, e);
        }
        return currentData;
      }
    };
    String path = accessor.keyBuilder().resourceConfig(queueName).getPath();
    boolean status = accessor.getBaseDataAccessor().update(path, updater, AccessOption.PERSISTENT);
    if (!status) {
      throw new IllegalArgumentException("Could not enqueue job");
    }
    // Schedule the job
    scheduleJob(namespacedJobName, jobConfig);
  }
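
  /*
   * Enqueue sketch (setCommand and setTargetResource are assumed
   * JobConfig.Builder setters; all names are placeholders). The workflow field
   * is set to the queue name inside enqueueJob, so it need not be set here:
   *
   *   JobConfig.Builder job =
   *       new JobConfig.Builder().setCommand("ReindexTask").setTargetResource("MyDB");
   *   driver.enqueueJob("MyQueue", "reindex", job);
   */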

  /** Posts a new job to the cluster */
  private void scheduleJob(String jobResource, JobConfig jobConfig) throws Exception {
    // Set up job resource based on partitions from target resource
    int numIndependentTasks = jobConfig.getTaskConfigMap().size();
    int numPartitions =
        (numIndependentTasks > 0) ? numIndependentTasks : _admin
            .getResourceIdealState(_clusterName, jobConfig.getTargetResource()).getPartitionSet()
            .size();
    _admin.addResource(_clusterName, jobResource, numPartitions, TaskConstants.STATE_MODEL_NAME);

    // Set the job configuration
    HelixDataAccessor accessor = _manager.getHelixDataAccessor();
    PropertyKey.Builder keyBuilder = accessor.keyBuilder();
    HelixProperty resourceConfig = new HelixProperty(jobResource);
    resourceConfig.getRecord().getSimpleFields().putAll(jobConfig.getResourceConfigMap());
    Map<String, TaskConfig> taskConfigMap = jobConfig.getTaskConfigMap();
    if (taskConfigMap != null) {
      for (TaskConfig taskConfig : taskConfigMap.values()) {
        resourceConfig.getRecord().setMapField(taskConfig.getId(), taskConfig.getConfigMap());
      }
    }
    accessor.setProperty(keyBuilder.resourceConfig(jobResource), resourceConfig);

    // Push out new ideal state based on number of target partitions
    CustomModeISBuilder builder = new CustomModeISBuilder(jobResource);
    builder.setRebalancerMode(IdealState.RebalanceMode.TASK);
    builder.setNumReplica(1);
    builder.setNumPartitions(numPartitions);
    builder.setStateModel(TaskConstants.STATE_MODEL_NAME);
    for (int i = 0; i < numPartitions; i++) {
      builder.add(jobResource + "_" + i);
    }
    IdealState is = builder.build();
    if (taskConfigMap != null && !taskConfigMap.isEmpty()) {
      is.setRebalancerClassName(GenericTaskRebalancer.class.getName());
    } else {
      is.setRebalancerClassName(FixedTargetTaskRebalancer.class.getName());
    }
    _admin.setResourceIdealState(_clusterName, jobResource, is);
  }

  /** Public method to resume a workflow/queue */
  public void resume(String workflow) {
    setTaskTargetState(workflow, TargetState.START);
  }

  /** Public method to stop a workflow/queue */
  public void stop(String workflow) {
    setTaskTargetState(workflow, TargetState.STOP);
  }

  /** Public method to delete a workflow/queue */
  public void delete(String workflow) {
    setTaskTargetState(workflow, TargetState.DELETE);
  }
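
  /*
   * Lifecycle control sketch (the workflow name is a placeholder):
   *
   *   driver.stop("MyWorkflow");   // pause all incomplete jobs
   *   driver.resume("MyWorkflow"); // pick up where the workflow left off
   *   driver.delete("MyWorkflow"); // mark the workflow and its jobs for deletion
   */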

  /** Helper function to change the target state of a workflow and any scheduled child workflows */
  private void setTaskTargetState(String jobResource, TargetState state) {
    setSingleTaskTargetState(jobResource, state);

    // For recurring schedules, child workflows must also be handled
    HelixDataAccessor accessor = _manager.getHelixDataAccessor();
    List<String> resources = accessor.getChildNames(accessor.keyBuilder().resourceConfigs());
    String prefix = jobResource + "_" + TaskConstants.SCHEDULED;
    for (String resource : resources) {
      if (resource.startsWith(prefix)) {
        setSingleTaskTargetState(resource, state);
      }
    }
  }

  /** Helper function to change the target state of a single workflow resource */
  private void setSingleTaskTargetState(String jobResource, final TargetState state) {
    HelixDataAccessor accessor = _manager.getHelixDataAccessor();
    DataUpdater<ZNRecord> updater = new DataUpdater<ZNRecord>() {
      @Override
      public ZNRecord update(ZNRecord currentData) {
        // Only update target state for non-completed workflows
        String finishTime = currentData.getSimpleField(WorkflowContext.FINISH_TIME);
        if (finishTime == null || finishTime.equals(WorkflowContext.UNFINISHED)) {
          currentData.setSimpleField(WorkflowConfig.TARGET_STATE, state.name());
        }
        return currentData;
      }
    };
    List<DataUpdater<ZNRecord>> updaters = Lists.newArrayList();
    updaters.add(updater);
    List<String> paths = Lists.newArrayList();
    paths.add(accessor.keyBuilder().resourceConfig(jobResource).getPath());
    accessor.updateChildren(paths, updaters, AccessOption.PERSISTENT);
    invokeRebalance();
  }

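  /** Logs the configuration and current state of a workflow and each of its jobs */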
  public void list(String resource) {
    WorkflowConfig wCfg = TaskUtil.getWorkflowCfg(_manager, resource);
    if (wCfg == null) {
      LOG.error("Workflow " + resource + " does not exist!");
      return;
    }
    WorkflowContext wCtx = TaskUtil.getWorkflowContext(_manager, resource);

    LOG.info("Workflow " + resource + " consists of the following tasks: "
        + wCfg.getJobDag().getAllNodes());
    String workflowState =
        (wCtx != null) ? wCtx.getWorkflowState().name() : TaskState.NOT_STARTED.name();
    LOG.info("Current state of workflow is " + workflowState);
    LOG.info("Job states are: ");
    LOG.info("-------");
    for (String job : wCfg.getJobDag().getAllNodes()) {
      TaskState jobState = (wCtx != null) ? wCtx.getJobState(job) : TaskState.NOT_STARTED;
      LOG.info("Job " + job + " is " + jobState);

      // fetch job information
      JobConfig jCfg = TaskUtil.getJobCfg(_manager, job);
      JobContext jCtx = TaskUtil.getJobContext(_manager, job);
      if (jCfg == null || jCtx == null) {
        LOG.info("-------");
        continue;
      }

      // calculate taskPartitions
      List<Integer> partitions = Lists.newArrayList(jCtx.getPartitionSet());
      Collections.sort(partitions);

      // report status
      for (Integer partition : partitions) {
        String taskId = jCtx.getTaskIdForPartition(partition);
        taskId = (taskId != null) ? taskId : jCtx.getTargetForPartition(partition);
        LOG.info("Task: " + taskId);
        TaskConfig taskConfig = jCfg.getTaskConfig(taskId);
        if (taskConfig != null) {
          LOG.info("Configuration: " + taskConfig.getConfigMap());
        }
        TaskPartitionState state = jCtx.getPartitionState(partition);
        state = (state != null) ? state : TaskPartitionState.INIT;
        LOG.info("State: " + state);
        String assignedParticipant = jCtx.getAssignedParticipant(partition);
        if (assignedParticipant != null) {
          LOG.info("Assigned participant: " + assignedParticipant);
        }
        LOG.info("-------");
      }
      LOG.info("-------");
    }
  }

  /**
   * Hack to invoke a rebalance until the bug where resource config changes do
   * not trigger a rebalance is fixed
   */
  public void invokeRebalance() {
    // find a task
    for (String resource : _admin.getResourcesInCluster(_clusterName)) {
      IdealState is = _admin.getResourceIdealState(_clusterName, resource);
      if (is.getStateModelDefRef().equals(TaskConstants.STATE_MODEL_NAME)) {
        HelixDataAccessor accessor = _manager.getHelixDataAccessor();
        accessor.updateProperty(accessor.keyBuilder().idealStates(resource), is);
        break;
      }
    }
  }

  /** Constructs options set for all basic control messages */
  private static Options constructOptions() {
    Options options = new Options();
    options.addOptionGroup(constructGenericRequiredOptionGroup());
    options.addOptionGroup(constructStartOptionGroup());
    return options;
  }

  /** Constructs option group containing options required by all drivable jobs */
  @SuppressWarnings("static-access")
  private static OptionGroup constructGenericRequiredOptionGroup() {
    Option zkAddressOption =
        OptionBuilder.isRequired().withLongOpt(ZK_ADDRESS)
            .withDescription("ZK address managing cluster").create();
    zkAddressOption.setArgs(1);
    zkAddressOption.setArgName("zkAddress");

    Option clusterNameOption =
        OptionBuilder.isRequired().withLongOpt(CLUSTER_NAME_OPTION).withDescription("Cluster name")
            .create();
    clusterNameOption.setArgs(1);
    clusterNameOption.setArgName("clusterName");

    Option taskResourceOption =
        OptionBuilder.isRequired().withLongOpt(RESOURCE_OPTION)
            .withDescription("Workflow or job name").create();
    taskResourceOption.setArgs(1);
    taskResourceOption.setArgName("resourceName");

    OptionGroup group = new OptionGroup();
    group.addOption(zkAddressOption);
    group.addOption(clusterNameOption);
    group.addOption(taskResourceOption);
    return group;
  }

  /** Constructs option group containing the options used by the start command */
  private static OptionGroup constructStartOptionGroup() {
    @SuppressWarnings("static-access")
    Option workflowFileOption =
        OptionBuilder.withLongOpt(WORKFLOW_FILE_OPTION)
            .withDescription("Local file describing workflow").create();
    workflowFileOption.setArgs(1);
    workflowFileOption.setArgName("workflowFile");

    OptionGroup group = new OptionGroup();
    group.addOption(workflowFileOption);
    return group;
  }

  /** Attempts to parse options for given command, printing usage under failure */
  private static CommandLine parseOptions(String[] args, Options options, String cmdStr) {
    CommandLineParser cliParser = new GnuParser();
    CommandLine cmd = null;

    try {
      cmd = cliParser.parse(options, args);
    } catch (ParseException pe) {
      LOG.error("CommandLineClient: failed to parse command-line options: " + pe.toString());
      printUsage(options, cmdStr);
      System.exit(1);
    }
    boolean ret = checkOptionArgsNumber(cmd.getOptions());
    if (!ret) {
      printUsage(options, cmdStr);
      System.exit(1);
    }

    return cmd;
  }

  /** Ensures options argument counts are correct */
  private static boolean checkOptionArgsNumber(Option[] options) {
    for (Option option : options) {
      int argNb = option.getArgs();
      String[] args = option.getValues();
      if (argNb == 0) {
        if (args != null && args.length > 0) {
          System.err.println(option.getArgName() + " expects " + argNb + " argument(s) (got "
              + Arrays.toString(args) + ")");
          return false;
        }
      } else {
        if (args == null || args.length != argNb) {
          System.err.println(option.getArgName() + " expects " + argNb + " argument(s) (got "
              + Arrays.toString(args) + ")");
          return false;
        }
      }
    }
    return true;
  }

  /** Displays CLI usage for given option set and command name */
  private static void printUsage(Options cliOptions, String cmd) {
    HelpFormatter helpFormatter = new HelpFormatter();
    helpFormatter.setWidth(1000);
    helpFormatter.printHelp("java " + TaskDriver.class.getName() + " " + cmd, cliOptions);
  }
}