package org.apache.helix.integration.task;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.helix.HelixManager;
import org.apache.helix.HelixManagerFactory;
import org.apache.helix.InstanceType;
import org.apache.helix.api.id.StateModelDefId;
import org.apache.helix.manager.zk.MockParticipant;
import org.apache.helix.manager.zk.MockController;
import org.apache.helix.participant.StateMachineEngine;
import org.apache.helix.task.JobConfig;
import org.apache.helix.task.Task;
import org.apache.helix.task.TaskCallbackContext;
import org.apache.helix.task.TaskDriver;
import org.apache.helix.task.TaskFactory;
import org.apache.helix.task.TaskResult;
import org.apache.helix.task.TaskState;
import org.apache.helix.task.TaskStateModelFactory;
import org.apache.helix.task.Workflow;
import org.apache.helix.testutil.ZkTestBase;
import org.apache.helix.tools.ClusterStateVerifier;
import org.apache.log4j.Logger;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import com.google.common.collect.ImmutableMap;
/**
 * Integration test that starts a job (and a multi-job workflow) through {@link TaskDriver},
 * stops it, resumes it, and waits for it to reach {@link TaskState#COMPLETED}.
 * <p>
 * The cluster, participants, controller and admin manager are created once in
 * {@link #beforeClass()} and torn down in {@link #afterClass()}.
 */
public class TestTaskRebalancerStopResume extends ZkTestBase {
  private static final Logger LOG = Logger.getLogger(TestTaskRebalancerStopResume.class);
  /** Number of mock participants started for the test cluster. */
  private static final int NUM_NODES = 5;
  private static final int START_PORT = 12918;
  private static final String MASTER_SLAVE_STATE_MODEL = "MasterSlave";
  /** Key in the job command config map that carries the task run time in milliseconds. */
  private static final String TIMEOUT_CONFIG = "Timeout";
  private static final String TGT_DB = "TestDB";
  private static final String JOB_RESOURCE = "SomeJob";
  private static final int NUM_PARTITIONS = 20;
  private static final int NUM_REPLICAS = 3;
  private static final String CLUSTER_NAME = "TestTaskRebalancerStopResume";

  private final MockParticipant[] _participants = new MockParticipant[NUM_NODES];
  private MockController _controller;
  private HelixManager _manager;
  private TaskDriver _driver;

  /**
   * Builds the test cluster: recreates the cluster namespace, adds the instances and the target
   * resource, starts the participants (each with a "Task" state model factory that creates
   * {@link ReindexTask}s for the "Reindex" command), starts the controller, connects an
   * administrator manager, and finally asserts that both cluster state verifiers pass.
   *
   * @throws Exception if any setup step fails
   */
  @BeforeClass
  public void beforeClass() throws Exception {
    // Start from a clean slate in case a previous run left the cluster behind.
    String namespace = "/" + CLUSTER_NAME;
    if (_zkclient.exists(namespace)) {
      _zkclient.deleteRecursive(namespace);
    }

    _setupTool.addCluster(CLUSTER_NAME, true);
    for (int i = 0; i < NUM_NODES; i++) {
      _setupTool.addInstanceToCluster(CLUSTER_NAME, instanceName(i));
    }

    // Set up target db
    _setupTool.addResourceToCluster(CLUSTER_NAME, TGT_DB, NUM_PARTITIONS, MASTER_SLAVE_STATE_MODEL);
    _setupTool.rebalanceStorageCluster(CLUSTER_NAME, TGT_DB, NUM_REPLICAS);

    Map<String, TaskFactory> taskFactoryReg = new HashMap<String, TaskFactory>();
    taskFactoryReg.put("Reindex", new TaskFactory() {
      @Override
      public Task createNewTask(TaskCallbackContext context) {
        return new ReindexTask(context);
      }
    });

    // start dummy participants
    for (int i = 0; i < NUM_NODES; i++) {
      MockParticipant participant = new MockParticipant(_zkaddr, CLUSTER_NAME, instanceName(i));
      _participants[i] = participant;

      // Register a Task state model factory.
      StateMachineEngine stateMachine = participant.getStateMachineEngine();
      stateMachine.registerStateModelFactory(StateModelDefId.from("Task"),
          new TaskStateModelFactory(participant, taskFactoryReg));
      participant.syncStart();
    }

    // start controller
    String controllerName = "controller_0";
    _controller = new MockController(_zkaddr, CLUSTER_NAME, controllerName);
    _controller.syncStart();

    // create cluster manager
    _manager =
        HelixManagerFactory.getZKHelixManager(CLUSTER_NAME, "Admin", InstanceType.ADMINISTRATOR,
            _zkaddr);
    _manager.connect();
    _driver = new TaskDriver(_manager);

    boolean result =
        ClusterStateVerifier.verifyByZkCallback(new ClusterStateVerifier.MasterNbInExtViewVerifier(
            _zkaddr, CLUSTER_NAME));
    Assert.assertTrue(result, "MasterNbInExtViewVerifier failed for " + CLUSTER_NAME);

    result =
        ClusterStateVerifier
            .verifyByZkCallback(new ClusterStateVerifier.BestPossAndExtViewZkVerifier(_zkaddr,
                CLUSTER_NAME));
    Assert.assertTrue(result, "BestPossAndExtViewZkVerifier failed for " + CLUSTER_NAME);
  }

  /**
   * Stops the controller, then the participants, then disconnects the admin manager.
   * <p>
   * Each stage runs in a {@code finally} of the previous one so that a failure while stopping
   * one component does not leave the later ones running. Null checks cover the case where
   * {@link #beforeClass()} failed before a component was created.
   *
   * @throws Exception if stopping any component fails
   */
  @AfterClass
  public void afterClass() throws Exception {
    try {
      if (_controller != null) {
        _controller.syncStop();
      }
    } finally {
      try {
        for (MockParticipant participant : _participants) {
          if (participant != null) {
            participant.syncStop();
          }
        }
      } finally {
        if (_manager != null) {
          _manager.disconnect();
        }
      }
    }
  }

  /**
   * Starts a single-job workflow whose tasks run for 100 ms, stops it once it is in progress,
   * resumes it once it is stopped, and waits for it to complete.
   *
   * @throws Exception if driving or polling the workflow fails
   */
  @Test
  public void stopAndResume() throws Exception {
    Map<String, String> commandConfig = ImmutableMap.of(TIMEOUT_CONFIG, String.valueOf(100));
    Workflow flow =
        WorkflowGenerator.generateDefaultSingleJobWorkflowBuilderWithExtraConfigs(JOB_RESOURCE,
            commandConfig).build();

    LOG.info("Starting flow " + flow.getName());
    _driver.start(flow);
    TestUtil.pollForWorkflowState(_manager, JOB_RESOURCE, TaskState.IN_PROGRESS);

    LOG.info("Pausing job");
    _driver.stop(JOB_RESOURCE);
    TestUtil.pollForWorkflowState(_manager, JOB_RESOURCE, TaskState.STOPPED);

    LOG.info("Resuming job");
    _driver.resume(JOB_RESOURCE);
    TestUtil.pollForWorkflowState(_manager, JOB_RESOURCE, TaskState.COMPLETED);
  }

  /**
   * Same stop/resume cycle as {@link #stopAndResume()}, but for the workflow produced by
   * {@code WorkflowGenerator.generateDefaultRepeatedJobWorkflowBuilder}.
   *
   * @throws Exception if driving or polling the workflow fails
   */
  @Test
  public void stopAndResumeWorkflow() throws Exception {
    String workflow = "SomeWorkflow";
    Workflow flow = WorkflowGenerator.generateDefaultRepeatedJobWorkflowBuilder(workflow).build();

    LOG.info("Starting flow " + workflow);
    _driver.start(flow);
    TestUtil.pollForWorkflowState(_manager, workflow, TaskState.IN_PROGRESS);

    LOG.info("Pausing workflow");
    _driver.stop(workflow);
    TestUtil.pollForWorkflowState(_manager, workflow, TaskState.STOPPED);

    LOG.info("Resuming workflow");
    _driver.resume(workflow);
    TestUtil.pollForWorkflowState(_manager, workflow, TaskState.COMPLETED);
  }

  /** Returns the instance name used for the participant at the given index. */
  private static String instanceName(int index) {
    return "localhost_" + (START_PORT + index);
  }

  /**
   * Dummy task that simply waits for a configured duration, checking for cancellation between
   * short sleeps. The duration is read from the {@code "Timeout"} entry of the job command
   * config map and defaults to 200 ms when that entry is absent.
   */
  public static class ReindexTask implements Task {
    private static final long DEFAULT_DELAY_MS = 200L;
    /** How long to sleep between checks of the cancellation flag. */
    private static final long POLL_INTERVAL_MS = 50L;

    private final long _delay;
    // Written by cancel() and read by run(); volatile so the write is visible across threads.
    private volatile boolean _canceled;

    /**
     * @param context callback context whose job config supplies the optional run time
     * @throws NumberFormatException if the configured timeout is not a valid long
     */
    public ReindexTask(TaskCallbackContext context) {
      JobConfig jobCfg = context.getJobConfig();
      Map<String, String> cfg = jobCfg.getJobCommandConfigMap();
      if (cfg == null) {
        cfg = Collections.emptyMap();
      }
      String timeout = cfg.get(TIMEOUT_CONFIG);
      _delay = (timeout != null) ? Long.parseLong(timeout) : DEFAULT_DELAY_MS;
    }

    /**
     * Waits until the configured delay has elapsed.
     *
     * @return a {@code COMPLETED} result once the delay has elapsed, or a {@code CANCELED}
     *         result if {@link #cancel()} was called or the running thread was interrupted
     *         first; the result info is the remaining time in milliseconds (never negative)
     */
    @Override
    public TaskResult run() {
      long expiry = System.currentTimeMillis() + _delay;
      while (System.currentTimeMillis() < expiry) {
        // An interrupt is treated like a cancel request: continuing to loop with the
        // interrupt flag set would make every subsequent sleep fail immediately.
        if (_canceled || !sleep(POLL_INTERVAL_MS)) {
          return new TaskResult(TaskResult.Status.CANCELED, remainingMillis(expiry));
        }
      }
      return new TaskResult(TaskResult.Status.COMPLETED, remainingMillis(expiry));
    }

    /** Requests that a concurrent {@link #run()} stop at its next cancellation check. */
    @Override
    public void cancel() {
      _canceled = true;
    }

    /** Returns the time left until {@code expiry} in milliseconds, clamped at zero, as text. */
    private static String remainingMillis(long expiry) {
      return String.valueOf(Math.max(0L, expiry - System.currentTimeMillis()));
    }

    /**
     * Sleeps for the given number of milliseconds.
     *
     * @return {@code true} if the sleep finished normally, {@code false} if the thread was
     *         interrupted (the interrupt status is restored before returning)
     */
    private static boolean sleep(long d) {
      try {
        Thread.sleep(d);
        return true;
      } catch (InterruptedException e) {
        // Preserve the interrupt for whoever owns this thread instead of swallowing it.
        Thread.currentThread().interrupt();
        LOG.warn("ReindexTask interrupted while sleeping", e);
        return false;
      }
    }
  }
}