@Test
public void testExecutionWithFailingTaskManager() {
final int NUM_TASKS = 20;
try {
final AbstractJobVertex sender = new AbstractJobVertex("Sender");
final AbstractJobVertex receiver = new AbstractJobVertex("Receiver");
sender.setInvokableClass(Sender.class);
receiver.setInvokableClass(BlockingReceiver.class);
sender.setParallelism(NUM_TASKS);
receiver.setParallelism(NUM_TASKS);
receiver.connectNewDataSetAsInput(sender, DistributionPattern.POINTWISE);
SlotSharingGroup sharingGroup = new SlotSharingGroup();
sender.setSlotSharingGroup(sharingGroup);
receiver.setSlotSharingGroup(sharingGroup);
final JobGraph jobGraph = new JobGraph("Pointwise Job", sender, receiver);
final JobManager jm = startJobManager(2, NUM_TASKS / 2);
final TaskManager tm1 = ((LocalInstanceManager) jm.getInstanceManager()).getTaskManagers()[0];
final TaskManager tm2 = ((LocalInstanceManager) jm.getInstanceManager()).getTaskManagers()[1];
final GlobalBufferPool bp1 = tm1.getChannelManager().getGlobalBufferPool();
final GlobalBufferPool bp2 = tm2.getChannelManager().getGlobalBufferPool();
try {
JobSubmissionResult result = jm.submitJob(jobGraph);
if (result.getReturnCode() != AbstractJobResult.ReturnCode.SUCCESS) {
System.out.println(result.getDescription());
}
assertEquals(AbstractJobResult.ReturnCode.SUCCESS, result.getReturnCode());
ExecutionGraph eg = jm.getCurrentJobs().get(jobGraph.getJobID());
// poll for up to 2 seconds, checking whether every receiver task has reached the RUNNING state
long deadline = System.currentTimeMillis() + 2000;
while (System.currentTimeMillis() < deadline) {
boolean allrunning = true;
for (ExecutionVertex v : eg.getJobVertex(receiver.getID()).getTaskVertices()) {
if (v.getCurrentExecutionAttempt().getState() != ExecutionState.RUNNING) {
allrunning = false;
break;
}
}