/**
 * Builds the job group name associated with the given resource group.
 *
 * <p>The name is {@code GROUP_JOB_NAME_PREFIX} immediately followed by the
 * value returned from {@code group.getId()}.</p>
 *
 * @param group the resource group whose id is appended to the prefix; must not be
 *              {@code null}, since its id is read unconditionally
 *
 * @return the prefix concatenated with the group's id
 */
public static String createJobGroupName(ResourceGroup group) {
    StringBuilder jobGroupName = new StringBuilder();
    jobGroupName.append(GROUP_JOB_NAME_PREFIX);
    jobGroupName.append(group.getId());
    return jobGroupName.toString();
}
public void execute(JobExecutionContext context) throws JobExecutionException {
GroupOperationSchedule schedule = null;
GroupOperationHistory groupHistory;
Subject user = null;
try {
JobDetail jobDetail = context.getJobDetail();
OperationManagerLocal operationManager = LookupUtil.getOperationManager();
updateOperationScheduleEntity(jobDetail, context.getNextFireTime(), operationManager);
// we only got here because the user was allowed to execute / schedule the operation in the first place,
// thus it's safe to pass in the overlord here
schedule = operationManager.getGroupOperationSchedule(LookupUtil.getSubjectManager().getOverlord(),
jobDetail);
// create a new session even if user is logged in elsewhere, we don't want to attach to that user's session
user = getUserWithSession(schedule.getSubject(), false);
ResourceGroup group = schedule.getGroup();
// we need the operation definition to fill in the history item
OperationDefinition op;
op = operationManager.getSupportedGroupOperation(user, group.getId(), schedule.getOperationName(), false);
// first we need to create an INPROGRESS *group* history item
Configuration parameters = schedule.getParameters();
if (parameters != null) {
parameters = parameters.deepCopy(false); // we need a copy to avoid constraint violations upon delete
}
groupHistory = new GroupOperationHistory(jobDetail.getName(), jobDetail.getGroup(), user.getName(), op,
parameters, group);
groupHistory = (GroupOperationHistory) operationManager.updateOperationHistory(user, groupHistory);
// get the resources to operate on, ordered or not
List<Resource> resourcesToOperateOn;
if (schedule.getExecutionOrder() != null) {
resourcesToOperateOn = schedule.getExecutionOrder();
} else {
ResourceManagerLocal resourceManager = LookupUtil.getResourceManager();
PageControl pageControl = PageControl.getUnlimitedInstance();
resourcesToOperateOn = resourceManager.findExplicitResourcesByResourceGroup(user, group, pageControl);
}
// now create detail composites from the resource list
List<ResourceOperationDetailsComposite> resourceComposites = new ArrayList<ResourceOperationDetailsComposite>();
getUserWithSession(user, true); // refresh our session to reset the timeout clock
for (Resource nextResourceToOperateOn : resourcesToOperateOn) {
// create the non-quartz schedule entity for the given job execution context data
ResourceOperationSchedule resourceSchedule = createScheduleForResource(schedule, jobDetail.getGroup(),
user, nextResourceToOperateOn);
// create the resource-level history entity for the newly created non-quartz schedule entity
// this method also does the persisting
ResourceOperationHistory resourceHistory = createOperationHistory(resourceSchedule.getJobName(),
resourceSchedule.getJobGroup(), resourceSchedule, groupHistory, operationManager);
// add all three elements to the composite, which will be iterated over below for the bulk of the work
resourceComposites.add(new ResourceOperationDetailsComposite(nextResourceToOperateOn, resourceHistory,
resourceSchedule));
}
// now tell the agents to invoke the operation for all resources
if (schedule.getExecutionOrder() != null) {
boolean hadFailure = false;
// synchronously execute, waiting for each operation to finish before going to the next
for (ResourceOperationDetailsComposite composite : resourceComposites) {
try {
if (hadFailure) {
// there was a failure during execution of this group operation;
// thus, mark all remaining operation histories as cancelled
composite.history.setErrorMessage("This has been cancelled due to halt-on-error "
+ "being set on the parent group operation schedule. "
+ "A previous resource operation that executed prior "
+ "to this resource operation failed, thus causing "
+ "this resource operation to be cancelled.");
composite.history.setStatus(OperationRequestStatus.CANCELED);
composite.history = (ResourceOperationHistory) operationManager.updateOperationHistory(
getUserWithSession(user, true), composite.history);
continue;
}
invokeOperationOnResource(composite, operationManager);
int resourceHistoryId = composite.history.getId();
OperationHistory updatedOperationHistory = null;
long sleep = 1000L; // quick sleep for fast ops, then slow down
long maxSleep = 5000L;
do {
Thread.sleep(sleep);
sleep = (sleep == maxSleep) ? sleep : sleep + 1000L;
// it's unlikely but possible that a client program could actually query for, process, and
// delete the history before this code gets a chance to run. If the record is gone just
// assume things were handled externally.
try {
updatedOperationHistory = operationManager.getOperationHistoryByHistoryId(
getUserWithSession(user, true), resourceHistoryId);
} catch (IllegalArgumentException e) {
if (log.isDebugEnabled()) {
log.debug("Failed to find operation history", e);
}
break;
}
// if the duration was ridiculously long, let's break out of here. this will rarely
// be triggered because our operation manager will timeout long running operations for us
// (based on the operation's define timeout). But, me being paranoid, I want to be able
// to break this infinite loop if for some reason the operation manager isn't doing its job.
// if the operation took longer than 24 hours, this breaks the loop.
if (updatedOperationHistory.getDuration() > (GroupOperationJob.BREAK_VALUE)) {
break;
}
} while (updatedOperationHistory.getStatus() == OperationRequestStatus.INPROGRESS);
// halt the rest if we got a failure and were told not to go on
if (null != updatedOperationHistory
&& (updatedOperationHistory.getStatus() != OperationRequestStatus.SUCCESS)
&& schedule.isHaltOnFailure()) {
hadFailure = true;
}
} catch (Exception e) {
// failed to even send to the agent, immediately mark the job as failed
groupHistory.setErrorMessage(ThrowableUtil.getStackAsString(e));
groupHistory = (GroupOperationHistory) operationManager.updateOperationHistory(
getUserWithSession(user, true), groupHistory);
if (schedule.isHaltOnFailure()) {
hadFailure = true;
}
}
}
} else {
// send the invocation requests without waiting for each to return
for (ResourceOperationDetailsComposite composite : resourceComposites) {
try {
invokeOperationOnResource(composite, operationManager);
} catch (Exception e) {
if (e instanceof CancelJobException) {
throw e;
}
// failed to even send to the agent, immediately mark the job as failed
groupHistory.setErrorMessage(ThrowableUtil.getStackAsString(e));
groupHistory = (GroupOperationHistory) operationManager.updateOperationHistory(
getUserWithSession(user, true), groupHistory);
// Note: in actuality - I don't think users have a way in the user interface to turn on halt-on-failure for parallel execution.
// So this isHaltOnFailure will probably always be false. But in case we want to support this, leave this here.
// What will happen is we will stop sending requests to the agents to invoke more resource operations. Any previous
// resource operations invoked, however, will still be running and allowed to finish on their respective agents.
if (schedule.isHaltOnFailure()) {
throw e;
}
}
}
}