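// Task assignment and result submission are handled by a single thread,
// so the status pool itself needs no concurrency control; the more expensive send operations are handed off to ServerConnector, which performs them on multiple threads.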
@Override
public void addTaskResultToQueue(SendResultsRequestEvent jobResponseEvent) {
JobTaskResult jobTaskResult = jobResponseEvent.getJobTaskResult();
if (jobTaskResult.getTaskIds() != null && jobTaskResult.getTaskIds().size() > 0) {
// 判断是否是过期的一些老任务数据,根据task和taskresult的createtime来判断
// 以后要扩展成为如果发现当前的epoch < 结果的epoch,表明这台可能是从属的master,负责reduce,但是速度跟不上了
if(jobTaskPool.get(jobTaskResult.getTaskIds().get(0)) == null) {
logger.error("jobTask is null " + jobTaskResult.getTaskIds().get(0));
masterNode.echoSendJobTaskResults(jobResponseEvent.getSequence(), "success", jobResponseEvent.getChannel());
return;
}
if (jobTaskResult.getJobEpoch() != jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobEpoch() && this.config.getDispatchMaster()) {
// The result is stale: possibly the task timed out and was reassigned afterwards.
if (jobTaskResult.getJobEpoch() < jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobEpoch()) {
logger.error("old task result will be discard! job:" + jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobName() + ",epoch:" + jobTaskResult.getJobEpoch() + ",slave:" + jobResponseEvent.getChannel());
masterNode.echoSendJobTaskResults(jobResponseEvent.getSequence(), "success", jobResponseEvent.getChannel());
return;
}
else {
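// Allow some tolerance time. NOTE(review): the original comment said 5 seconds, but the value passed below is 15000 (presumably milliseconds, i.e. 15 seconds) — confirm which is intended.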
jobs.get(jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobName()).blockToResetJob(15000);
// 这块有点疑问, 什么情况会出现
if (jobTaskResult.getJobEpoch() > jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobEpoch()) {
logger.error("otherMaster can't merge in time!job:" + jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobName() + ",taskResult epoch:" + jobTaskResult.getJobEpoch() + ", task epoch:" + jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobEpoch());
masterNode.echoSendJobTaskResults(jobResponseEvent.getSequence(), "success", jobResponseEvent.getChannel());
if(!this.config.getDispatchMaster()) {
jobs.get(jobTaskResult.getJobName()).reset(this);
} else {
return;
}
}
}
}
if (logger.isWarnEnabled()) {
StringBuilder ts =
new StringBuilder("Receive slave analysis result, jobTaskIds : ")
.append(jobTaskResult.toString()).append(", ").append(jobTaskResult.getTaskIds().size());
logger.warn(ts.toString());
}
if(jobs.get(jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobName()).isMerged().get()) {
masterNode.echoSendJobTaskResults(jobResponseEvent.getSequence(), "success", jobResponseEvent.getChannel());
return;
}
// 先放入队列,防止小概率多线程并发问题
jobTaskResultsQueuePool.get(jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobName()).offer(
jobTaskResult);
if(logger.isInfoEnabled()) {
StringBuilder sb = new StringBuilder("add result [");
for(String s : jobTaskResult.getTaskIds()) {
sb.append(s).append(",");
}
sb.append("] to queue:").append(jobTaskPool.get(jobTaskResult.getTaskIds().get(0)).getJobName());
logger.info(sb.toString());
}
Iterator<String> iter = jobTaskResult.getTaskIds().iterator();
while (iter.hasNext()) {
String taskId = iter.next();
JobTask jobTask = jobTaskPool.get(taskId);
if (jobTask == null)
{
logger.error(new StringBuilder("taskId :").append(taskId).append("not exist!").toString());
continue;
}
Job job = jobs.get(jobTask.getJobName());
if(job == null) {
logger.error(new StringBuilder("job :").append(jobTask.getJobName()).append("not exist!").toString());
continue;
}
if (statusPool.replace(taskId, JobTaskStatus.DOING, JobTaskStatus.DONE)
|| statusPool.replace(taskId, JobTaskStatus.UNDO, JobTaskStatus.DONE)) {
logger.info("task " + jobTask.getJobName() + " of job " + job.getJobName() + " done");
jobTask.setStatus(JobTaskStatus.DONE);
jobTask.getTailCursor().compareAndSet(true, false);
jobTask.setEndTime(System.currentTimeMillis());
jobTask.setLastMergedEpoch(job.getEpoch().get());
job.getCompletedTaskCount().incrementAndGet();
} else {
if(!this.config.getDispatchMaster()) {
jobTask.setStatus(JobTaskStatus.DONE);
jobTask.getTailCursor().compareAndSet(true, false);
jobTask.setEndTime(System.currentTimeMillis());
jobTask.setLastMergedEpoch(job.getEpoch().get());
statusPool.put(taskId, JobTaskStatus.DONE);
iter.remove();
}
}
//对jobTask的执行结果打点
StringBuilder log = new StringBuilder(ReportUtil.SLAVE_LOG).append(",timeStamp=")
.append(System.currentTimeMillis()).append(",epoch=")
.append(job.getEpoch()).append(",jobName=");
log.append(jobTask.getJobName()).append(",taskId=")
.append(jobTask.getTaskId()).append(",recycleCounter=")
.append(jobTask.getRecycleCounter().get()).append(",slaveIp=")
.append(jobTaskResult.getSlaveIp()).append(",efficiency=")
.append(jobTaskResult.getEfficiency()).append(",");
JobTaskExecuteInfo executeInfo = jobTaskResult.getTaskExecuteInfos().get(jobTask.getTaskId());
if (executeInfo != null) {
log.append("analysisConsume=").append(executeInfo.getAnalysisConsume()).append(",")
.append("jobDataSize=").append(executeInfo.getJobDataSize()).append(",").append("totalLine=")
.append(executeInfo.getTotalLine()).append(",").append("errorLine=")