package com.taobao.top.analysis.node.operation;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.taobao.top.analysis.config.MasterConfig;
import com.taobao.top.analysis.exception.AnalysisException;
import com.taobao.top.analysis.node.job.Job;
import com.taobao.top.analysis.node.job.JobMergedResult;
import com.taobao.top.analysis.statistics.reduce.IReducer.ReduceType;
import com.taobao.top.analysis.util.ReportUtil;
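/*
 * Minimal usage sketch (illustration only, not part of this class: the executor
 * wiring and the collectSlaveResults() helper below are assumptions):
 *
 * BlockingQueue<JobMergedResult> branchResultQueue =
 *         new java.util.concurrent.LinkedBlockingQueue<JobMergedResult>();
 * List<Map<String, Map<String, Object>>> taskResults = collectSlaveResults(); // hypothetical
 * java.util.concurrent.ExecutorService mergePool =
 *         java.util.concurrent.Executors.newFixedThreadPool(2);
 * mergePool.submit(new MergeJobOperation(job, taskResults.size(), taskResults,
 *         config, branchResultQueue));
 */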
/**
 * Merge-job operation. Supports two merge modes: trunk merge (under the trunk
 * write lock, folded into the job's main result) and branch merge (lock-free,
 * with the partial result queued for a later trunk merge).
 * @author fangweng
 * @Email fangweng@taobao.com
 * 2011-11-29
 *
 */
public class MergeJobOperation implements Runnable {
private static final Log logger = LogFactory.getLog(MergeJobOperation.class);
private Job job;
private int mergeCount = 0; // number of task results in this merge batch
private List<Map<String, Map<String, Object>>> mergeResults;
private MasterConfig config;
private BlockingQueue<JobMergedResult> branchResultQueue; // hands branch merge results to the trunk-lock holder
private boolean stopping = false; // when true, treat this as the final merge round (master shutdown)
public MergeJobOperation(Job job, int mergeCount,
List<Map<String, Map<String, Object>>> mergeResults,
MasterConfig config, BlockingQueue<JobMergedResult> branchResultQueue) {
this(job, mergeCount, mergeResults, config, branchResultQueue, false);
}
public MergeJobOperation(Job job, int mergeCount,
List<Map<String, Map<String, Object>>> mergeResults,
MasterConfig config, BlockingQueue<JobMergedResult> branchResultQueue,
boolean stopping) {
this.job = job;
this.mergeCount = mergeCount;
this.mergeResults = mergeResults;
this.config = config;
this.branchResultQueue = branchResultQueue;
this.stopping = stopping;
}
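/**
 * Tries the trunk write lock without blocking. The winner merges straight
 * into the trunk result; any other thread performs a lock-free branch merge
 * and queues its partial result for the lock holder to pick up.
 */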
@Override
public void run() {
long beg = System.currentTimeMillis();
// Try to acquire the trunk write lock; on failure, merge the branch results first and let the lock holder fold them into the trunk later
boolean gotIt = job.getTrunkLock().writeLock().tryLock();
try
{
// Merge together with the trunk content
if (gotIt)
{
mergeTrunk(beg);
job.getJobMergeTime().addAndGet(System.currentTimeMillis() - beg);
}
else
{
mergeBranch(beg);
job.getJobMergeBranchCount().incrementAndGet();
}
} catch (Exception ex) {
logger.error("MergeJobTask execute error", ex);
} finally {
if (gotIt)
job.getTrunkLock().writeLock().unlock();
}
}
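/**
 * Lock-free merge of the pending task results (shallow merge). The partial
 * result is offered to branchResultQueue unless the job epoch changed in the
 * meantime (a timeout-driven reset), in which case it is discarded.
 *
 * @param beg start timestamp in milliseconds, used for the duration log
 */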
void mergeBranch(long beg)
{
int epoch = job.getEpoch().get();
if (job.getJobTimeOut().get())
return;
// 开始中间结果合并
logger.warn(new StringBuilder("==>Start noTrunk merge,instance:")
.append(job.getJobName())
.append(", merge count : ").append(mergeCount)
.append(", total merged count: ")
.append(job.getMergedTaskCount()).toString());
int size = mergeResults.size();
@SuppressWarnings("unchecked")
Map<String, Map<String, Object>>[] results = new java.util.HashMap[size];
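// Copy the list into the array in reverse order (the merge is order-insensitive)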
for (Map<String, Map<String, Object>> r : mergeResults) {
size -= 1;
results[size] = r;
}
Map<String, Map<String, Object>> otherResult;
if (mergeResults.size() == 1)
otherResult = results[0];
else
otherResult = ReportUtil.mergeEntryResult(results, job.getStatisticsRule().getEntryPool(), false, ReduceType.SHALLOW_MERGE);
// Guard against a timeout-driven reset: if the epoch changed, the branch result is stale and must be dropped
if (job.getEpoch().get() == epoch) {
// Queue the partial result; the thread holding the trunk lock will fold it into the trunk
JobMergedResult jr = new JobMergedResult();
jr.setMergeCount(mergeCount);
jr.setMergedResult(otherResult);
branchResultQueue.offer(jr);
} else {
logger.error(String.format("Discard one branch result because the epoch does not match, job:%s", job.getJobName()));
}
logger.warn(new StringBuilder("==>End noTrunk merge,instance:")
.append(job.getJobName())
.append(",once merge consume : ")
.append(System.currentTimeMillis() - beg)
.toString());
results = null;
mergeResults.clear();
}
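/**
 * Blocking variant used for data recovery: waits up to one minute for the
 * trunk write lock and, once acquired, deep-merges the given results together
 * with any existing job result into the trunk.
 *
 * @return true if the lock was acquired and the merge ran, false otherwise
 */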
public static boolean mergeToTrunk(Job job,
List<Map<String, Map<String, Object>>> mergeResults, MasterConfig config)
{
boolean gotIt = false;
try
{
gotIt = job.getTrunkLock().writeLock().tryLock(1, TimeUnit.MINUTES);
boolean flag = false; // true when the existing job result joins the merge
if (gotIt)
{
int size = mergeResults.size();
long beg = System.currentTimeMillis();
if (config.getSaveTmpResultToFile()) {
if (job.getJobResult() != null && job.getJobResult().size() > 0) {
flag = true;
size += 1;
}
}
@SuppressWarnings("unchecked")
Map<String, Map<String, Object>>[] results = new java.util.HashMap[size];
if (flag)
results[0] = job.getJobResult();
for (Map<String, Map<String, Object>> r : mergeResults) {
size -= 1;
results[size] = r;
}
logger.warn(new StringBuilder("==>Start Trunk merge(data recover),instance:")
.append(job.getJobName())
// note: 'size' has been consumed by the fill loop above, so log the list size instead
.append(", merge count : ").append(mergeResults.size())
.append(", total merged count: ")
.append(job.getMergedTaskCount()).toString());
job.setJobResult(ReportUtil.mergeEntryResult(results, job.getStatisticsRule().getEntryPool(), false, ReduceType.DEEP_MERGE));
logger.warn(new StringBuilder("==>End Trunk merge(data recover),instance:")
.append(job.getJobName())
.append(",once merge consume : ")
.append(System.currentTimeMillis() - beg)
.toString());
results = null;
mergeResults.clear();
}
else
{
logger.error("can't got trunk to load recover data.");
}
}
catch (InterruptedException ex)
{
// restore the interrupt flag instead of swallowing the interruption
Thread.currentThread().interrupt();
}
finally
{
if(gotIt)
job.getTrunkLock().writeLock().unlock();
}
return gotIt;
}
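/**
 * Merges the pending results into the trunk; the caller must already hold the
 * trunk write lock. On the final round (all tasks merged, job timed out, or
 * stopping flag set) the temporary result spilled to disk is folded in as
 * well, and the job is marked as fully merged.
 *
 * @param beg start timestamp in milliseconds, used for the duration log
 */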
void mergeTrunk(long beg) throws InterruptedException
{
int size = mergeResults.size();
boolean flag = false; // true when the existing job result joins the merge
if (job.isMerged().get())
return;
Map<String, Map<String, Object>> diskTmpResult = null;
// Final merge round: all task results have arrived, the job timed out, or the master is stopping
if (job.getMergedTaskCount().addAndGet(mergeCount) == job.getTaskCount() || job.getJobTimeOut().get() || stopping)
{
// Disk-for-memory mode: the temporary result was spilled to a file and must be loaded back
if (config.getSaveTmpResultToFile())
{
if (job.getNeedLoadResultFile().compareAndSet(true, false))
{
try {
// load result from file to job.diskResult
JobDataOperation.loadDataToTmp(job, config);
}
catch (AnalysisException e) {
logger.error("loadDataToTmp error.",e);
}
}
boolean gotLock = job.getLoadLock().tryLock(80, TimeUnit.SECONDS);
logger.warn("merge diskResult of " + job.getJobName());
if (gotLock)
{
try
{
diskTmpResult = job.getDiskResult();
job.setDiskResult(null);
}
finally
{
job.getLoadLock().unlock();
}
}
else
{
logger.error("failed to acquire the load lock for the disk result, check now!!!");
throw new RuntimeException("failed to acquire the load lock for the disk result, check now!!!");
}
// the disk snapshot (if any) has been taken over above, so mark it merged
if (job.getDiskResult() == null)
job.setDiskResultMerged(true);
if (diskTmpResult != null) {
size += 1;
logger.warn("diskTmpResult is not null");
}
} else if (job.getJobResult() == null) {
try {
JobDataOperation.loadData(job, config);
}
catch (AnalysisException e) {
logger.error("loadData error.",e);
}
}
}
else
{
if (!config.getSaveTmpResultToFile() &&
job.getJobResult() == null)
{
try {
// load result from file to job.jobResult
JobDataOperation.loadData(job, config);
}
catch (AnalysisException e) {
logger.error("loadData error.",e);
}
}
}
if (job.getJobResult() != null
&& job.getJobResult().size() > 0) {
flag = true;
size += 1;
}
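// Assemble the merge inputs: existing trunk result first (if present), then
// the disk snapshot, then the newly reported task results in reverse order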
@SuppressWarnings("unchecked")
Map<String, Map<String, Object>>[] results = new java.util.HashMap[size];
if (flag)
results[0] = job.getJobResult();
if (diskTmpResult != null)
{
if (flag)
results[1] = diskTmpResult;
else
results[0] = diskTmpResult;
}
for (Map<String, Map<String, Object>> r : mergeResults) {
size -= 1;
results[size] = r;
}
logger.warn(new StringBuilder("==>Start Trunk merge,instance:")
.append(job.getJobName())
.append(", merge count : ").append(mergeCount)
.append(", total merged count: ")
.append(job.getMergedTaskCount()).toString());
job.setJobResult(ReportUtil.mergeEntryResult(results, job.getStatisticsRule().getEntryPool(), false, ReduceType.DEEP_MERGE));
logger.warn(new StringBuilder("==>End Trunk merge,instance:")
.append(job.getJobName())
.append(",once merge consume : ")
.append(System.currentTimeMillis() - beg)
.toString());
boolean checkDisk = true;
if (config.getSaveTmpResultToFile())
checkDisk = job.isDiskResultMerged();
// All merging finished; downstream can now export the data
if (job.getMergedTaskCount().get() == job.getTaskCount() || (job.getJobTimeOut().get() && checkDisk))
job.isMerged().set(true);
results = null;
mergeResults.clear();
}
}