package com.taobao.top.analysis.node.component;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.velocity.VelocityContext;
import com.taobao.top.analysis.config.MasterConfig;
import com.taobao.top.analysis.exception.AnalysisException;
import com.taobao.top.analysis.node.job.JobTaskExecuteInfo;
import com.taobao.top.analysis.node.monitor.IMonitor;
import com.taobao.top.analysis.node.monitor.JobExecutionLog;
import com.taobao.top.analysis.node.monitor.JobTaskExecutionLog;
import com.taobao.top.analysis.node.monitor.MasterMonitorInfo;
import com.taobao.top.analysis.node.monitor.SlaveMonitorInfo;
import com.taobao.top.analysis.util.ChartUtil;
import com.taobao.top.analysis.util.NamedThreadFactory;
import com.taobao.top.analysis.util.ChartUtil.LineEntry;
/**
* Master-side monitoring component.
* @author sihai
*
*/
public class MasterMonitor implements IMonitor<MasterConfig> {
private static final String MONITOR_SYSTEM = "monitorSystem"; // name of the logger used for system-level (per-slave) monitor records
private static final String MONITOR_JOB = "monitorJob"; // name of the logger for job-level monitor records
private static final String MONITOR_JOB_TASK = "monitorJobTask"; // name of the logger for job-task-level monitor records
private static final int MAX_CACHE_SIZE = 100; // size at which a cached list drops its oldest entry before a new one is added
private static final Log logger = LogFactory.getLog(MasterMonitor.class); // general-purpose logger of this class
private static final Log systemLogger = LogFactory.getLog(MONITOR_SYSTEM); // receives every SlaveMonitorInfo passed to report(SlaveMonitorInfo)
private static final Log jobLogger = LogFactory.getLog(MONITOR_JOB); // declared but not used anywhere in this file
private static final Log jobTaskLogger = LogFactory.getLog(MONITOR_JOB_TASK); // receives every JobTaskExecutionLog created in updateCache(String, Collection)
// System-level monitoring information
private MasterConfig config; // Master-side configuration
private ConcurrentHashMap<String, SlaveMonitorInfoContainer> cache; // most recent slave reports, keyed by slave IP, kept for real-time analysis
private ScheduledExecutorService executor; // report-export thread (single-threaded)
// Job-level monitoring
private ConcurrentHashMap<String, JobExecutionLogContainer> jobExecutionLogCache; // most recent job execution data, keyed by job name, kept for real-time analysis
/**
 * Returns the Master configuration previously supplied through {@code setConfig}.
 *
 * @return the current configuration, or {@code null} if none has been set
 */
@Override
public MasterConfig getConfig() {
    return this.config;
}
/**
 * Creates the two (initially empty) caches and starts the periodic export task.
 * <p>
 * The export task is scheduled with fixed delay: the first run happens after twice
 * {@code config.getExportMonitorInterval()} and subsequent runs are spaced by one interval,
 * both expressed in seconds. The configuration must therefore be set before this is called.
 *
 * @throws AnalysisException declared by the interface; not thrown directly by this implementation
 */
@Override
public void init() throws AnalysisException {
    this.cache = new ConcurrentHashMap<String, SlaveMonitorInfoContainer>();
    this.jobExecutionLogCache = new ConcurrentHashMap<String, JobExecutionLogContainer>();

    // Single scheduler thread created through the project's named thread factory.
    this.executor = Executors.newSingleThreadScheduledExecutor(
            new NamedThreadFactory("Master-Monitor", true));
    this.executor.scheduleWithFixedDelay(
            new ExportTask(),
            config.getExportMonitorInterval() * 2,
            config.getExportMonitorInterval(),
            TimeUnit.SECONDS);

    logger.info("monitor init end");
}
/**
 * Stops the export scheduler and empties both caches.
 * <p>
 * Every step is skipped when the corresponding field was never initialised, so the
 * method can be called even if {@code init()} has not run.
 */
@Override
public void releaseResource() {
    if (null != executor) {
        // Orderly shutdown: no new export runs are started after this call.
        executor.shutdown();
    }
    if (null != cache) {
        cache.clear();
    }
    if (null != jobExecutionLogCache) {
        jobExecutionLogCache.clear();
    }
}
/**
 * Stores the Master configuration used by this monitor.
 *
 * @param config the configuration to use from now on
 */
@Override
public void setConfig(final MasterConfig config) {
    this.config = config;
}
/**
 * Receives the monitoring information reported by a Slave.
 * According to the original author, only one thread calls this method.
 *
 * @param info the slave report; its timestamp is overwritten with the Master's clock
 * @return a new, empty {@link MasterMonitorInfo} (the return value is not used yet)
 */
public MasterMonitorInfo report(SlaveMonitorInfo info) {
    // The Master's clock is authoritative for the timestamp.
    final long receivedAt = System.currentTimeMillis();
    info.setTimeStamp(receivedAt);

    // Keep the per-slave cache of latest reports up to date.
    updateCache(info);

    // Write the report to the system monitor log.
    systemLogger.info(info);

    // Nothing meaningful to send back yet; hand out an empty reply.
    MasterMonitorInfo reply = new MasterMonitorInfo();
    return reply;
}
/**
 * Receives the task execution statistics reported by a Slave for one job.
 * <p>
 * A {@code null} or empty collection is ignored.
 *
 * @param jobName name of the job the task statistics belong to
 * @param infos   execution information of the job's tasks; may be {@code null} or empty
 */
public void report(String jobName, Collection<JobTaskExecuteInfo> infos) {
    // Treat null like "nothing to report" instead of failing with a NullPointerException.
    if (infos == null || infos.isEmpty()) {
        return;
    }
    // Update the cache of latest job data.
    updateCache(jobName, infos);
}
/**
 * Fills the given Velocity context with the data of the monitoring page.
 * <p>
 * Two entries are put into the context:
 * <ul>
 * <li>{@code "slaveList"} - the most recent report of every slave that has cached reports;</li>
 * <li>{@code "picList"} - the charts produced by {@code draw} from a snapshot of all cached reports.</li>
 * </ul>
 * Slaves whose container is currently empty are skipped. A container can be empty for a
 * short time, e.g. right after it was registered and before its first report was added,
 * or after the export task cleared it.
 *
 * @param context the Velocity context to populate
 */
public void getData(VelocityContext context) {
    Map<String, List<SlaveMonitorInfo>> snapshot = new HashMap<String, List<SlaveMonitorInfo>>();
    // Latest state of every slave.
    List<SlaveMonitorInfo> slaveList = new ArrayList<SlaveMonitorInfo>(cache.size());
    for (Map.Entry<String, SlaveMonitorInfoContainer> entry : cache.entrySet()) {
        SlaveMonitorInfoContainer container = entry.getValue();
        List<SlaveMonitorInfo> history;
        // Acquire the lock before entering try, so unlock() only runs when the lock is held.
        container.lock.lock();
        try {
            // Copy under the lock; everything below works on the private copy.
            history = new ArrayList<SlaveMonitorInfo>(container.infoList);
        } finally {
            container.lock.unlock();
        }
        if (history.isEmpty()) {
            // No report available: there is no "latest" element to show or chart.
            continue;
        }
        snapshot.put(entry.getKey(), history);
        slaveList.add(history.get(history.size() - 1));
    }
    context.put("slaveList", slaveList);
    context.put("picList", draw(snapshot));
}
/**
 * Adds a slave report to the cache, keeping at most {@code MAX_CACHE_SIZE} of the
 * newest reports per slave (keyed by the slave's IP).
 * <p>
 * The container for the slave is created on demand. After the container's lock has been
 * acquired, the method checks that the container is still the one registered in the cache;
 * if it was removed in the meantime, the lookup is repeated so the report is not appended
 * to a container that is no longer reachable.
 *
 * @param info the report to cache
 */
private void updateCache(SlaveMonitorInfo info) {
    final String ip = info.getIp();
    while (true) {
        SlaveMonitorInfoContainer container = cache.get(ip);
        if (container == null) {
            SlaveMonitorInfoContainer fresh = new SlaveMonitorInfoContainer();
            SlaveMonitorInfoContainer existing = cache.putIfAbsent(ip, fresh);
            // Another thread may have registered a container first; use the winner.
            container = (existing != null) ? existing : fresh;
        }
        // Acquire the lock before entering try, so unlock() only runs when the lock is held.
        container.lock.lock();
        try {
            if (cache.get(ip) != container) {
                // The container was detached from the cache while we waited; start over.
                continue;
            }
            if (container.infoList.size() >= MAX_CACHE_SIZE) {
                // Drop the oldest report.
                container.infoList.remove(0);
            }
            container.infoList.add(info);
            return;
        } finally {
            container.lock.unlock();
        }
    }
}
/**
 * Records the task execution information of one job.
 * <p>
 * For every element of {@code infos} a {@link JobTaskExecutionLog} is created, written to
 * the job-task logger, appended to the job's list of recent task logs (bounded by
 * {@code MAX_CACHE_SIZE}, oldest dropped first) and added to the job's aggregated
 * {@link JobExecutionLog} via {@code plus}.
 * <p>
 * The container for the job is created on demand. After the container's lock has been
 * acquired, the method checks that the container is still the one registered in the cache;
 * if it was removed in the meantime, the lookup is repeated so the data is not recorded in
 * a container that is no longer reachable.
 *
 * @param jobName name of the job
 * @param infos   task execution information to record; must not be {@code null}
 */
private void updateCache(String jobName, Collection<JobTaskExecuteInfo> infos) {
    while (true) {
        JobExecutionLogContainer container = jobExecutionLogCache.get(jobName);
        if (container == null) {
            JobExecutionLogContainer fresh = new JobExecutionLogContainer(jobName);
            JobExecutionLogContainer existing = jobExecutionLogCache.putIfAbsent(jobName, fresh);
            // Another thread may have registered a container first; use the winner.
            container = (existing != null) ? existing : fresh;
        }
        // Acquire the lock before entering try, so unlock() only runs when the lock is held.
        container.lock.lock();
        try {
            if (jobExecutionLogCache.get(jobName) != container) {
                // The container was detached from the cache while we waited; start over.
                // Nothing has been logged or recorded yet, so the retry cannot duplicate data.
                continue;
            }
            for (JobTaskExecuteInfo info : infos) {
                JobTaskExecutionLog taskLog = new JobTaskExecutionLog(jobName, info);
                if (container.logList.size() >= MAX_CACHE_SIZE) {
                    // Drop the oldest task log.
                    container.logList.remove(0);
                }
                jobTaskLogger.info(taskLog);
                container.logList.add(taskLog);
                container.jobExecutionLog.plus(taskLog);
            }
            return;
        } finally {
            container.lock.unlock();
        }
    }
}
/**
 * Renders one line chart per monitored metric and returns the list of generated pictures.
 * <p>
 * Every chart is written to {@code <monitorDocRoot>/images/<fileName>} and is referenced in
 * the returned list by the URL {@code /images/<fileName>}. The last argument of each call
 * is the metric code understood by {@code getMultiLineData}.
 *
 * @param snapshot cached reports per slave, as collected by {@code getData}
 * @return the generated pictures, in the order they were drawn
 */
private List<Pic> draw(Map<String, List<SlaveMonitorInfo>> snapshot) {
    List<Pic> picList = new ArrayList<Pic>();
    // System / JVM level metrics of every slave.
    addChart(picList, snapshot, "最近一分钟load走势", "Load", "load.jpg", 0);
    addChart(picList, snapshot, "JVM持有的内存总大小走势", "JVM Total Memory", "jvmTotalMemory.jpg", 1);
    addChart(picList, snapshot, "JVM空闲内存大小走势", "JVM Free Memory", "jvmFreeMemory.jpg", 2);
    addChart(picList, snapshot, "JVM可使用的最大内存大小走势", "JVM Max Memory", "jvmMaxMemory.jpg", 3);
    addChart(picList, snapshot, "JVM活着的总线程数走势", "JVM Live Thread Count", "jvmThreadCount.jpg", 4);
    addChart(picList, snapshot, "自从 JVM启动或峰值重置以来峰值活动线程计数走势", "JVM Peak Thread Count", "jvmPeakThreadCount.jpg", 5);
    // Map key / value counts.
    addChart(picList, snapshot, "Map key总数走势", "Map Key", "mapKey.jpg", 9);
    addChart(picList, snapshot, "Map value总数走势", "Map Value", "mapValue.jpg", 10);
    // Consumed data volume.
    addChart(picList, snapshot, "处理数据总大小走势", "Consume Data Size", "consumeDataSize.jpg", 11);
    addChart(picList, snapshot, "处理数据总行数走势", "Consume Data Line", "consumeDataLine.jpg", 12);
    addChart(picList, snapshot, "处理空数据行数走势", "Consume Empty Data Line", "consumeEmptyDataLine.jpg", 13);
    addChart(picList, snapshot, "处理异常数据行数走势", "Consume Exception Data Line", "consumeExceptionDataLine.jpg", 14);
    // Task pulling / execution counters.
    addChart(picList, snapshot, "Slave尝试拉取任务次数趋势", "Slave Try Pull Task Count", "slaveTryPullTaskCount.jpg", 19);
    addChart(picList, snapshot, "Slave消耗在拉取任务的时间", "Slave Pull Task Consume Time", "slavePullTaskConsumeTime.jpg", 18);
    addChart(picList, snapshot, "Slave拉取任务总数", "Slave Pulled Task Count", "slavePulledTaskCount.jpg", 17);
    addChart(picList, snapshot, "Slave消耗在执行任务的时间", "Slave Execute Task Consume Time", "slaveExecuteTaskTime.jpg", 16);
    addChart(picList, snapshot, "Slave执行任务总数", "Slave Executed Task Count", "slaveExecutedTaskCount.jpg", 15);
    // Averages.
    addChart(picList, snapshot, "一个任务平均消耗时间趋势", "Average Task Consume Time", "averageTaskConsumeTime.jpg", 8);
    addChart(picList, snapshot, "平均每次成功拉取任务个数趋势", "Average Pulled Task Count", "averagePulledTaskCount.jpg", 6);
    addChart(picList, snapshot, "平均每次拉取任务消耗时间趋势", "Average Pull Task Consume Time", "averagePullTaskConsumeTime.jpg", 7);
    return picList;
}

/**
 * Draws a single line chart for one metric and appends its descriptor to {@code picList}.
 *
 * @param picList  list receiving the descriptor of the generated picture
 * @param snapshot cached reports per slave
 * @param name     chart title, also used as the picture name
 * @param yLabel   label of the value axis
 * @param fileName file name of the image inside the {@code images} directory
 * @param type     metric code passed to {@code getMultiLineData}
 */
private void addChart(List<Pic> picList, Map<String, List<SlaveMonitorInfo>> snapshot,
        String name, String yLabel, String fileName, int type) {
    String imagePath = config.getMonitorDocRoot() + File.separator + "images" + File.separator + fileName;
    ChartUtil.drawLine(name, "时间", yLabel, imagePath, getMultiLineData(snapshot, type));
    picList.add(new Pic(name, "/images/" + fileName));
}
/**
 * Converts the snapshot into chart input: one line (list of points) per slave.
 * <p>
 * The x value of a point is its position in the slave's report list (starting at 0); the
 * y value is the metric selected by {@code type}. For a {@code type} outside 0..19 no
 * points are produced, so every slave maps to an empty line.
 *
 * @param snapshot cached reports per slave
 * @param type     metric code (0..19) selecting which getter of the report is charted
 * @return for every key of {@code snapshot} the points of the selected metric
 */
private Map<String, List<LineEntry>> getMultiLineData(Map<String, List<SlaveMonitorInfo>> snapshot, int type) {
    Map<String, List<LineEntry>> result = new HashMap<String, List<LineEntry>>();
    for (Map.Entry<String, List<SlaveMonitorInfo>> slave : snapshot.entrySet()) {
        List<LineEntry> points = new ArrayList<LineEntry>();
        int x = 0;
        for (SlaveMonitorInfo report : slave.getValue()) {
            switch (type) {
            case 0:
                points.add(new LineEntry(x++, report.getSystemLoadAverage()));
                break;
            case 1:
                points.add(new LineEntry(x++, report.getJvmTotalMemory()));
                break;
            case 2:
                points.add(new LineEntry(x++, report.getJvmFreeMemory()));
                break;
            case 3:
                points.add(new LineEntry(x++, report.getJvmMaxMemory()));
                break;
            case 4:
                points.add(new LineEntry(x++, report.getJvmThreadCount()));
                break;
            case 5:
                points.add(new LineEntry(x++, report.getJvmPeakThreadCount()));
                break;
            case 6:
                points.add(new LineEntry(x++, report.getAveragePulledTaskCount()));
                break;
            case 7:
                points.add(new LineEntry(x++, report.getAveragePullTaskConsumeTime()));
                break;
            case 8:
                points.add(new LineEntry(x++, report.getAverageTaskConsumeTime()));
                break;
            case 9:
                points.add(new LineEntry(x++, report.getKeyCount()));
                break;
            case 10:
                points.add(new LineEntry(x++, report.getValueCount()));
                break;
            case 11:
                points.add(new LineEntry(x++, report.getSlaveConsumeDataSize()));
                break;
            case 12:
                points.add(new LineEntry(x++, report.getSlaveConsumeDataLine()));
                break;
            case 13:
                points.add(new LineEntry(x++, report.getSlaveConsumeEmptyLine()));
                break;
            case 14:
                points.add(new LineEntry(x++, report.getSlaveConsumeExceptionLine()));
                break;
            case 15:
                points.add(new LineEntry(x++, report.getSlaveExecutedTaskCount()));
                break;
            case 16:
                points.add(new LineEntry(x++, report.getSlaveExecuteTaskTime()));
                break;
            case 17:
                points.add(new LineEntry(x++, report.getSlavePulledTaskCount()));
                break;
            case 18:
                points.add(new LineEntry(x++, report.getSlavePullTaskConsumeTime()));
                break;
            case 19:
                points.add(new LineEntry(x++, report.getSlaveTryPullTaskCount()));
                break;
            default:
                // Unknown metric code: contribute no point for this report.
                break;
            }
        }
        result.put(slave.getKey(), points);
    }
    return result;
}
/**
 * Builds the name of the directory that receives the period reports of one kind.
 * <p>
 * The result has the form
 * {@code <systemMonitorInfoOutput><systemName>/period/<year>-<month>-<day>/<type>/}
 * (using the platform's file separator). The date is derived from {@code timestamp} in
 * the default time zone and is not zero-padded. No separator is inserted between the
 * configured output root and the system name.
 *
 * @param timestamp time in milliseconds that determines the date part
 * @param type      report kind used as the last path segment
 * @return the directory name, ending with a file separator
 */
private String generteOutputDirectoryName(long timestamp, String type) {
    Calendar day = Calendar.getInstance();
    day.setTimeInMillis(timestamp);
    // Calendar months are zero-based, hence the + 1.
    String datePart = day.get(Calendar.YEAR)
            + "-" + (day.get(Calendar.MONTH) + 1)
            + "-" + day.get(Calendar.DAY_OF_MONTH);

    StringBuilder path = new StringBuilder();
    path.append(config.getSystemMonitorInfoOutput());
    path.append(config.getSystemName());
    String[] segments = new String[] { "period", datePart, type };
    for (String segment : segments) {
        path.append(File.separator).append(segment);
    }
    path.append(File.separator);
    return path.toString();
}
/**
* 导出Master的监控信息到报表
*/
private class ExportTask implements Runnable {
@Override
public void run() {
// 导出系统级别监控信息到period报表
exportSystemMonitorInfo();
// 导出Job级别监控信息到period报表
exportJobMonitorInfo();
// 导出JobTask级别监控信息到period报表
exportJobTaskMonitorInfo();
}
}
/**
 * Exports the cached system-level monitoring information to a CSV file.
 * <p>
 * The file is named {@code <timestamp>.csv} and is created in the directory returned by
 * {@code generteOutputDirectoryName(timestamp, "system")}. It is written as UTF-8 and
 * starts with {@code SlaveMonitorInfo.title()}, followed by one line per cached report.
 * Every container that was written completely is cleared and removed from the cache.
 * If an {@link IOException} occurs, it is logged and the export stops; containers that were
 * not written completely stay in the cache untouched.
 */
private void exportSystemMonitorInfo() {
    long timestamp = System.currentTimeMillis();
    File dir = new File(generteOutputDirectoryName(timestamp, "system"));
    if (!dir.exists() && !dir.mkdirs() && !dir.isDirectory()) {
        // Without the directory the file can not be created; keep the cache for the next run.
        logger.error("Export monitor info failed: can not create directory " + dir.getAbsolutePath());
        return;
    }
    String fileName = String.format("%s%s%d%s", dir.getAbsolutePath(), File.separator, timestamp, ".csv");
    BufferedWriter writer = null;
    try {
        writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName), "utf-8"));
        // Header.
        writer.write(SlaveMonitorInfo.title());
        // Content.
        for (Map.Entry<String, SlaveMonitorInfoContainer> entry : cache.entrySet()) {
            SlaveMonitorInfoContainer container = entry.getValue();
            // Acquire the lock before entering try, so unlock() only runs when the lock is held.
            container.lock.lock();
            try {
                for (SlaveMonitorInfo info : container.infoList) {
                    writer.newLine();
                    writer.write(info.toString());
                }
                container.infoList.clear();
                // Detach the drained container while still holding its lock, and only if it
                // is still the registered one, so there is no gap between clearing and
                // removal in which new reports could be added and then discarded.
                cache.remove(entry.getKey(), container);
            } finally {
                container.lock.unlock();
            }
        }
        writer.flush();
    } catch (IOException e) {
        logger.error("Export monitor info failed:", e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                logger.error("Export monitor info failed:", e);
            }
        }
    }
}
/**
 * Exports the cached job-level monitoring information to two CSV files.
 * <p>
 * Both files are named {@code <timestamp>.csv} and written as UTF-8:
 * <ul>
 * <li>in the {@code "job"} directory: {@code JobExecutionLog.title()} followed by one line
 * per job (the job's aggregated {@link JobExecutionLog});</li>
 * <li>in the {@code "jobTask"} directory: {@code JobTaskExecutionLog.title()} followed by
 * one line per cached task log.</li>
 * </ul>
 * Every container that was written completely is removed from the cache after its task
 * list has been cleared. If an {@link IOException} occurs, it is logged and the export
 * stops; containers that were not written completely stay in the cache.
 */
private void exportJobMonitorInfo() {
    long timestamp = System.currentTimeMillis();
    File jobDir = new File(generteOutputDirectoryName(timestamp, "job"));
    if (!jobDir.exists() && !jobDir.mkdirs() && !jobDir.isDirectory()) {
        // Without the directory the file can not be created; keep the cache for the next run.
        logger.error("Export monitor info failed: can not create directory " + jobDir.getAbsolutePath());
        return;
    }
    String fileName = String.format("%s%s%d%s", jobDir.getAbsolutePath(), File.separator, timestamp, ".csv");
    File jobTaskDir = new File(generteOutputDirectoryName(timestamp, "jobTask"));
    if (!jobTaskDir.exists() && !jobTaskDir.mkdirs() && !jobTaskDir.isDirectory()) {
        logger.error("Export monitor info failed: can not create directory " + jobTaskDir.getAbsolutePath());
        return;
    }
    String fileName2 = String.format("%s%s%d%s", jobTaskDir.getAbsolutePath(), File.separator, timestamp, ".csv");
    BufferedWriter writer = null;
    BufferedWriter writer2 = null;
    try {
        writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName), "utf-8"));
        writer2 = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName2), "utf-8"));
        // Headers.
        writer.write(JobExecutionLog.title());
        writer2.write(JobTaskExecutionLog.title());
        // Content.
        for (Map.Entry<String, JobExecutionLogContainer> entry : jobExecutionLogCache.entrySet()) {
            JobExecutionLogContainer container = entry.getValue();
            // Acquire the lock before entering try, so unlock() only runs when the lock is held.
            container.lock.lock();
            try {
                writer.newLine();
                writer.write(container.jobExecutionLog.toString());
                for (JobTaskExecutionLog info : container.logList) {
                    writer2.newLine();
                    writer2.write(info.toString());
                }
                container.logList.clear();
                // Detach the exported container while still holding its lock, and only if it
                // is still the registered one, so there is no gap between clearing and
                // removal in which new task data could be recorded and then discarded.
                jobExecutionLogCache.remove(entry.getKey(), container);
            } finally {
                container.lock.unlock();
            }
        }
        writer.flush();
        writer2.flush();
    } catch (IOException e) {
        logger.error("Export monitor info failed:", e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                logger.error("Export monitor info failed:", e);
            }
        }
        if (writer2 != null) {
            try {
                writer2.close();
            } catch (IOException e) {
                logger.error("Export monitor info failed:", e);
            }
        }
    }
}
/**
 * Placeholder for exporting job-task-level monitoring information.
 * <p>
 * Intentionally empty: in this file the job-task logs are already written by
 * {@code exportJobMonitorInfo()} (into the "jobTask" directory).
 */
private void exportJobTaskMonitorInfo() {
}
private class SlaveMonitorInfoContainer {
public List<SlaveMonitorInfo> infoList = new ArrayList<SlaveMonitorInfo>();
public ReentrantLock lock = new ReentrantLock();
}
/**
*
*/
private class JobExecutionLogContainer {
public JobExecutionLog jobExecutionLog;
public List<JobTaskExecutionLog> logList;
public ReentrantLock lock;
public JobExecutionLogContainer(String jobName) {
jobExecutionLog = new JobExecutionLog(jobName);
logList = new ArrayList<JobTaskExecutionLog>();
lock = new ReentrantLock();
}
}
/**
 * Descriptor of a generated chart picture: a display name plus the URL under which the
 * image is referenced.
 */
public static class Pic {

    /** Display name of the picture. */
    private String name;

    /** URL of the picture. */
    private String url;

    /**
     * Creates a descriptor with the given name and URL.
     *
     * @param name display name of the picture
     * @param url  URL of the picture
     */
    public Pic(String name, String url) {
        setName(name);
        setUrl(url);
    }

    /** @return the display name of the picture */
    public String getName() {
        return this.name;
    }

    /** @param name the new display name of the picture */
    public final void setName(String name) {
        this.name = name;
    }

    /** @return the URL of the picture */
    public String getUrl() {
        return this.url;
    }

    /** @param url the new URL of the picture */
    public final void setUrl(String url) {
        this.url = url;
    }
}
}