/*
* Copyright 2012 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.linkedin.whiteelephant.analysis;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.Properties;
import java.util.TimeZone;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;

import org.apache.avro.Schema;
import org.apache.avro.Schema.Type;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapred.AvroWrapper;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyValueInputFormat;
import org.apache.avro.mapreduce.AvroKeyValueOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.log4j.Logger;

import com.linkedin.whiteelephant.mapreduce.lib.job.StagedOutputJob;
import com.linkedin.whiteelephant.mapreduce.lib.job.StagedOutputJobExecutor;
import com.linkedin.whiteelephant.parsing.LogData;
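/**
 * Computes aggregated per-hour resource usage from parsed Hadoop job history logs.
 * Reads Avro-encoded {@link LogData} records laid out under <code>jobs.output.path</code>
 * as <i>cluster/year/day</i> directories and, for each day, writes one
 * {@link AttemptStatsKey}/{@link AttemptStatsValue} pair per hour bucket to the same
 * layout under <code>usage.output.path</code>.
 *
 * <p>Required properties: <code>jobs.output.path</code>, <code>usage.output.path</code>, and
 * <code>num.days.forced</code>. The optional <code>incremental</code> flag skips days whose
 * output already exists.
 *
 * <p>A minimal driver sketch (how the properties and executor get set up here is an
 * assumption for illustration, not something this class prescribes):
 * <pre>
 *   Properties props = ...; // must contain the properties listed above
 *   StagedOutputJobExecutor executor = ...;
 *   new ComputeUsagePerHour("compute-usage-per-hour", props).execute(executor);
 * </pre>
 */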
public class ComputeUsagePerHour
{
private static final String REDUCE_SHUFFLE_BYTES = "REDUCE_SHUFFLE_BYTES";
private static final String CPU_MILLISECONDS = "CPU_MILLISECONDS";
private static final String SPILLED_RECORDS = "SPILLED_RECORDS";
private final Logger _log;
private final FileSystem _fs;
private final Properties _props;
private final String _name;
private final static TimeZone timeZone = TimeZone.getTimeZone("GMT");
private final String _jobsOutputPathRoot;
private final String _usageOutputPathRoot;
private final boolean _incremental;
private final int _numDaysForced;
public ComputeUsagePerHour(String name, Properties props) throws IOException
{
_log = Logger.getLogger(name);
_name = name;
_props = props;
_fs = FileSystem.get(StagedOutputJob.createConfigurationFromProps(_props));
if (_props.get("jobs.output.path") == null) {
throw new IllegalArgumentException("joined.output.path is not specified.");
}
if (_props.get("usage.output.path") == null) {
throw new IllegalArgumentException("usage.output.path is not specified.");
}
if (_props.get("num.days.forced") == null) {
throw new IllegalArgumentException("num.days.forced is not specified");
}
_usageOutputPathRoot = (String)_props.get("usage.output.path");
_jobsOutputPathRoot = (String)_props.get("jobs.output.path");
_incremental = Boolean.parseBoolean((String)_props.get("incremental"));
_numDaysForced = Integer.parseInt((String)_props.get("num.days.forced"));
}
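  /**
   * Walks the <i>cluster/year/day</i> directory tree under the jobs output path and
   * submits one MapReduce job per day of input, waiting for each year's batch of jobs
   * to complete before starting the next. In incremental mode a day is skipped when its
   * output already exists, unless it falls within <code>num.days.forced</code> days of
   * the current time, in which case it is reprocessed anyway.
   */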
public void execute(StagedOutputJobExecutor executor) throws IOException, InterruptedException, ExecutionException
{
FileStatus[] clusterPaths = _fs.listStatus(new Path(_jobsOutputPathRoot));
for (FileStatus clusterPath : clusterPaths)
{
String clusterName = clusterPath.getPath().getName();
FileStatus[] yearPaths = _fs.listStatus(clusterPath.getPath());
for (FileStatus yearPath : yearPaths)
{
String year = yearPath.getPath().getName();
System.out.println("Searching under " + yearPath.getPath());
FileStatus[] dayPaths = _fs.listStatus(yearPath.getPath());
for (FileStatus dayPath : dayPaths)
{
String day = dayPath.getPath().getName();
String pattern = dayPath.getPath().toString() + "/*.avro";
          String outputPathForDay = String.format("%s/%s/%s/%s", _usageOutputPathRoot, clusterName, year, day);
FileStatus[] inputFiles = _fs.globStatus(new Path(pattern));
StringBuilder msg = new StringBuilder(pattern + " => " + inputFiles.length + " files");
if (inputFiles.length > 0)
{
Calendar cal = Calendar.getInstance(timeZone);
long nowMillis = cal.getTimeInMillis();
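            // day directories are named MMdd; extract the month (0-based for Calendar) and day of month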
cal.set(Integer.parseInt(year), Integer.parseInt(day.substring(0, 2)) - 1, Integer.parseInt(day.substring(2, 4)));
long thenMillis = cal.getTimeInMillis();
double elapsedDays = Math.max(0.0, ((double)(nowMillis - thenMillis))/(24*3600*1000));
if (!_incremental || !_fs.exists(new Path(outputPathForDay)) || elapsedDays < _numDaysForced)
{
long totalLength = 0;
for (FileStatus stat : inputFiles)
{
totalLength += stat.getLen();
}
              msg.append(String.format(", %d MB", totalLength / 1024 / 1024));
              System.out.println(msg);
              // one reducer per GB of input, but always at least one reducer
              int numReducers = Math.max(1, (int)Math.ceil(((double)totalLength) / 1024 / 1024 / 1024));
submitJob(executor, pattern, outputPathForDay, clusterName, year, day, numReducers);
}
else if (_incremental && _fs.exists(new Path(outputPathForDay)))
{
msg.append(" (skipping)");
System.out.println(msg);
}
}
}
executor.waitForCompletion();
}
}
}
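  /**
   * Configures and submits a staged job for one day of input. The job reads
   * string-keyed {@link LogData} records, maps them to per-hour
   * {@link AttemptStatsKey}/{@link AttemptStatsValue} pairs, and reduces by summing
   * all values that share a key. Output is written to a temporary path under /tmp and
   * promoted to the final path by {@link StagedOutputJob} on success.
   */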
private void submitJob(StagedOutputJobExecutor executor, String inputPattern, String output, String clusterName, String year, String day, int numReducers)
{
List<String> inputPaths = new ArrayList<String>();
inputPaths.add(inputPattern);
final StagedOutputJob job = StagedOutputJob.createStagedJob(
_props,
_name + "-" + "usage-per-hour-" + clusterName + "-" + year + "-" + day,
inputPaths,
"/tmp" + output,
output,
_log);
final Configuration conf = job.getConfiguration();
conf.set("cluster.name", clusterName);
job.setOutputKeyClass(BytesWritable.class);
job.setOutputValueClass(BytesWritable.class);
job.setInputFormatClass(AvroKeyValueInputFormat.class);
job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
AvroJob.setInputKeySchema(job, Schema.create(Type.STRING));
AvroJob.setInputValueSchema(job, LogData.SCHEMA$);
AvroJob.setMapOutputKeySchema(job, AttemptStatsKey.SCHEMA$);
AvroJob.setMapOutputValueSchema(job, AttemptStatsValue.SCHEMA$);
AvroJob.setOutputKeySchema(job, AttemptStatsKey.SCHEMA$);
AvroJob.setOutputValueSchema(job, AttemptStatsValue.SCHEMA$);
job.setNumReduceTasks(numReducers);
job.setMapperClass(ComputeUsagePerHour.TheMapper.class);
job.setReducerClass(ComputeUsagePerHour.TheReducer.class);
executor.submit(job);
}
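  /**
   * Expands each parsed job into per-attempt, per-hour usage records. Attempts with a
   * null status or with missing or non-positive start/finish times are counted and
   * skipped.
   */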
public static class TheMapper extends Mapper<AvroKey<String>, AvroValue<LogData>, AvroWrapper<AttemptStatsKey>, AvroWrapper<AttemptStatsValue>>
{
private String clusterName;
@Override
    protected void setup(Context context) throws IOException, InterruptedException
    {
      System.out.println("Setting up mapper");
      Configuration conf = context.getConfiguration();
      clusterName = conf.get("cluster.name");
      System.out.println("Got cluster " + clusterName);
      super.setup(context);
    }
@Override
    protected void map(AvroKey<String> key, AvroValue<LogData> value, Context context) throws IOException, InterruptedException
{
LogData data = value.datum();
if (data.getEntry() != null && data.getEntry() instanceof com.linkedin.whiteelephant.parsing.Job)
{
com.linkedin.whiteelephant.parsing.Job job = (com.linkedin.whiteelephant.parsing.Job)data.getEntry();
for (com.linkedin.whiteelephant.parsing.Task task : job.getTasks())
{
for (com.linkedin.whiteelephant.parsing.Attempt attempt : task.getAttempts())
{
if (attempt.getTaskStatus() == null)
{
System.out.println("Status null for job " + attempt.getJobId() + " attempt " + attempt.getTaskAttemptId());
context.getCounter("Job Analysis", "Status null").increment(1);
continue;
}
else
{
context.getCounter("Job Analysis", "Status " + attempt.getTaskStatus()).increment(1);
}
if (attempt.getStartTime() == null || attempt.getFinishTime() == null)
{
System.out.println("Missing either startTime or finishTime");
context.getCounter("Job Analysis", "Missing start or finish").increment(1);
continue;
}
if (!(attempt.getStartTime() > 0 && attempt.getFinishTime() > 0))
{
System.out.println("Either startTime or finishTime is not positive");
context.getCounter("Job Analysis", "Not positive start or finish").increment(1);
continue;
}
AttemptStatsKey statsKey = new AttemptStatsKey();
statsKey.setCluster(clusterName);
statsKey.setExcess(((com.linkedin.whiteelephant.parsing.DerivedAttemptData)attempt.getDerived()).getExcess());
statsKey.setStatus(TaskStatus.valueOf(attempt.getTaskStatus().toString()));
statsKey.setType(TaskType.valueOf(attempt.getType().toString().toUpperCase()));
statsKey.setUser(job.getUser());
writeStats(statsKey, attempt, context);
}
}
}
}
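    /**
     * Splits the attempt's [start, finish) interval into hour-aligned buckets and emits
     * one stats value per bucket, pro-rating counters by the fraction of the attempt's
     * runtime falling in that bucket. Note the key's unit is the Avro-generated
     * com.linkedin.whiteelephant.analysis.TimeUnit, distinct from the imported
     * java.util.concurrent.TimeUnit used for the bucketing arithmetic.
     */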
private void writeStats(AttemptStatsKey key, com.linkedin.whiteelephant.parsing.Attempt attempt, Context context) throws IOException, InterruptedException
{
Long start = attempt.getStartTime();
Long end = attempt.getFinishTime();
if (end < start)
{
throw new RuntimeException(String.format("finishTime %s is less than startTime %s", end, start));
}
TimeUnit unit = TimeUnit.HOURS;
Long currentTime = start;
key.setUnit(com.linkedin.whiteelephant.analysis.TimeUnit.HOURS);
while (currentTime < end)
{
Calendar currentUnitStart = getCalendarForTime(unit, currentTime);
Calendar currentUnitEnd = getCalendarForTime(unit, currentTime);
if (unit == TimeUnit.HOURS)
{
currentUnitEnd.add(Calendar.HOUR, 1);
}
else
{
throw new RuntimeException("Unsupported time unit: " + unit);
}
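        // clamp the bucket end to the attempt's finish time; percentOfTotal is the share
        // of the attempt's total runtime that falls within this bucket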
long nextMillis = Math.min(currentUnitEnd.getTimeInMillis(),end);
double percentOfTotal = (nextMillis - currentTime)/((double)(end - start));
AttemptStatsValue value = new AttemptStatsValue();
value.setElapsedMinutes((nextMillis - currentTime)/1000.0/60.0);
if (attempt.getCounters().get(CPU_MILLISECONDS) != null)
{
value.setCpuMinutes(percentOfTotal * attempt.getCounters().get(CPU_MILLISECONDS)/1000.0/60.0);
}
if (attempt.getCounters().get(SPILLED_RECORDS) != null)
{
value.setSpilledRecords((long)(percentOfTotal * attempt.getCounters().get(SPILLED_RECORDS)));
}
        if (attempt.getCounters().get(REDUCE_SHUFFLE_BYTES) != null)
        {
          // pro-rate shuffle bytes like the other counters so an attempt spanning several
          // hour buckets is not counted in full in every bucket
          value.setReduceShuffleBytes((long)(percentOfTotal * attempt.getCounters().get(REDUCE_SHUFFLE_BYTES)));
        }
key.setTime(currentUnitStart.getTimeInMillis());
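        // flag the buckets in which the attempt started and finished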
if ((key.getTime() + unit.toMillis(1)) >= start && start >= key.getTime())
{
value.setStarted(1);
}
if ((key.getTime() + unit.toMillis(1)) >= end && end >= key.getTime())
{
value.setFinished(1);
}
currentTime = nextMillis;
context.write(new AvroKey<AttemptStatsKey>(key), new AvroValue<AttemptStatsValue>(value));
}
}
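    /**
     * Returns a GMT calendar truncated to the start of the hour (or day) containing the
     * given time, so callers can advance to the next boundary by adding one unit. The
     * DST offset is saved and restored around the field resets, since clearing those
     * fields can otherwise alter it.
     */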
private static Calendar getCalendarForTime(TimeUnit unit, Long time)
{
Calendar cal = Calendar.getInstance(timeZone);
cal.setTimeInMillis(time);
if (unit == TimeUnit.HOURS)
{
int dstOffset = cal.get(Calendar.DST_OFFSET);
// zero these out so we can advance to the next boundary simply by adding an hour
cal.set(Calendar.MINUTE, 0);
cal.set(Calendar.SECOND, 0);
cal.set(Calendar.MILLISECOND, 0);
// reset the DST offset, since setting above fields to 0 for some reason alters the value
cal.set(Calendar.DST_OFFSET, dstOffset);
}
else if (unit == TimeUnit.DAYS)
{
int dstOffset = cal.get(Calendar.DST_OFFSET);
// zero these out so we can advance to the next boundary simply by adding a day
cal.set(Calendar.HOUR_OF_DAY, 0);
cal.set(Calendar.MINUTE, 0);
cal.set(Calendar.SECOND, 0);
cal.set(Calendar.MILLISECOND, 0);
// reset the DST offset, since setting above fields to 0 for some reason alters the value
cal.set(Calendar.DST_OFFSET, dstOffset);
}
else
{
throw new RuntimeException("Unsupported time unit: " + unit);
}
return cal;
}
}
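  /**
   * Merges all stats values sharing a key into a single record: elapsed minutes and
   * started/finished counts are always summed, while the optional CPU, spill, and
   * shuffle counters are summed only when present, remaining null when no input
   * carried them.
   */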
public static class TheReducer extends Reducer<AvroKey<AttemptStatsKey>, AvroValue<AttemptStatsValue>, AvroWrapper<AttemptStatsKey>, AvroWrapper<AttemptStatsValue>>
{
private String clusterName;
@Override
    protected void setup(Context context) throws IOException, InterruptedException
    {
      System.out.println("Setting up reducer");
      Configuration conf = context.getConfiguration();
      clusterName = conf.get("cluster.name");
      System.out.println("Got cluster " + clusterName);
      super.setup(context);
    }
@Override
protected void reduce(AvroKey<AttemptStatsKey> key, Iterable<AvroValue<AttemptStatsValue>> values, final Context context) throws IOException, InterruptedException
{
AttemptStatsValue merged = new AttemptStatsValue();
merged.setElapsedMinutes(0.0);
merged.setStarted(0);
merged.setFinished(0);
for (AvroValue<AttemptStatsValue> valueWrapped : values)
{
AttemptStatsValue value = valueWrapped.datum();
merged.setElapsedMinutes(value.getElapsedMinutes() + merged.getElapsedMinutes());
merged.setStarted(value.getStarted() + merged.getStarted());
merged.setFinished(value.getFinished() + merged.getFinished());
if (value.getCpuMinutes() != null)
{
if (merged.getCpuMinutes() == null)
{
merged.setCpuMinutes(value.getCpuMinutes());
}
else
{
merged.setCpuMinutes(merged.getCpuMinutes() + value.getCpuMinutes());
}
}
if (value.getSpilledRecords() != null)
{
if (merged.getSpilledRecords() == null)
{
merged.setSpilledRecords(value.getSpilledRecords());
}
else
{
merged.setSpilledRecords(merged.getSpilledRecords() + value.getSpilledRecords());
}
}
if (value.getReduceShuffleBytes() != null)
{
if (merged.getReduceShuffleBytes() == null)
{
merged.setReduceShuffleBytes(value.getReduceShuffleBytes());
}
else
{
merged.setReduceShuffleBytes(merged.getReduceShuffleBytes() + value.getReduceShuffleBytes());
}
}
}
context.write(key, new AvroWrapper<AttemptStatsValue>(merged));
}
}
}