/**
*
*/
package com.taobao.loganalyzer.common;
import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleOutputFormat;
import org.apache.hadoop.util.Progressable;
/**
* @author huangyue.pt
*
*/
public abstract class AbstractProcessor implements Processor {
/*
* (non-Javadoc)
*
* @see com.taobao.loganalyzer.common.Processor#run(java.lang.String,
* java.lang.String, int, int, boolean, java.util.Map)
*/
@SuppressWarnings("unchecked")
@Override
public boolean run(String inputPath, String outputPath, int numMap,
int numReduce, boolean isInputSequenceFile,
Map<String, String> properties) throws Exception {
// TODO Auto-generated method stub
JobConf conf = new JobConf(this.getClass());
System.out.println(">>>" + this.getClass().getCanonicalName());
conf.setJobName(this.getClass().getSimpleName());
FileInputFormat.setInputPaths(conf, inputPath);
FileOutputFormat.setOutputPath(conf, new Path(outputPath));
if (isInputSequenceFile)
conf.setInputFormat(SequenceFileInputFormat.class);
else
conf.setInputFormat(TextInputFormat.class);
conf.setMapperClass(getMapper());
conf.setNumMapTasks(numMap);
conf.setReducerClass(getReducer());
conf.setNumReduceTasks(numReduce);
if ("true".equals(conf.get("map.out.compress"))) {
SequenceFileOutputFormat.setOutputCompressionType(conf,
SequenceFile.CompressionType.BLOCK);
SequenceFileOutputFormat.setOutputCompressorClass(conf,
GzipCodec.class);
}
if (conf.get("mapred.max.track.failures") != null) {
conf.setMaxTaskFailuresPerTracker(Integer.valueOf(conf
.get("mapred.max.track.failures")));
}
for (String propertyKey : properties.keySet())
conf.set(propertyKey, properties.get(propertyKey));
configJob(conf);
JobClient c = new JobClient(conf);
RunningJob job = c.runJob(conf);
return job.isSuccessful();
}
protected abstract void configJob(JobConf conf);
public static class ReportOutFormat<K extends WritableComparable<?>, V extends Writable>
extends MultipleOutputFormat<K, V> {
private TextOutputFormat<K, V> theTextOutputFormat = null;
protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
JobConf job, String name, Progressable arg3) throws IOException {
if (theTextOutputFormat == null) {
theTextOutputFormat = new TextOutputFormat<K, V>();
}
return theTextOutputFormat.getRecordWriter(fs, job, name, arg3);
}
protected String generateFileNameForKeyValue(K key, V value, String name) {
String tag = "";
if (key.toString().split("\u0001").length > 0) {
tag = key.toString().split("\u0001")[0];
}
for (int counter = 0; counter < tag.length(); counter++) {
char c = tag.charAt(counter);
if (!(('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
|| ('0' <= c && c <= '9') || c == '_'))
break;
}
return name + "_" + tag;
}
}
}