/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package hitune.analysis.mapreduce.processor;
import hitune.analysis.mapreduce.AnalysisProcessorConfiguration;
import hitune.analysis.mapreduce.CSVFileOutputFormat;
import hitune.analysis.mapreduce.processor.FileFilter.FileFilter;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecord;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecordKey;
import org.apache.hadoop.chukwa.extraction.engine.Record;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputCommitter;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.mapred.lib.NLineInputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
/**
* The analysis processing thread, which will invoke a Map/Reduce job to do a specific analysis job.
*
*/
public abstract class AnalysisProcessor implements Runnable {
    static Logger log = Logger.getLogger(AnalysisProcessor.class);

    /** Worker thread that executes the subclass-defined {@link #run()} analysis job. */
    private Thread thread = null;

    String source = "";

    /** Analysis configuration supplied by the caller; read-only after construction. */
    Configuration conf = null;

    static final String SEPERATOR_COMMA = ",";
    static final long DAY_IN_SECONDS = 24 * 3600;

    // NOTE(review): SimpleDateFormat is not thread-safe; this shared static instance
    // assumes it is never used from multiple analysis threads at once -- confirm.
    static SimpleDateFormat day = new java.text.SimpleDateFormat("yyyyMMdd");

    /** True only after the job output has been successfully moved to the final report folder. */
    protected boolean MOVE_DONE = false;

    /** Filtered input paths (separator-joined), populated by {@link #parsingInputPath()}. */
    String inputfiles = null;

    /**
     * Temp report folder to store the reports before all analysis jobs are done.
     */
    static final String REPORT_TMP = "_TMP";

    /**
     * Creates the processor, its worker thread, and the report folder.
     *
     * @param conf analysis configuration; must contain the report-folder setting
     */
    public AnalysisProcessor(Configuration conf) {
        this.conf = conf;
        log.debug(this.conf.get("tmpjars"));
        thread = new Thread(this);
        // Make sure the report folder exists before any job tries to write into it.
        GenReportHome();
    }

    /** Creates the configured report folder if it does not already exist. */
    private void GenReportHome() {
        try {
            FileSystem fs = FileSystem.get(this.conf);
            Path reportfolder = new Path(this.conf.get(AnalysisProcessorConfiguration.reportfolder));
            if (!fs.exists(reportfolder)) fs.mkdirs(reportfolder);
        } catch (IOException e) {
            // FIX: log the exception with the message instead of dumping it to stderr.
            log.error("Cannot create report folder", e);
        }
    }

    /**
     * @param outputfolder the user-visible report folder
     * @return the temporary per-job output folder ("&lt;outputfolder&gt;_TMP/&lt;filename&gt;")
     */
    protected String getTempOutputDir(String outputfolder) {
        return outputfolder + REPORT_TMP + "/" + getOutputFileName();
    }

    /** @return the configured name of this processor's output file. */
    public String getOutputFileName() {
        return this.conf.get(AnalysisProcessorConfiguration.outputfilename);
    }

    /**
     * Splits {@code src} on {@code seperator} (a regex when it occurs in {@code src}).
     *
     * @param src       the string to split; "", "null" and "*" are wildcards
     * @param seperator the separator to split on
     * @return null for a wildcard/empty source (meaning "match everything", see
     *         {@link #isMatched}); otherwise the list of tokens
     */
    static protected List<String> String2List(String src, String seperator) {
        if (src == null || src.equals("") || src.equals("null") || src.equals("*")) {
            // null means "no filtering": isMatched() treats it as match-all.
            return null;
        }
        List<String> results = new ArrayList<String>();
        if (src.indexOf(seperator) != -1) {
            for (String item : src.split(seperator)) {
                results.add(item);
            }
        } else {
            // No separator present: the whole string is the single token.
            results.add(src);
        }
        return results;
    }

    /**
     * Joins {@code list} with {@code seperator}; the inverse of {@link #String2List}.
     *
     * @return "" for a null or empty list
     */
    static protected String List2String(List<String> list, String seperator) {
        if (list == null || list.isEmpty()) {
            return "";
        }
        StringBuilder result = new StringBuilder();
        for (String item : list) {
            // Prepend the separator from the second item on; avoids the original's
            // append-then-truncate dance.
            if (result.length() > 0) {
                result.append(seperator);
            }
            result.append(item);
        }
        return result.toString();
    }

    /**
     * Initializes the processor and, on success, starts the worker thread.
     */
    public void start() {
        if (!init()) {
            log.error("AnalyzerProcessor: " + this.getClass().getSimpleName() + "'s output: " + getOutputFileName() + " intializing failed");
            // FIX: do not launch the worker thread when initialization failed --
            // run() would otherwise operate on a null input-file list. join() on the
            // never-started thread returns immediately and reports FAILED.
            return;
        }
        if (thread != null) {
            thread.start();
            log.info("AnalyzerProcessor: " + this.getClass().getSimpleName() + "'s output: " + getOutputFileName() + " started...");
        }
    }

    /**
     * Waits for the worker thread to finish and logs SUCCESS/FAILED based on
     * {@link #getStatus()}.
     *
     * @throws InterruptedException if the current thread is interrupted while waiting
     */
    public void join() throws InterruptedException {
        if (thread != null) {
            thread.join();
            if (getStatus()) {
                log.info("AnalyzerProcessor: " + this.getClass().getSimpleName() + "'s output: " + getOutputFileName() + " SUCCESS!");
            } else {
                log.info("AnalyzerProcessor: " + this.getClass().getSimpleName() + "'s output: " + getOutputFileName() + " FAILED!");
            }
        } else {
            log.info("AnalyzerProcessor: " + this.getClass().getSimpleName() + "'s output: " + getOutputFileName() + " FAILED!");
        }
    }

    /** @return true when the results were moved to the final report folder. */
    public boolean getStatus() {
        return MOVE_DONE;
    }

    /**
     * Merge the output file into one, and only emit the header(field name) once.
     *
     * @param <K>
     * @param <V>
     */
    public static class NullKeyIdentityReducer<K, V> extends MapReduceBase implements Reducer<K, V, K, V> {
        // FIX: was static, so a reused JVM dropped the header of every merge job after
        // the first; an instance field starts out true for each reducer instance.
        boolean isHeader = true;

        public void reduce(K key, Iterator<V> values,
                OutputCollector<K, V> output, Reporter reporter)
                throws IOException {
            while (values.hasNext()) {
                output.collect(null, values.next());
                if (isHeader) {
                    // The first key group carries the (identical) header line from every
                    // input file: emit exactly one copy and drop the duplicates.
                    isHeader = false;
                    break;
                }
            }
        }
    }

    /**
     * Merge multiple output file into one file and only emit the header of csv once.
     */
    class MergeOutput extends Configured implements Tool {
        Configuration configure = null;

        public MergeOutput(Configuration conf) {
            this.configure = conf;
        }

        /**
         * Runs a single-reducer identity job that concatenates args[0] into args[1],
         * deduplicating the csv header via {@link NullKeyIdentityReducer}.
         *
         * @param args args[0] = input folder, args[1] = output folder
         */
        @Override
        public int run(String[] args) throws Exception {
            JobConf conf = new JobConf(this.configure, AnalysisProcessor.class);
            conf.setJobName("MergeOutputFile");
            conf.setInputFormat(TextInputFormat.class);
            conf.setMapperClass(IdentityMapper.class);
            conf.setReducerClass(NullKeyIdentityReducer.class);
            conf.setMapOutputKeyClass(LongWritable.class);
            conf.setMapOutputValueClass(Text.class);
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(Text.class);
            conf.setOutputFormat(CSVFileOutputFormat.class);
            // A single reducer guarantees one merged output file.
            conf.setNumReduceTasks(1);
            FileInputFormat.setInputPaths(conf, args[0]);
            FileOutputFormat.setOutputPath(conf, new Path(args[1]));
            JobClient.runJob(conf);
            return 0;
        }
    }

    /**
     * Move the TEMP output folder to final one(user defined one);
     * If there are multiple files under one job's output folder, it should merge the output into one file.
     * Then rename the folder to the final one.
     *
     * Sets {@link #MOVE_DONE} to true only when the move actually completed.
     *
     * @param job    job configuration used to reach the file system
     * @param output final (user defined) report folder
     * @param result the job's temporary output folder
     */
    protected void moveResults(JobConf job, String output, String result) {
        // Accept-everything filter used for both glob passes below.
        PathFilter acceptAll = new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return true;
            }
        };
        try {
            FileSystem fs = FileSystem.get(job);
            log.debug("move results: " + result);
            Path src = new Path(result + "/" + "*.csv*");
            Path dst = new Path(output);
            if (!fs.exists(dst)) {
                fs.mkdirs(dst);
            }
            FileStatus[] matches = fs.globStatus(src, acceptAll);
            if (matches == null || matches.length == 0) {
                // Nothing to move: the job produced no csv output.
                MOVE_DONE = false;
                return;
            }
            if (matches.length > 1) {
                // Multiple part files: merge them into "_<result>" with a
                // single-reducer job, then swap the merged folder into place.
                String[] args = new String[2];
                args[0] = result;
                args[1] = "_" + result;
                fs.delete(new Path("_" + result), true);
                ToolRunner.run(new MergeOutput(this.conf), args);
                fs.delete(new Path(result), true);
                fs.rename(new Path("_" + result), new Path(result));
            }
            // Re-glob: the merge job above replaced the original part files.
            matches = fs.globStatus(src, acceptAll);
            for (FileStatus file : matches) {
                String filename = file.getPath().getName();
                // Output files look like "<name>-r-00000"; keep the user-visible part.
                // FIX: guard indexOf() == -1, which previously threw
                // StringIndexOutOfBoundsException for names without a dash.
                int dash = filename.indexOf("-");
                if (dash != -1) {
                    filename = filename.substring(0, dash);
                }
                log.debug("move file:" + filename);
                Path toFile = new Path(output + "/" + filename);
                if (fs.exists(toFile)) {
                    fs.delete(toFile, true);
                }
                fs.rename(file.getPath(), toFile);
                // Drop the now-empty job output folder, then the shared _TMP parent
                // once no sibling job folders remain.
                fs.delete(file.getPath().getParent(), true);
                FileStatus[] tmpDirs = fs.listStatus(file.getPath().getParent().getParent());
                if (tmpDirs == null || tmpDirs.length == 0) {
                    fs.delete(file.getPath().getParent().getParent(), true);
                }
                break;
            }
            // FIX: success is flagged only after the move completed; the original set
            // MOVE_DONE = true unconditionally, masking every failure path above.
            MOVE_DONE = true;
        } catch (IOException e) {
            log.error("Failed to move results to " + output, e);
            MOVE_DONE = false;
        } catch (Exception e) {
            // FIX: the merge job failing (ToolRunner) now also reports failure instead
            // of being silently overridden by MOVE_DONE = true.
            log.error("Failed to merge/move results to " + output, e);
            MOVE_DONE = false;
        }
    }

    /**
     * @return true when filterlist is null/empty (wildcard, see {@link #String2List})
     *         or contains target
     */
    static protected boolean isMatched(List filterlist, String target) {
        return (filterlist == null || filterlist.isEmpty() || filterlist.contains(target));
    }

    /* (non-Javadoc)
     * @see java.lang.Runnable#run()
     */
    @Override
    abstract public void run();

    /** @return true when the input paths were resolved successfully. */
    public boolean init() {
        return parsingInputPath();
    }

    /**
     * Resolves the configured data-source paths through the configured
     * {@link FileFilter} implementation and stores the joined result in
     * {@link #inputfiles}.
     *
     * @return false when no configuration is present or no input file survived filtering
     */
    protected boolean parsingInputPath() {
        if (conf == null) {
            return false;
        }
        String filterclass = conf.get(AnalysisProcessorConfiguration.filefilter);
        if (filterclass == null || filterclass.equals("")) {
            filterclass = "hitune.analysis.mapreduce.processor.FileFilter.DefaultFileFilter";
        }
        String[] paths = conf.getStrings(AnalysisProcessorConfiguration.datasource);
        String pattern = conf.get(AnalysisProcessorConfiguration.filefilter_pattern, null);
        StringBuilder str = new StringBuilder();
        for (String path : paths) {
            log.debug("path to scan: " + path);
            FileFilter filter = null;
            try {
                // The filter class is loaded reflectively; it must expose a
                // (Configuration, String) constructor.
                filter = (FileFilter) Class.forName(filterclass)
                        .getConstructor(new Class[] { Configuration.class, String.class })
                        .newInstance(new Object[] { conf, pattern });
                if (str.length() != 0) {
                    str.append(FileFilter.SEPARATOR);
                }
                str.append(filter.filter(new Path(path)));
            } catch (IllegalArgumentException e) {
                log.error("Cannot apply file filter " + filterclass + " to " + path, e);
            } catch (SecurityException e) {
                log.error("Cannot apply file filter " + filterclass + " to " + path, e);
            } catch (InstantiationException e) {
                log.error("Cannot apply file filter " + filterclass + " to " + path, e);
            } catch (IllegalAccessException e) {
                log.error("Cannot apply file filter " + filterclass + " to " + path, e);
            } catch (InvocationTargetException e) {
                log.error("Cannot apply file filter " + filterclass + " to " + path, e);
            } catch (NoSuchMethodException e) {
                log.error("Cannot apply file filter " + filterclass + " to " + path, e);
            } catch (ClassNotFoundException e) {
                log.error("Cannot apply file filter " + filterclass + " to " + path, e);
            }
        }
        // (A StringBuilder is never null and never equals ""; length is the real test.)
        if (str.length() == 0) {
            log.error("No input file is met the filtering requirments");
            return false;
        }
        inputfiles = str.toString();
        return true;
    }

    /**
     * @param args unused
     */
    public static void main(String[] args) {
        // Intentionally empty: this abstract class is not a standalone entry point.
    }
}