/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package hitune.analysis.mapreduce.processor;
import hitune.analysis.mapreduce.AnalysisProcessorConfiguration;
import hitune.analysis.mapreduce.CSVFileOutputFormat;
import hitune.analysis.mapreduce.HiTuneKey;
import hitune.analysis.mapreduce.HiTuneRecord;
import hitune.analysis.mapreduce.MultiSequenceFileInputFormat;
import hitune.analysis.mapreduce.TextArrayWritable;

import java.io.IOException;
import java.io.StringBufferInputStream;
import java.io.StringReader;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecord;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecordKey;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.record.Record;
import org.apache.log4j.Logger;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
*
*
*/
public class InstrumentSamplingTop extends AnalysisProcessor {
static Logger log = Logger.getLogger(InstrumentSamplingTop.class);
/**
 * First-stage mapper.  Filters instrument sampling records by host name and
 * thread state, matches each record's (blank-stripped) call stack against
 * every configured phase pattern, and for each matching phase fans the record
 * out once per entry of [_PHASE_, matched-function-pattern, frame0, frame1, ...]
 * under the composite key [AttemptID/PhaseAlias/ThreadName/ThreadId/Func].
 */
public static class MapClass<K extends Record, V extends Record> extends MapReduceBase implements
        Mapper<K, V, K, V> {
    JobConf conf = null;
    // Host names to accept; semantics of an empty list are decided by isMatched().
    List<String> nodelist = new ArrayList<String>();
    // Phase stack pattern -> function patterns of interest for that phase.
    Map<String, List<String>> phases = new HashMap<String, List<String>>();
    // Phase stack pattern -> human-readable phase name.
    Map<String, String> phasealias = new HashMap<String, String>();
    // Thread states to accept (comes from the "status" job property).
    List<String> statuslist = new ArrayList<String>();

    public void configure(JobConf jobConf) {
        super.configure(jobConf);
        this.conf = jobConf;
        init();
    }

    /**
     * Parses the XML fragment stored in the job's "phases" property and fills
     * {@link #phases} and {@link #phasealias}.  Each &lt;phase&gt; element may
     * carry a phasename, a stack pattern and a comma-separated functions list;
     * a missing/empty stack is normalized to the empty pattern "".
     */
    void parsePhase() {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // NOTE(review): the fragment comes from the job configuration, which is
        // assumed trusted; if it can ever carry user input, disable DTDs here.
        DocumentBuilder parser;
        try {
            parser = factory.newDocumentBuilder();
            // Wrap the fragment in a synthetic root so it forms a well-formed
            // document.  FIX: the deprecated StringBufferInputStream silently
            // drops the high byte of every character; a StringReader-backed
            // InputSource preserves the text intact.
            Document document = parser.parse(new InputSource(new StringReader(
                    "<root>" + conf.get("phases") + "</root>")));
            // Walk every <phase> element of the configuration.
            NodeList rows = document.getElementsByTagName(AnalysisProcessorConfiguration.phase);
            int phasenumber = rows.getLength();
            for (int i = 0; i < phasenumber; i++) {
                Node phase = rows.item(i);
                NodeList fields = phase.getChildNodes();
                String phasename = null;
                String stacks = null;
                String funcs = null;
                List<String> functionlist = new ArrayList<String>();
                for (int j = 0; j < fields.getLength(); j++) {
                    Node fieldNode = fields.item(j);
                    if (!(fieldNode instanceof Element))
                        continue;
                    Element field = (Element) fieldNode;
                    if ("phasename".equals(field.getTagName()) && field.hasChildNodes())
                        phasename = ((org.w3c.dom.Text) field.getFirstChild()).getData().trim();
                    else if ("stack".equals(field.getTagName()) && field.hasChildNodes())
                        stacks = ((org.w3c.dom.Text) field.getFirstChild()).getData();
                    else if ("functions".equals(field.getTagName()) && field.hasChildNodes())
                        funcs = ((org.w3c.dom.Text) field.getFirstChild()).getData();
                }
                // Normalize the stack pattern: strip blanks, null -> "".
                if (stacks != null && stacks.length() != 0) stacks = stacks.replace(" ", "");
                else stacks = "";
                phasealias.put(stacks, phasename);
                if (funcs == null) {
                    continue;
                }
                for (String func : funcs.split(SEPERATOR_COMMA)) {
                    functionlist.add(func);
                }
                this.phases.put(stacks, functionlist);
            }
        } catch (ParserConfigurationException e) {
            // FIX: log with the throwable so the stack trace reaches the log
            // instead of only stderr.
            log.warn("Cannot create an XML parser for the phase configuration", e);
        } catch (SAXException e) {
            log.warn("Malformed phase configuration in the \"phases\" property", e);
        } catch (IOException e) {
            log.warn("I/O failure while reading the phase configuration", e);
        }
    }

    /** Loads the host/status filter lists and the phase patterns from the job conf. */
    private void init() {
        String nodes = conf.get(AnalysisProcessorConfiguration.nodes);
        this.nodelist = String2List(nodes, SEPERATOR_COMMA);
        String status = conf.get("status");
        this.statuslist = String2List(status, SEPERATOR_COMMA);
        parsePhase();
    }

    /**
     * Returns the last pattern in patternList that matches dest; dest itself
     * when none matches; "" when dest is null.
     */
    private String getFuncPattern(String dest, List<String> patternList) {
        String result = dest;
        if (dest == null) {
            return "";
        }
        if (patternList != null && patternList.size() > 0) {
            for (String pattern : patternList) {
                Pattern p = Pattern.compile(pattern);
                Matcher matcher = p.matcher(dest);
                if (matcher.find()) {
                    result = pattern;
                }
            }
        }
        return result;
    }

    /**
     * Emits, for every phase pattern that matches the record's call stack, one
     * cloned record per function entry.
     * Output: &lt;[AttemptID/PhaseAlias/ThreadName/ThreadId/Func], [Callee, isLast]&gt;
     * where funcs[0] is the synthetic "_PHASE_" marker, funcs[1] the matched
     * function pattern, and funcs[2..] the raw "#"-separated stack frames;
     * "isLast" is set only on funcs[2] (the first raw frame).
     */
    @Override
    public void map(K key, V value,
            OutputCollector<K, V> output,
            Reporter reporter) throws IOException {
        HiTuneRecord valproxy = new HiTuneRecord(value);
        HiTuneKey keyproxy = new HiTuneKey(key);
        String hostname = valproxy.getHost();
        String status = valproxy.getValue("ThreadState");
        String stack = valproxy.getValue("CallStack");
        // Normalize the call stack: strip blanks, null -> "".
        if (stack != null && stack.length() != 0) stack = stack.replace(" ", "");
        else stack = "";
        String attemptID = valproxy.getValue("TaskID");
        log.debug("hostname:" + hostname + " ThreadState:" + status + " stack:" + stack + " attemptID:" + attemptID);
        if (isMatched(this.nodelist, hostname)) {
            if (isMatched(this.statuslist, status)) {
                // A record may belong to several phases; test every pattern.
                for (String s : phasealias.keySet()) {
                    log.debug("phasealias:" + s);
                    String phase_name = phasealias.get(s);
                    if (s == null || s.length() == 0) s = "";
                    Pattern p = Pattern.compile(s);
                    // Re-normalization kept from the original; it is a no-op
                    // since stack was already stripped above.
                    if (stack != null && stack.length() != 0) stack = stack.replace(" ", "");
                    else stack = "";
                    Matcher matcher = p.matcher(stack);
                    if (matcher.find()) {
                        String thread_id = valproxy.getValue("ThreadID");
                        String thread_name = valproxy.getValue("ThreadName");
                        try {
                            // Fresh key/value instances per match: the framework
                            // reuses the input objects, so they cannot be emitted
                            // directly multiple times.
                            K newkey = (K) key.getClass().getConstructor().newInstance();
                            V newvalue = (V) value.getClass().getConstructor().newInstance();
                            HiTuneRecord newvalproxy = new HiTuneRecord(newvalue);
                            HiTuneKey newkeyproxy = new HiTuneKey(newkey);
                            String[] fcs = stack.split("#");
                            String[] funcs = new String[fcs.length + 2];
                            funcs[0] = "_PHASE_";
                            funcs[1] = getFuncPattern(stack, this.phases.get(s));
                            System.arraycopy(fcs, 0, funcs, 2, fcs.length);
                            for (int i = 0; i < funcs.length; i++) {
                                newkeyproxy.setKey(attemptID + "/" + phase_name + "/" + thread_name + "/" + thread_id + "/" + funcs[i]);
                                newkeyproxy.setDataType(keyproxy.getDataType());
                                newvalproxy.copyCommonFields(value);
                                newvalproxy.add("func", funcs[i]);
                                newvalproxy.add("thread_id", thread_id);
                                newvalproxy.add("thread_name", thread_name);
                                newvalproxy.add("phase_name", phase_name);
                                newvalproxy.add("phase_stack", s);
                                newvalproxy.add("attempt_id", attemptID);
                                newvalproxy.add("Callee", "1");
                                // funcs[2] is the first raw stack frame; only it
                                // is flagged as the last-level callee.
                                if (i == 2) {
                                    newvalproxy.add("isLast", "1");
                                } else {
                                    newvalproxy.add("isLast", "0");
                                }
                                output.collect((K) newkeyproxy.getObject(), (V) newvalproxy.getObject());
                            }
                        } catch (IllegalArgumentException e) {
                            log.warn("Cannot clone the sampling record", e);
                        } catch (SecurityException e) {
                            log.warn("Cannot clone the sampling record", e);
                        } catch (InstantiationException e) {
                            log.warn("Cannot clone the sampling record", e);
                        } catch (IllegalAccessException e) {
                            log.warn("Cannot clone the sampling record", e);
                        } catch (InvocationTargetException e) {
                            log.warn("Cannot clone the sampling record", e);
                        } catch (NoSuchMethodException e) {
                            log.warn("Cannot clone the sampling record", e);
                        }
                    }
                }
            }
        }
    }
}
/**
 * First-stage reducer.  For each composite key
 * [AttemptID/PhaseAlias/ThreadName/ThreadId/Func] it sums the per-record
 * "Callee" and "isLast" counters into "callee_num" and
 * "last_level_callee_num", then re-emits a single aggregated record under the
 * shorter key [AttemptID/PhaseAlias/ThreadName/ThreadId] for the second job.
 * The analyzer does not tell whether the phase is continuous in time.
 */
public static class ReduceClass<K extends Record, V extends Record> extends MapReduceBase implements
        Reducer<K, V, K, V> {
    // NOTE(review): never read or written; kept for binary compatibility.
    static boolean initialized = false;

    @Override
    public void reduce(K key, Iterator<V> values,
            OutputCollector<K, V> output, Reporter reporter)
            throws IOException {
        long callee_num = 0;
        long lastlevel_callee_num = 0;
        try {
            V val = null;
            HiTuneRecord valproxy = null;
            // New key instance of the same runtime type as the input key.
            K newkey = (K) key.getClass().getConstructor().newInstance();
            HiTuneKey newkeyproxy = new HiTuneKey(newkey);
            // Hadoop guarantees at least one value per key, so val/valproxy
            // are non-null after the loop.
            while (values.hasNext()) {
                val = (V) values.next();
                valproxy = new HiTuneRecord(val);
                callee_num += Integer.parseInt(valproxy.getValue("Callee"));
                lastlevel_callee_num += Integer.parseInt(valproxy.getValue("isLast"));
            }
            V newvalue = (V) val.getClass().getConstructor().newInstance();
            HiTuneRecord newvalproxy = new HiTuneRecord(newvalue);
            newvalproxy.copyCommonFields(val);
            newvalproxy.add("callee_num", "" + callee_num);
            newvalproxy.add("last_level_callee_num", "" + lastlevel_callee_num);
            // Carry the identifying fields of the last record through.
            newvalproxy.add("attempt_id", valproxy.getValue("attempt_id"));
            newvalproxy.add("phase_name", valproxy.getValue("phase_name"));
            newvalproxy.add("phase_stack", valproxy.getValue("phase_stack"));
            newvalproxy.add("thread_name", valproxy.getValue("thread_name"));
            newvalproxy.add("thread_id", valproxy.getValue("thread_id"));
            newvalproxy.add("host", valproxy.getHost());
            newvalproxy.add("func", valproxy.getValue("func"));
            // Drop the trailing "/Func" component so the second job groups per
            // attempt/phase/thread.
            newkeyproxy.setKey(valproxy.getValue("attempt_id")
                    + "/" + valproxy.getValue("phase_name")
                    + "/" + valproxy.getValue("thread_name")
                    + "/" + valproxy.getValue("thread_id"));
            newkeyproxy.setDataType(new HiTuneKey(key).getDataType());
            output.collect((K) newkeyproxy.getObject(), (V) newvalproxy.getObject());
        } catch (IllegalArgumentException e) {
            // FIX: previously only printStackTrace(); log like the mapper does.
            log.warn("Cannot aggregate the sampling records", e);
        } catch (SecurityException e) {
            log.warn("Cannot aggregate the sampling records", e);
        } catch (InstantiationException e) {
            log.warn("Cannot aggregate the sampling records", e);
        } catch (IllegalAccessException e) {
            log.warn("Cannot aggregate the sampling records", e);
        } catch (InvocationTargetException e) {
            log.warn("Cannot aggregate the sampling records", e);
        } catch (NoSuchMethodException e) {
            log.warn("Cannot aggregate the sampling records", e);
        }
    }
}
/**
 * Second-stage reducer.  For each [AttemptID/PhaseAlias/ThreadName/ThreadId]
 * group it ranks the aggregated function records by descending callee count
 * and emits the top-N as CSV rows, attaching the phase's own sampling count
 * (taken from the synthetic "_PHASE_" record) as the "phase_count" column.
 */
public static class TopClass<K extends Record, V extends Record> extends MapReduceBase implements
        Reducer<K, V, Text, TextArrayWritable> {
    JobConf conf = null;
    // Maximum number of rows emitted per group.
    int limitNum = 100;
    // Guards the one-off emission of the CSV header row (per JVM).
    static boolean initialized = false;
    // When true, rank the stack-formatted ("a#b#c") records; otherwise rank
    // the plain function records.
    boolean funcInStackFormat = false;

    @Override
    public void configure(JobConf jobConf) {
        super.configure(jobConf);
        this.conf = jobConf;
        this.limitNum = conf.getInt(AnalysisProcessorConfiguration.limit, 100);
        this.funcInStackFormat = conf.getBoolean(AnalysisProcessorConfiguration.funcInStackFormat, false);
    }

    /**
     * Orders records by descending "callee_num"; ties are broken by comparing
     * the "func" strings (descending) so distinct records never collapse in a
     * TreeSet.  Only identical instances compare equal.
     */
    class RecordComparator implements Comparator<HiTuneRecord> {
        public int compare(HiTuneRecord r1, HiTuneRecord r2) {
            if (r1 == r2) {
                log.debug("instance compare");
                return 0;
            } else {
                long c1 = Long.parseLong(r1.getValue("callee_num"));
                long c2 = Long.parseLong(r2.getValue("callee_num"));
                // FIX: the original cast (c2 - c1) to int, which yields the
                // wrong sign whenever the difference overflows the int range.
                int result = c2 < c1 ? -1 : (c2 == c1 ? 0 : 1);
                log.debug("result: " + result);
                if (result == 0) {
                    return r2.getValue("func").compareTo(r1.getValue("func"));
                } else {
                    return result;
                }
            }
        }
    }

    /**
     * Emits the first len entries of candidates as CSV rows, filling the
     * "phase_count" column from the group's "_PHASE_" record.
     */
    private void dumpTop(HiTuneRecord[] candidates, int len, HiTuneRecord phase,
            Map<String, String> newRecord, String[] headers,
            OutputCollector<Text, TextArrayWritable> output) throws IOException {
        for (int i = 0; i < len; i++) {
            HiTuneRecord val = candidates[i];
            log.debug("dump val: " + val);
            if (val != null) {
                newRecord.clear();
                for (String head : headers) {
                    if (head.equals("phase_count")) {
                        // NOTE(review): phase is null when the group carried no
                        // "_PHASE_" record; the original code would NPE here as
                        // well — confirm the mapper always emits one per group.
                        newRecord.put(head, phase.getValue("callee_num"));
                    } else {
                        newRecord.put(head, val.getValue(head));
                    }
                }
                String[] contents = new String[newRecord.keySet().size()];
                int j = 0;
                for (String index : newRecord.keySet()) {
                    contents[j] = newRecord.get(index);
                    log.debug("content: " + index + "," + contents[j]);
                    j++;
                }
                output.collect(null, new TextArrayWritable(contents));
            }
        }
    }

    @Override
    public void reduce(K key, Iterator<V> values,
            OutputCollector<Text, TextArrayWritable> output,
            Reporter reporter) throws IOException {
        Map<String, String> newRecord = new HashMap<String, String>();
        String[] headers = new String[]{"attempt_id", "phase_name", "thread_name", "thread_id", "callee_num",
                "last_level_callee_num", "host", "phase_stack", "func", "phase_count"};
        for (String head : headers) {
            newRecord.put(head, "");
        }
        // Emit the CSV header row exactly once per reducer JVM.
        if (!initialized) {
            TextArrayWritable newValue = new TextArrayWritable(newRecord.keySet().toArray(new String[0]));
            output.collect(null, newValue);
            initialized = true;
        }
        // Ranking sets, pruned back to limitNum entries as they grow.
        TreeSet<HiTuneRecord> arrays = new TreeSet<HiTuneRecord>(new RecordComparator());
        TreeSet<HiTuneRecord> stackarrays = new TreeSet<HiTuneRecord>(new RecordComparator());
        HiTuneRecord phase = null;
        while (values.hasNext()) {
            try {
                // Deep-copy each record: Hadoop reuses the value instance
                // between iterations, so a snapshot is required before it is
                // stored in a ranking set.
                HiTuneRecord temp_proxyval = new HiTuneRecord(values.next());
                V newvalue = (V) temp_proxyval.getObject().getClass().getConstructor().newInstance();
                HiTuneRecord proxyval = new HiTuneRecord(newvalue);
                for (String field : temp_proxyval.getFields()) {
                    proxyval.add(field, temp_proxyval.getValue(field));
                }
                String function = proxyval.getValue("func");
                log.debug(" val: " + proxyval.toString());
                if (function.equals("_PHASE_")) {
                    // Synthetic record carrying the phase's own sampling count.
                    phase = proxyval;
                    continue;
                } else {
                    if (function.indexOf("#") != -1) {
                        if (funcInStackFormat) {
                            stackarrays.add(proxyval);
                            if (stackarrays.size() > limitNum) {
                                stackarrays.remove(stackarrays.last());
                            }
                        }
                    } else {
                        arrays.add(proxyval);
                        if (arrays.size() > limitNum) {
                            arrays.remove(arrays.last());
                        }
                    }
                }
            } catch (IllegalArgumentException e) {
                // FIX: previously only printStackTrace(); log the failure.
                log.warn("Cannot copy the sampling record", e);
            } catch (SecurityException e) {
                log.warn("Cannot copy the sampling record", e);
            } catch (InstantiationException e) {
                log.warn("Cannot copy the sampling record", e);
            } catch (IllegalAccessException e) {
                log.warn("Cannot copy the sampling record", e);
            } catch (InvocationTargetException e) {
                log.warn("Cannot copy the sampling record", e);
            } catch (NoSuchMethodException e) {
                log.warn("Cannot copy the sampling record", e);
            }
        }
        if (funcInStackFormat) {
            // FIX: the original iterated up to stackarrays.size() although it
            // computed the cap; both are <= limitNum after pruning, but using
            // the cap keeps the two branches consistent.
            int len = limitNum < stackarrays.size() ? limitNum : stackarrays.size();
            dumpTop(stackarrays.toArray(new HiTuneRecord[0]), len, phase, newRecord, headers, output);
        } else {
            int len = limitNum < arrays.size() ? limitNum : arrays.size();
            dumpTop(arrays.toArray(new HiTuneRecord[0]), len, phase, newRecord, headers, output);
        }
    }
}
/**
 * Creates the processor with the analysis configuration shared by all
 * {@code AnalysisProcessor} implementations.
 *
 * @param conf the analysis configuration describing inputs and outputs
 */
public InstrumentSamplingTop(Configuration conf) {
    super(conf);
}
/**
 * Runs the analysis as two chained MapReduce jobs: job 1 filters and expands
 * the sampling records (MapClass) and aggregates callee counts (ReduceClass)
 * into an intermediate sequence-file directory; job 2 re-reads that output
 * through an identity mapper and writes the per-phase top-N ranking as CSV
 * (TopClass), after which the intermediate directory is deleted and the
 * result moved to its final location.
 *
 * @see hitune.analysis.mapreduce.processor.AnalysisProcessor#run()
 */
@Override
public void run() {
    // Timestamp suffix keeps concurrent/repeated runs from colliding.
    long timestamp = System.currentTimeMillis();
    try {
        JobConf conf = new JobConf(this.conf, InstrumentSamplingTop.class);
        conf.setJobName(this.getClass().getSimpleName() + "_1_" + timestamp);
        conf.setInputFormat(MultiSequenceFileInputFormat.class);
        conf.setMapperClass(InstrumentSamplingTop.MapClass.class);
        conf.setReducerClass(InstrumentSamplingTop.ReduceClass.class);
        // The concrete key/value classes are configured, not hard-coded.
        Class<? extends WritableComparable> outputKeyClass =
                Class.forName(conf.get(AnalysisProcessorConfiguration.mapoutputKeyClass)).asSubclass(WritableComparable.class);
        Class<? extends Writable> outputValueClass =
                Class.forName(conf.get(AnalysisProcessorConfiguration.mapoutputValueClass)).asSubclass(Writable.class);
        conf.setMapOutputKeyClass(outputKeyClass);
        conf.setMapOutputValueClass(outputValueClass);
        conf.setOutputKeyClass(outputKeyClass);
        conf.setOutputValueClass(outputValueClass);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        String outputPaths = conf.get(AnalysisProcessorConfiguration.reportfolder) + "/" + conf.get(AnalysisProcessorConfiguration.reportfile);
        String temp_outputPaths = getTempOutputDir(outputPaths);
        if (this.inputfiles != null) {
            log.debug("inputPaths:" + inputfiles);
            FileInputFormat.setInputPaths(conf, inputfiles);
            FileOutputFormat.setOutputPath(conf, new Path(outputPaths + "_1_" + timestamp));
            try {
                // First job: filter + aggregate.
                JobClient.runJob(conf);
                JobConf secondconf = new JobConf(this.conf, InstrumentSamplingTop.class);
                secondconf.setJobName(this.getClass().getSimpleName() + "_2_" + timestamp);
                secondconf.setInputFormat(SequenceFileInputFormat.class);
                secondconf.setMapperClass(IdentityMapper.class);
                secondconf.setReducerClass(InstrumentSamplingTop.TopClass.class);
                secondconf.setMapOutputKeyClass(outputKeyClass);
                secondconf.setMapOutputValueClass(outputValueClass);
                secondconf.setOutputKeyClass(Text.class);
                secondconf.setOutputValueClass(TextArrayWritable.class);
                secondconf.setOutputFormat(CSVFileOutputFormat.class);
                FileInputFormat.setInputPaths(secondconf, outputPaths + "_1_" + timestamp);
                FileOutputFormat.setOutputPath(secondconf, new Path(temp_outputPaths));
                // Second job: produce the ranking list.
                JobClient.runJob(secondconf);
                moveResults(secondconf, outputPaths, temp_outputPaths);
                // Clean up the intermediate directory of job 1.
                // FIX: delete(Path) is deprecated; delete(Path, true) is the
                // explicit recursive equivalent.
                Path temp = new Path(outputPaths + "_1_" + timestamp);
                temp.getFileSystem(conf).delete(temp, true);
            } catch (IOException e) {
                log.warn("For " + getOutputFileName() + " :JOB fails!");
                log.warn(e);
                e.printStackTrace();
                this.MOVE_DONE = false;
            }
        } else {
            log.warn("For " + getOutputFileName() + " :No input path!");
        }
    } catch (Exception e) {
        // Boundary catch: configuration/classloading problems abort this run
        // but must not kill the surrounding analysis driver.
        log.warn("Job preparation failure!");
        log.warn(e);
        e.printStackTrace();
    }
}
/**
 * Unused command-line entry point; the processor is driven through
 * {@link #run()} by the analysis framework.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    // Intentionally empty: this processor is not launched from the CLI.
}
}