Package hitune.analysis.mapreduce.processor

Source Code of hitune.analysis.mapreduce.processor.AnalysisProcessor$NullKeyIdentityReducer

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package hitune.analysis.mapreduce.processor;


import hitune.analysis.mapreduce.AnalysisProcessorConfiguration;
import hitune.analysis.mapreduce.CSVFileOutputFormat;
import hitune.analysis.mapreduce.processor.FileFilter.FileFilter;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Iterator;
import java.util.List;


import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecord;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecordKey;
import org.apache.hadoop.chukwa.extraction.engine.Record;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputCommitter;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.mapred.lib.NLineInputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;


/**
* The analysis processing thread, which will invoke a Map/Reduce job to do a specific analysis job.
*
*/
public abstract class AnalysisProcessor implements Runnable {

    // Class-wide log4j logger used by every method of this class.
    static Logger log = Logger.getLogger(AnalysisProcessor.class);
    // Worker thread wrapping this Runnable; created in the constructor,
    // launched by start() and awaited by join().
    private Thread thread = null;
    // Initialised to the empty string; not referenced by the code visible in this class.
    String source = "";
    // Job configuration handed to the constructor.
    Configuration conf = null;
    // Comma separator constant (the misspelled name is kept as declared).
    static final String SEPERATOR_COMMA = ",";
    // Number of seconds in one day (24 * 3600 = 86400).
    static final long DAY_IN_SECONDS = 24 * 3600;
    // Formatter for "yyyyMMdd" day stamps.
    // NOTE(review): SimpleDateFormat instances are not synchronized; confirm that
    // this shared static is never used from several threads at once.
    static SimpleDateFormat day = new java.text.SimpleDateFormat("yyyyMMdd");
    // Status flag assigned by moveResults() and reported by getStatus().
    protected boolean MOVE_DONE = false;

    // Input file list produced by parsingInputPath(); stays null until that succeeds.
    String inputfiles = null;

    /**
     * Suffix appended to the output folder name by getTempOutputDir() to form the
     * temporary report folder that holds reports before all analysis jobs are done.
     */
    static final String REPORT_TMP = "_TMP";



    /**
     *
     */
    public AnalysisProcessor(Configuration conf) {
        // TODO Auto-generated constructor stub
        this.conf = conf;
        log.debug(this.conf.get("tmpjars"));
        thread = new Thread(this);
        //To create report folder
        GenReportHome();
    }

    private void GenReportHome(){
        try {
            FileSystem fs = FileSystem.get(this.conf);
            Path reportfolder = new Path(this.conf.get(AnalysisProcessorConfiguration.reportfolder));
            if(!fs.exists(reportfolder)) fs.mkdirs(reportfolder);
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
            log.error("Cannot create report folder");
        }
    }

    protected String getTempOutputDir(String outputfolder){
        return outputfolder + REPORT_TMP + "/" + getOutputFileName();
    }

    public String getOutputFileName(){
        return this.conf.get(AnalysisProcessorConfiguration.outputfilename);
    }

   

    static protected List<String> String2List(String src, String seperator){
        List<String> results = null;
        if(src == null || src.equals("") || src.equals("null") || src.equals("*")){
            return results;
        }
        else {
            results = new ArrayList<String>();
            if(src.indexOf(seperator)!=-1){

                for (String item : src.split(seperator)){
                    results.add(item);
                }
            }
            else {
                results.add(src);
            }
        }
        return results;
    }

    static protected String List2String(List<String> list, String seperator){
        StringBuilder result = new StringBuilder();
        if(list==null || list.size()<=0){
            return "";
        }
        for(String item : list){
            result.append(item).append(seperator);
        }
        return result.toString().substring(0, result.length()+ 0-seperator.length());
    }



    public void start(){
        if(!init()){
            log.error("AnalyzerProcessor: " + this.getClass().getSimpleName() + "'s output: " + getOutputFileName() + " intializing failed");
        }
        if(thread!=null ){
            thread.start();
            log.info("AnalyzerProcessor: " + this.getClass().getSimpleName() + "'s output: " + getOutputFileName() + " started...");
        }

    }

    public void join() throws InterruptedException{
        if(thread!=null){
            thread.join();
            if(getStatus()){
                log.info("AnalyzerProcessor: " + this.getClass().getSimpleName() + "'s output: " + getOutputFileName() + " SUCCESS!");
            }
            else {
                log.info("AnalyzerProcessor: " + this.getClass().getSimpleName() + "'s output: " + getOutputFileName() + " FAILED!");
            }
        }else{
            log.info("AnalyzerProcessor: " + this.getClass().getSimpleName() + "'s output: " + getOutputFileName() + " FAILED!");
        }

    }


   

    public boolean getStatus(){
        return MOVE_DONE;
    }

   

    /**
     * Merge the output file into one, and only emit the header(field name) once.
     *
     * @param <K>
     * @param <V>
     */
    public static class NullKeyIdentityReducer<K, V> extends MapReduceBase implements Reducer<K, V, K, V> {
        static boolean isHeader = true;
        public void reduce(K key, Iterator<V> values,
                OutputCollector<K, V> output, Reporter reporter)
        throws IOException {
            while (values.hasNext()) {
                output.collect(null, values.next());
                if(isHeader){
                    isHeader = false;
                    break;
                }
            }
        }
    }


    /**
     * Merge multiple output file into one file and only emit the header of csv once.
     *
     *
     */
    class MergeOutput  extends Configured implements Tool {


        Configuration configure = null;
        public MergeOutput(Configuration conf){
            this.configure=conf;
        }
        @Override
        public int run(String[] args) throws Exception {
            // TODO Auto-generated method stub
            JobConf conf = new JobConf(this.configure, AnalysisProcessor.class);

            conf.setJobName("MergeOutputFile");

            conf.setInputFormat(TextInputFormat.class);
            conf.setMapperClass(IdentityMapper.class);
            conf.setReducerClass(NullKeyIdentityReducer.class);

            conf.setMapOutputKeyClass(LongWritable.class);
            conf.setMapOutputValueClass(Text.class);

            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(Text.class);
            conf.setOutputFormat(CSVFileOutputFormat.class);

            conf.setNumReduceTasks(1);
            FileInputFormat.setInputPaths(conf, args[0]);
            FileOutputFormat.setOutputPath(conf, new Path(args[1]));

            JobClient.runJob(conf);
            return 0;
        }


    }

    /**
     * Move the TEMP output folder to final one(user defined one);
     * If there are multiple files under one job's output folder, it should merge the output into one file.
     * Then rename the folder to the final one.
     * @param job
     * @param output
     * @param result
     */
    protected void moveResults(JobConf job,String output, String result){
        try {
            FileSystem fs = FileSystem.get(job);
            log.debug("move results: " +result);
            Path src = new Path(result+"/"+"*.csv*");
            Path dst = new Path(output);
            if(!fs.exists(dst)){
                fs.mkdirs(dst);
            }
            FileStatus[] matches = fs.globStatus(src, new PathFilter(){
                @Override
                public boolean accept(Path path) {
                    // TODO Auto-generated method stub
                    return true;

                }});
            if(matches!=null && matches.length!=0){
                if(matches.length > 1){
                    //multiple output files
                    String []args = new String[2];
                    args[0]= result;
                    args[1]= "_"+ result;
                    fs.delete(new Path("_"+ result));
                    //merge multiple output files into one file
                    ToolRunner.run(new MergeOutput(this.conf), args);
                    fs.delete(new Path(result));
                    fs.rename(new Path("_"+ result), new Path(result));
                }

                matches = fs.globStatus(src,new PathFilter(){
                    @Override
                    public boolean accept(Path path) {
                        // TODO Auto-generated method stub
                        return true;                    
                    }});

                for(FileStatus file : matches){
                    String filename = file.getPath().getName();
                    filename = filename.substring(0,filename.indexOf("-"));
                    log.debug("move file:" + filename);
                    Path toFile = new Path(output+"/"+filename);
                    if(fs.exists(toFile)){
                        fs.delete(toFile);
                    }
                    fs.rename(file.getPath(), toFile);
                    fs.delete(file.getPath().getParent(), true);
                    FileStatus[] tmpDirs = fs.listStatus(file.getPath().getParent().getParent());
                    if(tmpDirs == null || tmpDirs.length == 0){
                        fs.delete(file.getPath().getParent().getParent(), true);
                    }
                    break;
                }
            }
            else{
                MOVE_DONE = false;
            }
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
            MOVE_DONE = false;
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        MOVE_DONE = true;
    }

    static protected boolean isMatched(List filterlist, String target){
        return (filterlist==null || filterlist.isEmpty() || filterlist.contains(target));
    }

    /**
     * Work performed on the worker thread: the thread is created around this
     * object in the constructor and launched by start(). Concrete processors
     * supply the body; per the class description this is where the analysis
     * Map/Reduce job is expected to be invoked.
     *
     * @see java.lang.Runnable#run()
     */
    @Override
    abstract public void run();

    public boolean init(){
        return parsingInputPath();
    }


    protected boolean parsingInputPath(){
        if(conf!=null){
            String filterclass = conf.get(AnalysisProcessorConfiguration.filefilter);
            if(filterclass ==null || filterclass.equals("")){
                filterclass = "hitune.analysis.mapreduce.processor.FileFilter.DefaultFileFilter";
            }

            String [] paths = conf.getStrings(AnalysisProcessorConfiguration.datasource);
            String pattern = conf.get(AnalysisProcessorConfiguration.filefilter_pattern, null);
            StringBuilder str = new StringBuilder();

            for(String path : paths){
                log.debug("path to scan: " + path);
                FileFilter filter = null;
                try {
                    filter = (FileFilter)Class.forName(filterclass).getConstructor(new Class[] { Configuration.class, String.class }).newInstance(new Object[] {conf, pattern});

                    if(str.length()!=0){
                        str.append(FileFilter.SEPARATOR);
                    }
                    str.append(filter.filter(new Path(path)));

                   
                } catch (IllegalArgumentException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                } catch (SecurityException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                } catch (InstantiationException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                } catch (IllegalAccessException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                } catch (InvocationTargetException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                } catch (NoSuchMethodException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                } catch (ClassNotFoundException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
            if(str == null || str.equals("") || str.length() == 0){
                log.error("No input file is met the filtering requirments");
                return false;
            }
            else{
                inputfiles = str.toString();
                return true;
            }
        }
        else{
            return false;
        }
       
    }


    /**
     * Command-line entry point placeholder; the body is empty, so running this
     * class directly does nothing.
     *
     * @param args command-line arguments; ignored
     */
    public static void main(String[] args) {
        // Intentionally left empty.

    }

}
TOP

Related Classes of hitune.analysis.mapreduce.processor.AnalysisProcessor$NullKeyIdentityReducer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.