// Source Code of edu.eltech.EntryPoint

package edu.eltech;


import edu.eltech.classifier.Category;
import edu.eltech.classifier.Letter;
import edu.eltech.generator.FileGenerator;
import edu.eltech.mapreduce.classifier.MyMapper;
import edu.eltech.mapreduce.classifier.MyReducer;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.*;


import java.io.File;
import java.io.IOException;
import java.util.UUID;


public class EntryPoint {


    private JobConf jobConf = new JobConf(EntryPoint.class);


    public EntryPoint() {
        this("test-job-" + UUID.randomUUID());
    }


    public EntryPoint(String jobName) {
        this(jobName, MyMapper.class, MyReducer.class);
    }


    public EntryPoint(String jobName, Class<? extends org.apache.hadoop.mapred.Mapper> mapper,
                                      Class<? extends org.apache.hadoop.mapred.Reducer> reducer) {


        jobConf.setJobName(jobName);


        jobConf.setJarByClass(EntryPoint.class);
        jobConf.setMapperClass(mapper);
        jobConf.setReducerClass(reducer);


        jobConf.setMapOutputKeyClass(IntWritable.class);
        jobConf.setMapOutputValueClass(Letter.class);


        jobConf.setOutputKeyClass(Category.class);
        jobConf.setOutputValueClass(Letter.class);
    }


    public void run(String inputPath, String outputPath, boolean isTest) throws IOException{


        /* get files from input folder and wrap them into Path objects */
        Path[] paths = null;


//        /* for testing mode - list files from the local file system */
//        if (isTest) {
            File fileFolder = new File(inputPath);
            File[] files = fileFolder.listFiles();


            paths = new Path[files.length];
            for (int i = 0; i < files.length; i++) {
                String s = files[i].toString().replaceFirst(":/", "://");
                paths[i] = new Path(s);
            }
//        }
//        /* for production mode - list files uploaded to the S3 */
//        else {
//
//
//        }


        FileInputFormat.setInputPaths(jobConf, paths);
        FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));


        jobConf.setNumMapTasks(20);
        jobConf.setNumReduceTasks(20);
//        jobConf.setCompressMapOutput(true);
//        TextOutputFormat.setCompressOutput(jobConf, true);


        JobClient.runJob(jobConf);
    }


    public static void main(String[] args) throws ParseException, IOException {


        /* get attributes' values from the input parameters */
        CmdParser cmdParser = new CmdParser();
        CommandLine line = cmdParser.getAttributes(args);


        String inputPath = null, outputPath = null;
        boolean isTestRun = false;


        if ( line.hasOption(CmdParser.HELP) || line.hasOption(CmdParser.HELP_LONG) ) {
            /* print help and exit */
            cmdParser.printHelp();
            System.exit(1);
        }
        else if ( line.hasOption(CmdParser.INPUT_PATH) && line.hasOption(CmdParser.OUTPUT_PATH) ) {
            inputPath = line.getOptionValue(CmdParser.INPUT_PATH);
            outputPath = line.getOptionValue(CmdParser.OUTPUT_PATH);


            if ( line.hasOption(CmdParser.TEST_RUN) ) {
                isTestRun = true;
            }


        }  else {
            /* print help and exit */
            cmdParser.printHelp();
            System.exit(1);
        }


        /* set up environment (only for testing - not suitable for Amazon) */
        if (isTestRun) {
            /* generate data beforehand - in Amazon it will be saved to S3 */
            FileGenerator fileGenerator = new FileGenerator();
            fileGenerator.generateData(inputPath, 100, 10000);


            /* clear the output folder so that Hadoop would not cuss */
            FileGenerator.removeFolder(outputPath);
        }
        long start = System.currentTimeMillis();


        /* initialize and run a mapreduce job */
        EntryPoint entryPoint = new EntryPoint();
        entryPoint.run(inputPath, outputPath, isTestRun);


        if (isTestRun) {
            long end = System.currentTimeMillis();
            System.out.println("Elapsed time: " + (end - start) / 1000 + "sec.");
        }
    }


}
// Source Code of edu.eltech.EntryPoint

// Related Classes of edu.eltech.EntryPoint