Package edu.eltech

Source Code of edu.eltech.EntryPoint

package edu.eltech;

import edu.eltech.classifier.Category;
import edu.eltech.classifier.Letter;
import edu.eltech.generator.FileGenerator;
import edu.eltech.mapreduce.classifier.MyMapper;
import edu.eltech.mapreduce.classifier.MyReducer;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.*;

import java.io.File;
import java.io.IOException;
import java.util.UUID;

public class EntryPoint {

    private JobConf jobConf = new JobConf(EntryPoint.class);

    public EntryPoint() {
        this("test-job-" + UUID.randomUUID());
    }

    public EntryPoint(String jobName) {
        this(jobName, MyMapper.class, MyReducer.class);
    }

    public EntryPoint(String jobName, Class<? extends org.apache.hadoop.mapred.Mapper> mapper,
                                      Class<? extends org.apache.hadoop.mapred.Reducer> reducer) {

        jobConf.setJobName(jobName);

        jobConf.setJarByClass(EntryPoint.class);
        jobConf.setMapperClass(mapper);
        jobConf.setReducerClass(reducer);

        jobConf.setMapOutputKeyClass(IntWritable.class);
        jobConf.setMapOutputValueClass(Letter.class);

        jobConf.setOutputKeyClass(Category.class);
        jobConf.setOutputValueClass(Letter.class);
    }

    public void run(String inputPath, String outputPath, boolean isTest) throws IOException{

        /* get files from input folder and wrap them into Path objects */
        Path[] paths = null;

//        /* for testing mode - list files from the local file system */
//        if (isTest) {
            File fileFolder = new File(inputPath);
            File[] files = fileFolder.listFiles();

            paths = new Path[files.length];
            for (int i = 0; i < files.length; i++) {
                String s = files[i].toString().replaceFirst(":/", "://");
                paths[i] = new Path(s);
            }
//        }
//        /* for production mode - list files uploaded to the S3 */
//        else {
//
//
//        }

        FileInputFormat.setInputPaths(jobConf, paths);
        FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));

        jobConf.setNumMapTasks(20);
        jobConf.setNumReduceTasks(20);
//        jobConf.setCompressMapOutput(true);
//        TextOutputFormat.setCompressOutput(jobConf, true);

        JobClient.runJob(jobConf);
    }

    public static void main(String[] args) throws ParseException, IOException {

        /* get attributes' values from the input parameters */
        CmdParser cmdParser = new CmdParser();
        CommandLine line = cmdParser.getAttributes(args);

        String inputPath = null, outputPath = null;
        boolean isTestRun = false;

        if ( line.hasOption(CmdParser.HELP) || line.hasOption(CmdParser.HELP_LONG) ) {
            /* print help and exit */
            cmdParser.printHelp();
            System.exit(1);
        }
        else if ( line.hasOption(CmdParser.INPUT_PATH) && line.hasOption(CmdParser.OUTPUT_PATH) ) {
            inputPath = line.getOptionValue(CmdParser.INPUT_PATH);
            outputPath = line.getOptionValue(CmdParser.OUTPUT_PATH);

            if ( line.hasOption(CmdParser.TEST_RUN) ) {
                isTestRun = true;
            }

        }  else {
            /* print help and exit */
            cmdParser.printHelp();
            System.exit(1);
        }

        /* set up environment (only for testing - not suitable for Amazon) */
        if (isTestRun) {
            /* generate data beforehand - in Amazon it will be saved to S3 */
            FileGenerator fileGenerator = new FileGenerator();
            fileGenerator.generateData(inputPath, 100, 10000);

            /* clear the output folder so that Hadoop would not cuss */
            FileGenerator.removeFolder(outputPath);
        }
        long start = System.currentTimeMillis();

        /* initialize and run a mapreduce job */
        EntryPoint entryPoint = new EntryPoint();
        entryPoint.run(inputPath, outputPath, isTestRun);

        if (isTestRun) {
            long end = System.currentTimeMillis();
            System.out.println("Elapsed time: " + (end - start) / 1000 + "sec.");
        }
    }

}
TOP

Related Classes of edu.eltech.EntryPoint

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.