Package com.linkedin.whiteelephant.mapreduce.lib.job

Examples of com.linkedin.whiteelephant.mapreduce.lib.job.StagedOutputJob


        // Sets up and submits one staged "parse-confs" job for the current task.
        // The input list holds a single entry: task.inputPathFormat.
        List<String> inputPaths = new ArrayList<String>();
        inputPaths.add(task.inputPathFormat);
       
        String outputPath = task.outputPath;
       
        // The job is named "<_name>-parse-confs-<task.id>". Two locations are passed:
        // "/tmp" + outputPath and outputPath itself.
        // NOTE(review): presumably the first is a staging location and the second the
        // final output -- confirm against StagedOutputJob.createStagedJob.
        final StagedOutputJob job = StagedOutputJob.createStagedJob(
           _props,
           _name + "-parse-confs-" + task.id,
           inputPaths,
           "/tmp" + outputPath,
           outputPath,
           _log);
       
        // Values placed in the job configuration under these keys.
        // NOTE(review): presumably read back by the mapper -- verify in ParseJobConfs.
        job.getConfiguration().set("jobs.output.path", _confsOutputPathRoot);
        job.getConfiguration().set("logs.cluster.name", clusterName);
               
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(NullWritable.class);
 
        job.setInputFormatClass(CombineDocumentFileFormat.class);
        job.setOutputFormatClass(AvroKeyOutputFormat.class);
 
        // Output records are Avro keys using the JobConf schema.
        AvroJob.setOutputKeySchema(job, JobConf.SCHEMA$);
       
        // Zero reduce tasks and no reducer class: only the mapper is configured.
        job.setNumReduceTasks(0);
  
        job.setMapperClass(ParseJobConfs.TheMapper.class);
       
        // Hand the configured job to the executor.
        executor.submit(job);
      }
     
      // Called after the enclosing scope has submitted its jobs.
      // NOTE(review): presumably blocks until all submitted jobs finish -- confirm.
      executor.waitForCompletion();
View Full Code Here


  {
    List<String> inputPaths = new ArrayList<String>();
   
    inputPaths.add(inputPattern);
   
    final StagedOutputJob job = StagedOutputJob.createStagedJob(
      _props,
      _name + "-" + "usage-per-hour-" + clusterName + "-" + year + "-" + day,
      inputPaths,
      "/tmp" + output,
      output,
      _log);
   
    final Configuration conf = job.getConfiguration();
   
    conf.set("cluster.name", clusterName);
               
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
   
    job.setInputFormatClass(AvroKeyValueInputFormat.class);
    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
   
    AvroJob.setInputKeySchema(job, Schema.create(Type.STRING));
    AvroJob.setInputValueSchema(job, LogData.SCHEMA$);
   
    AvroJob.setMapOutputKeySchema(job, AttemptStatsKey.SCHEMA$);
    AvroJob.setMapOutputValueSchema(job, AttemptStatsValue.SCHEMA$);
   
    AvroJob.setOutputKeySchema(job, AttemptStatsKey.SCHEMA$);
    AvroJob.setOutputValueSchema(job, AttemptStatsValue.SCHEMA$);
   
    job.setNumReduceTasks(numReducers);
   
    job.setMapperClass(ComputeUsagePerHour.TheMapper.class);
    job.setReducerClass(ComputeUsagePerHour.TheReducer.class);
   
    executor.submit(job);
  }
View Full Code Here

        List<String> inputPaths = new ArrayList<String>();
        inputPaths.add(task.inputPathFormat);
       
        String outputPath = task.outputPath;
       
        final StagedOutputJob job = StagedOutputJob.createStagedJob(
           _props,
           _name + "-parse-jobs-" + task.id,
           inputPaths,
           "/tmp" + outputPath,
           outputPath,
           _log);
       
        job.getConfiguration().set("jobs.output.path", _jobsOutputPathRoot);
        job.getConfiguration().set("logs.cluster.name", clusterName);
               
        // 1 reducer per 12 GB of input data
        long numReduceTasks = (int)Math.ceil(((double)task.totalLength) / 1024 / 1024 / 1024 / 12);
               
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(BytesWritable.class);
 
        job.setInputFormatClass(CombinedTextInputFormat.class);
        job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
 
        AvroJob.setOutputKeySchema(job, Schema.create(Type.STRING));
        AvroJob.setOutputValueSchema(job, LogData.SCHEMA$);
       
        job.setNumReduceTasks((int)numReduceTasks);
  
        job.setMapperClass(ParseJobsFromLogs.TheMapper.class);
        job.setReducerClass(ParseJobsFromLogs.TheReducer.class);
        
        AvroJob.setMapOutputKeySchema(job, Schema.create(Type.STRING));
        AvroJob.setMapOutputValueSchema(job, LogData.SCHEMA$);
       
        MyAvroMultipleOutputs.addNamedOutput(job, "logs", AvroKeyValueOutputFormat.class, Schema.create(Type.STRING), LogData.SCHEMA$);
View Full Code Here

TOP

Related Classes of com.linkedin.whiteelephant.mapreduce.lib.job.StagedOutputJob

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.