// Switch to this if you'd like to look at all text files. May take many minutes just to read the file listing.
//String inputPath = "s3n://aws-publicdatasets/common-crawl/parse-output/segment/*/textData-*";
// Creates a new job configuration for this Hadoop job.
JobConf job = new JobConf(this.getConf());
job.setJarByClass(TotalAnalysis.class);
// Build the input paths from the published valid_segments.txt list (the segment IDs that are safe to read)
// instead of globbing every segment -- fix from the Google Groups discussion.
String segmentListFile = "s3n://aws-publicdatasets/common-crawl/parse-output/valid_segments.txt";
FileSystem fsInput = FileSystem.get(new URI(segmentListFile), job);
BufferedReader reader = new BufferedReader(new InputStreamReader(fsInput.open(new Path(segmentListFile))));
String segmentId;
while ((segmentId = reader.readLine()) != null) {
  String inputPath = "s3n://aws-publicdatasets/common-crawl/parse-output/segment/" + segmentId + "/textData-*";
  FileInputFormat.addInputPath(job, new Path(inputPath));
}
reader.close();
// Read in any additional config parameters.
if (configFile != null) {
  LOG.info("adding config parameters from '" + configFile + "'");
  // Add the resource to the job itself: the JobConf above copied getConf(),
  // so adding it to this.getConf() at this point would not reach the job.
  job.addResource(configFile);
}
// Alternative: scan a single provided input path (disabled in favor of the valid-segments list above).
//LOG.info("setting input path to '"+ inputPath + "'");
//FileInputFormat.addInputPath(job, new Path(inputPath));
//FileInputFormat.setInputPathFilter(job, SampleFilter.class);
// Delete the output path directory if it already exists.
LOG.info("clearing the output path at '" + outputPath + "'");
FileSystem fs = FileSystem.get(new URI(outputPath), job);
if (fs.exists(new Path(outputPath)))
  fs.delete(new Path(outputPath), true);
// Set the path where final output 'part' files will be saved.
LOG.info("setting output path to '" + outputPath + "'");
FileOutputFormat.setOutputPath(job, new Path(outputPath));
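// Leave the output uncompressed so the final 'part' files can be inspected directly.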
FileOutputFormat.setCompressOutput(job, false);
// Set which InputFormat class to use.
job.setInputFormat(SequenceFileInputFormat.class);
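// Set the intermediate key/value types produced by the map phase.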
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(Text.class);
// Set which OutputFormat class to use.
job.setOutputFormat(TextOutputFormat.class);
// Set the output data types.
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// Set which Mapper and Reducer classes to use.
job.setMapperClass(TotalAnalysis.TotalAnalysisMapper.class);
job.setReducerClass(TotalAnalysis.TotalAnalysisReducer.class);
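// Run the job synchronously; map success to exit code 0 and failure to 1.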
if (JobClient.runJob(job).isSuccessful())
  return 0;
else
  return 1;
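// For reference, assuming TotalAnalysis follows the usual Configured/Tool pattern, the job would
// typically be launched from main() roughly like:
//   int res = ToolRunner.run(new Configuration(), new TotalAnalysis(), args);
//   System.exit(res);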