Examples of org.apache.hadoop.mapreduce.OutputFormat
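
Every snippet on this page exercises the same three-method contract defined by org.apache.hadoop.mapreduce.OutputFormat: checkOutputSpecs() validates the output location before the job is submitted, getRecordWriter() supplies the per-task-attempt writer, and getOutputCommitter() supplies the committer that promotes or discards task output. For orientation, here is a minimal sketch of that contract; the class name DiscardingOutputFormat is invented for illustration (it is not part of Hadoop or of the projects quoted below) and simply drops every record.

    import java.io.IOException;

    import org.apache.hadoop.mapreduce.JobContext;
    import org.apache.hadoop.mapreduce.OutputCommitter;
    import org.apache.hadoop.mapreduce.OutputFormat;
    import org.apache.hadoop.mapreduce.RecordWriter;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;

    // Hypothetical example class, shown only to illustrate the OutputFormat contract.
    public class DiscardingOutputFormat<K, V> extends OutputFormat<K, V> {

      @Override
      public void checkOutputSpecs(JobContext context) throws IOException {
        // Called once before the job is submitted; throw here if the output
        // location is missing, invalid, or already exists.
      }

      @Override
      public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
          throws IOException, InterruptedException {
        // One RecordWriter per task attempt; this one silently drops records.
        return new RecordWriter<K, V>() {
          @Override
          public void write(K key, V value) { /* discard */ }

          @Override
          public void close(TaskAttemptContext ctx) { /* nothing to flush */ }
        };
      }

      @Override
      public OutputCommitter getOutputCommitter(TaskAttemptContext context)
          throws IOException, InterruptedException {
        // With no files to move into place, every commit/abort callback is a no-op.
        return new OutputCommitter() {
          @Override public void setupJob(JobContext jobContext) { }
          @Override public void setupTask(TaskAttemptContext taskContext) { }
          @Override public boolean needsTaskCommit(TaskAttemptContext taskContext) { return false; }
          @Override public void commitTask(TaskAttemptContext taskContext) { }
          @Override public void abortTask(TaskAttemptContext taskContext) { }
        };
      }
    }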


    // If not in cache, create a new one
    if(context == null) {

      context = new OutputContext();

      OutputFormat mainOutputFormat;

      try {
        mainOutputFormat = ((OutputFormat) ReflectionUtils.newInstance(
            this.context.getOutputFormatClass(), this.context.getConfiguration()));
      } catch(ClassNotFoundException e1) {
        throw new RuntimeException(e1);
      }

      ProxyOutputCommitter baseOutputCommitter = ((ProxyOutputCommitter) mainOutputFormat
          .getOutputCommitter(this.context));

      // The trick is to create a new Job for each output
      Configuration c = new Configuration(this.context.getConfiguration());
      Job job = new Job(c);

      Class<?> keyClass = getNamedOutputKeyClass(this.context, baseFileName);
      if(keyClass == null && getDefaultNamedOutputKeyClass(this.context) == null) {
        throw new InvalidNamedOutputException("No pre-configured named output for this name / no default named output format specified.");
      }
      job.setOutputKeyClass(keyClass == null ? getDefaultNamedOutputKeyClass(this.context) : keyClass);

      Class<?> valueClass = getNamedOutputValueClass(this.context, baseFileName);
      job.setOutputValueClass(valueClass == null ? getDefaultNamedOutputValueClass(this.context)
          : valueClass);

      // Check possible specific context for the output
      setSpecificNamedOutputContext(this.context.getConfiguration(), job, baseFileName);
      TaskAttemptContext taskContext;
      try {
        taskContext = TaskAttemptContextFactory.get(job.getConfiguration(),
            this.context.getTaskAttemptID());
      } catch(Exception e) {
        throw new IOException(e);
      }

      // First we change the output dir for the new OutputFormat that we will
      // create
      // We put it inside the main output work path -> in case the Job fails,
      // everything will be discarded

      taskContext.getConfiguration().set("mapred.output.dir",
          baseOutputCommitter.getBaseDir() + "/" + baseFileName);
      // This is for Hadoop 2.0 :
      taskContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir",
          baseOutputCommitter.getBaseDir() + "/" + baseFileName);
      context.taskAttemptContext = taskContext;

      // Load the OutputFormat instance
      String outputFormatFile = getNamedOutputFormatInstanceFile(this.context, baseFileName);
      if(outputFormatFile == null) {
        outputFormatFile = getDefaultNamedOutputFormatInstanceFile(this.context);
      }

      OutputFormat outputFormat = InstancesDistributor.loadInstance(
          context.taskAttemptContext.getConfiguration(), OutputFormat.class, outputFormatFile, true);
      // We have to create a JobContext for meeting the contract of the
      // OutputFormat
      JobContext jobContext;
      try {
        jobContext = JobContextFactory.get(taskContext.getConfiguration(), taskContext.getJobID());
      } catch(Exception e) {
        throw new IOException(e);
      }

      context.jobContext = jobContext;
      // The contract of the OutputFormat is to check the output specs
      outputFormat.checkOutputSpecs(jobContext);
      // We get the output committer so we can call it later
      context.outputCommitter = outputFormat.getOutputCommitter(taskContext);
      // Save the RecordWriter to cache it
      context.recordWriter = outputFormat.getRecordWriter(taskContext);

      // if counters are enabled, wrap the writer with context
      // to increment counters
      if(countersEnabled) {
        context.recordWriter = new RecordWriterWithCounter(context.recordWriter, baseFileName,


        // storeFunc storing the output location in the Configuration
        // in the Job. The PigOutputFormat.setLocation() method will merge
        // this modified Configuration into the configuration of the
        // Context we have
        PigOutputFormat.setLocation(context, store);
        OutputFormat outputFormat = storeFunc.getOutputFormat();

        // create a new record writer
        try {
            writer = outputFormat.getRecordWriter(context);
        } catch (InterruptedException e) {
            throw new IOException(e);
        }
        storeFunc.prepareToWrite(writer);

           
            // set output location
            PigOutputFormat.setLocation(jobContextCopy, store);
           
            StoreFuncInterface sFunc = store.getStoreFunc();
            OutputFormat of = sFunc.getOutputFormat();
           
            // The above call should have updated the conf in the JobContext
            // to have the output location - now call checkOutputSpecs()
            of.checkOutputSpecs(jobContextCopy);
        }
    }

    // If not in cache, create a new one
    if(context == null) {

      context = new OutputContext();

      OutputFormat mainOutputFormat;

      try {
        mainOutputFormat = ((OutputFormat) ReflectionUtils.newInstance(
            this.context.getOutputFormatClass(), this.context.getConfiguration()));
      } catch(ClassNotFoundException e1) {
        throw new RuntimeException(e1);
      }

      ProxyOutputCommitter baseOutputCommitter = ((ProxyOutputCommitter) mainOutputFormat
          .getOutputCommitter(this.context));

      // The trick is to create a new Job for each output
      Job job = new Job(this.context.getConfiguration());
      job.setOutputKeyClass(getNamedOutputKeyClass(this.context, baseFileName));
      job.setOutputValueClass(getNamedOutputValueClass(this.context, baseFileName));
      // Check possible specific context for the output
      setSpecificNamedOutputContext(this.context.getConfiguration(), job, baseFileName);
      TaskAttemptContext taskContext = new TaskAttemptContext(job.getConfiguration(),
          this.context.getTaskAttemptID());

      // First we change the output dir for the new OutputFormat that we will
      // create
      // We put it inside the main output work path -> in case the Job fails,
      // everything will be discarded
      taskContext.getConfiguration().set("mapred.output.dir",
          baseOutputCommitter.getBaseDir() + "/" + baseFileName);
      context.taskAttemptContext = taskContext;

      // Load the OutputFormat instance
      OutputFormat outputFormat = DCUtils.loadSerializedObjectInDC(
          context.taskAttemptContext.getConfiguration(), OutputFormat.class,
          getNamedOutputFormatInstanceFile(this.context, baseFileName), true);
      // We have to create a JobContext for meeting the contract of the
      // OutputFormat
      JobContext jobContext = new JobContext(taskContext.getConfiguration(),
          taskContext.getJobID());
      context.jobContext = jobContext;
      // The contract of the OutputFormat is to check the output specs
      outputFormat.checkOutputSpecs(jobContext);
      // We get the output committer so we can call it later
      context.outputCommitter = outputFormat.getOutputCommitter(taskContext);
      // Save the RecordWriter to cache it
      context.recordWriter = outputFormat.getRecordWriter(taskContext);

      // if counters are enabled, wrap the writer with context
      // to increment counters
      if(countersEnabled) {
        context.recordWriter = new RecordWriterWithCounter(context.recordWriter,

    // If not in cache, create a new one
    if(context == null) {

      context = new OutputContext();

      OutputFormat mainOutputFormat;

      try {
        mainOutputFormat = ((OutputFormat) ReflectionUtils.newInstance(
            this.context.getOutputFormatClass(), this.context.getConfiguration()));
      } catch(ClassNotFoundException e1) {
        throw new RuntimeException(e1);
      }

      ProxyOutputCommitter baseOutputCommitter = ((ProxyOutputCommitter) mainOutputFormat
          .getOutputCommitter(this.context));

      // The trick is to create a new Job for each output
      Configuration c = new Configuration(this.context.getConfiguration());
      Job job = new Job(c);
      job.setOutputKeyClass(getNamedOutputKeyClass(this.context, baseFileName));
      job.setOutputValueClass(getNamedOutputValueClass(this.context, baseFileName));
      // Check possible specific context for the output
      setSpecificNamedOutputContext(this.context.getConfiguration(), job, baseFileName);
      TaskAttemptContext taskContext;
      try {
        taskContext = TaskAttemptContextFactory.get(job.getConfiguration(),
            this.context.getTaskAttemptID());
      } catch(Exception e) {
        throw new IOException(e);
      }

      // First we change the output dir for the new OutputFormat that we will
      // create
      // We put it inside the main output work path -> in case the Job fails,
      // everything will be discarded
     
      taskContext.getConfiguration().set("mapred.output.dir",
          baseOutputCommitter.getBaseDir() + "/" + baseFileName);
      // This is for Hadoop 2.0 :
      taskContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir",
          baseOutputCommitter.getBaseDir() + "/" + baseFileName);
      context.taskAttemptContext = taskContext;

      // Load the OutputFormat instance
      OutputFormat outputFormat = InstancesDistributor.loadInstance(
          context.taskAttemptContext.getConfiguration(), OutputFormat.class,
          getNamedOutputFormatInstanceFile(this.context, baseFileName), true);
      // We have to create a JobContext for meeting the contract of the
      // OutputFormat
      JobContext jobContext;
      try {
        jobContext = JobContextFactory.get(taskContext.getConfiguration(), taskContext.getJobID());
      } catch(Exception e) {
        throw new IOException(e);
      }

      context.jobContext = jobContext;
      // The contract of the OutputFormat is to check the output specs
      outputFormat.checkOutputSpecs(jobContext);
      // We get the output committer so we can call it later
      context.outputCommitter = outputFormat.getOutputCommitter(taskContext);
      // Save the RecordWriter to cache it
      context.recordWriter = outputFormat.getRecordWriter(taskContext);

      // if counters are enabled, wrap the writer with context
      // to increment counters
      if(countersEnabled) {
        context.recordWriter = new RecordWriterWithCounter(context.recordWriter, baseFileName,

    // If not in cache, create a new one
    if(context == null) {

      context = new OutputContext();

      OutputFormat mainOutputFormat;

      try {
        mainOutputFormat = ((OutputFormat) ReflectionUtils.newInstance(
            this.context.getOutputFormatClass(), this.context.getConfiguration()));
      } catch(ClassNotFoundException e1) {
        throw new RuntimeException(e1);
      }

      ProxyOutputCommitter baseOutputCommitter = ((ProxyOutputCommitter) mainOutputFormat
          .getOutputCommitter(this.context));

      // The trick is to create a new Job for each output
      Job job = new Job(this.context.getConfiguration());
      job.setOutputKeyClass(getNamedOutputKeyClass(this.context, baseFileName));
      job.setOutputValueClass(getNamedOutputValueClass(this.context, baseFileName));
      // Check possible specific context for the output
      setSpecificNamedOutputContext(this.context.getConfiguration(), job, baseFileName);
      TaskAttemptContext taskContext;
      try {
        taskContext = TaskAttemptContextFactory.get(job.getConfiguration(),
            this.context.getTaskAttemptID());
      } catch(Exception e) {
        throw new IOException(e);
      }

      // First we change the output dir for the new OutputFormat that we will
      // create
      // We put it inside the main output work path -> in case the Job fails,
      // everything will be discarded
      taskContext.getConfiguration().set("mapred.output.dir",
          baseOutputCommitter.getBaseDir() + "/" + baseFileName);
      // This is for Hadoop 2.0 :
      taskContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir",
          baseOutputCommitter.getBaseDir() + "/" + baseFileName);
      context.taskAttemptContext = taskContext;

      // Load the OutputFormat instance
      OutputFormat outputFormat = InstancesDistributor.loadInstance(
          context.taskAttemptContext.getConfiguration(), OutputFormat.class,
          getNamedOutputFormatInstanceFile(this.context, baseFileName), true);
      // We have to create a JobContext for meeting the contract of the
      // OutputFormat
      JobContext jobContext;
      try {
        jobContext = JobContextFactory.get(taskContext.getConfiguration(), taskContext.getJobID());
      } catch(Exception e) {
        throw new IOException(e);
      }

      context.jobContext = jobContext;
      // The contract of the OutputFormat is to check the output specs
      outputFormat.checkOutputSpecs(jobContext);
      // We get the output committer so we can call it later
      context.outputCommitter = outputFormat.getOutputCommitter(taskContext);
      // Save the RecordWriter to cache it
      context.recordWriter = outputFormat.getRecordWriter(taskContext);

      // if counters are enabled, wrap the writer with context
      // to increment counters
      if(countersEnabled) {
        context.recordWriter = new RecordWriterWithCounter(context.recordWriter, baseFileName,

    /*
     * Define the Input Format and the Output Format!
     */
    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, '\t',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER,
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, false, '\t',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER);

    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setOutput(outPath, outputFormat, ITuple.class, NullWritable.class);

    /*
     * Define the Input Format and the Output Format!
     */
    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, ',', '"', '\\',
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, false, ',', '"', '\\');

    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setOutput(outPath, outputFormat, ITuple.class, NullWritable.class);
    try {

    /*
     * Define the Input Format and the Output Format!
     */
    InputFormat inputFormat = new TupleTextInputFormat(schema, true, false, ' ',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER,
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, true, ' ',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER);

    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setOutput(outPath, outputFormat, ITuple.class, NullWritable.class);

    // Define the Input Format and the Output Format!
    // Add the selector to the input format
    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, ' ',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER, selector,
        TupleTextInputFormat.NO_NULL_STRING);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, false, ' ',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER);

    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setOutput(outPath, outputFormat, ITuple.class, NullWritable.class);
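
The Pangool examples above hand their OutputFormat to a TupleMRBuilder. For comparison, here is a minimal sketch of how an OutputFormat implementation is wired into a plain Hadoop job through the standard org.apache.hadoop.mapreduce.Job API; the class name OutputFormatWiring, the map-only identity setup, and the argument paths are placeholders for illustration, not code from the projects quoted above.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

    // Hypothetical driver class, shown only to illustrate where an OutputFormat plugs in.
    public class OutputFormatWiring {

      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "output-format-wiring");
        job.setJarByClass(OutputFormatWiring.class);

        // Map-only identity job: records flow from the InputFormat straight
        // through the default Mapper to whatever OutputFormat is set below.
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // placeholder input path

        // Any OutputFormat implementation can be plugged in here, e.g. the
        // DiscardingOutputFormat sketch at the top of this page.
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // placeholder output path

        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }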
