Package org.apache.hadoop.mapreduce

Examples of org.apache.hadoop.mapreduce.TaskAttemptContext
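
In the new MapReduce API, TaskAttemptContext is the context object handed to record readers, record writers and output committers; the excerpts below all construct one by hand so those components can be driven outside a running job. As a starting point, here is a minimal, self-contained sketch of that pattern (assuming Hadoop 2.x, where the concrete class is org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; the input path /tmp/input.txt is hypothetical):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    import org.apache.hadoop.mapreduce.TaskAttemptID;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;
    import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
    import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

    public class TaskAttemptContextDemo {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // TaskAttemptContext is an interface in the new API; tests typically
        // instantiate TaskAttemptContextImpl with a fresh TaskAttemptID.
        TaskAttemptContext context =
            new TaskAttemptContextImpl(conf, new TaskAttemptID());

        // Read a whole local file as a single split and count its records.
        Path file = new Path("/tmp/input.txt");   // hypothetical input file
        long length = file.getFileSystem(conf).getFileStatus(file).getLen();
        FileSplit split = new FileSplit(file, 0, length, (String[]) null);

        LineRecordReader reader = new LineRecordReader();
        reader.initialize(split, context);
        long records = 0;
        while (reader.nextKeyValue()) {
          ++records;
        }
        reader.close();
        System.out.println("records = " + records);
      }
    }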


    conf.setInt(org.apache.hadoop.mapreduce.lib.input.
        LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    assertTrue("unexpected test data at " + testFile,
        testFileSize > firstSplitLength);

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    // read the data without splitting to count the records
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
        (String[])null);
    LineRecordReader reader = new LineRecordReader();


    File testFile = new File(testFileUrl.getFile());
    long testFileSize = testFile.length();
    Path testFilePath = new Path(testFile.getAbsolutePath());
    Configuration conf = new Configuration();
    conf.setInt("io.file.buffer.size", 1);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    // Gather the records returned by the record reader
    ArrayList<String> records = new ArrayList<String>();

    long offset = 0;

    long testFileSize = testFile.length();
    Configuration conf = new Configuration();
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.
        LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    // read the data and check whether BOM is skipped
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
        (String[])null);
    LineRecordReader reader = new LineRecordReader();

    //
    // Old Hadoop API
    //
    public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException
    {
        TaskAttemptContext tac = new TaskAttemptContext(jobConf, new TaskAttemptID());
        List<org.apache.hadoop.mapreduce.InputSplit> newInputSplits = this.getSplits(tac);
        org.apache.hadoop.mapred.InputSplit[] oldInputSplits = new org.apache.hadoop.mapred.InputSplit[newInputSplits.size()];
        for (int i = 0; i < newInputSplits.size(); i++)
            oldInputSplits[i] = (ColumnFamilySplit)newInputSplits.get(i);
        return oldInputSplits;
    }

    public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, IColumn>> getRecordReader(org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter) throws IOException
    {
        TaskAttemptContext tac = new TaskAttemptContext(jobConf, TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID)))
        {
            @Override
            public void progress()
            {
                reporter.progress();

      ArrayList<K> samples = new ArrayList<K>(numSamples);
      int splitsToSample = Math.min(maxSplitsSampled, splits.size());
      int samplesPerSplit = numSamples / splitsToSample;
      long records = 0;
      for (int i = 0; i < splitsToSample; ++i) {
        TaskAttemptContext samplingContext = getTaskAttemptContext(job);
        RecordReader<K,V> reader = inf.createRecordReader(
            splits.get(i), samplingContext);
        reader.initialize(splits.get(i), samplingContext);
        while (reader.nextKeyValue()) {
          samples.add(ReflectionUtils.copy(job.getConfiguration(),

      // our target rate is in terms of the maximum number of sample splits,
      // but we accept the possibility of sampling additional splits to hit
      // the target sample keyset
      for (int i = 0; i < splitsToSample ||
                     (i < splits.size() && samples.size() < numSamples); ++i) {
        TaskAttemptContext samplingContext = getTaskAttemptContext(job);
        RecordReader<K,V> reader = inf.createRecordReader(
            splits.get(i), samplingContext);
        reader.initialize(splits.get(i), samplingContext);
        while (reader.nextKeyValue()) {
          if (r.nextDouble() <= freq) {

      ArrayList<K> samples = new ArrayList<K>();
      int splitsToSample = Math.min(maxSplitsSampled, splits.size());
      long records = 0;
      long kept = 0;
      for (int i = 0; i < splitsToSample; ++i) {
        TaskAttemptContext samplingContext = getTaskAttemptContext(job);
        RecordReader<K,V> reader = inf.createRecordReader(
            splits.get(i), samplingContext);
        reader.initialize(splits.get(i), samplingContext);
        while (reader.nextKeyValue()) {
          ++records;
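
The three sampler excerpts above share one loop: obtain the job's splits, build a throwaway sampling context per split, open a RecordReader against it, and collect a few keys. They call a getTaskAttemptContext(job) helper; constructing a TaskAttemptContextImpl directly is one way such a helper can be implemented. Below is a rough, hypothetical sketch of that loop with TextInputFormat (the input path /tmp/input and the per-split sample budget are made up for illustration):

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.RecordReader;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    import org.apache.hadoop.mapreduce.TaskAttemptID;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

    public class SamplerSketch {
      public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        FileInputFormat.addInputPath(job, new Path("/tmp/input"));  // hypothetical input

        TextInputFormat inf = new TextInputFormat();
        List<InputSplit> splits = inf.getSplits(job);

        int samplesPerSplit = 10;                 // hypothetical sample budget
        List<Text> samples = new ArrayList<Text>();
        for (InputSplit split : splits) {
          // One throwaway context per split, as in the sampler code above.
          TaskAttemptContext samplingContext =
              new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
          RecordReader<LongWritable, Text> reader =
              inf.createRecordReader(split, samplingContext);
          reader.initialize(split, samplingContext);
          int taken = 0;
          while (taken < samplesPerSplit && reader.nextKeyValue()) {
            // Copy the value; record readers reuse their key/value objects.
            samples.add(new Text(reader.getCurrentValue()));
            ++taken;
          }
          reader.close();
        }
        System.out.println("sampled " + samples.size() + " records");
      }
    }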

    @Override
    public RecordWriter<String, Tuple> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
   
      final TaskAttemptContext ctx = context;
       
      return new RecordWriter<String, Tuple>() {

        private Map<String, MyLineRecordWriter> storeMap =
              new HashMap<String, MyLineRecordWriter>();
         
        private static final int BUFFER_SIZE = 1024;
         
        private ByteArrayOutputStream mOut =
              new ByteArrayOutputStream(BUFFER_SIZE);
                          
        @Override
        public void write(String key, Tuple val) throws IOException {               
          int sz = val.size();
          for (int i = 0; i < sz; i++) {
            Object field;
            try {
              field = val.get(i);
            } catch (ExecException ee) {
              throw ee;
            }

            StorageUtil.putField(mOut, field);

            if (i != sz - 1) {
              mOut.write(fieldDel);
            }
          }
             
          getStore(key).write(null, new Text(mOut.toByteArray()));

          mOut.reset();
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException {
          for (MyLineRecordWriter out : storeMap.values()) {
            out.close(context);
          }
        }
     
        private MyLineRecordWriter getStore(String fieldValue) throws IOException {
          MyLineRecordWriter store = storeMap.get(fieldValue);
          if (store == null) {                 
            DataOutputStream os = createOutputStream(fieldValue);
            store = new MyLineRecordWriter(os, keyValueSeparator);
            storeMap.put(fieldValue, store);
          }
          return store;
        }
         
        private DataOutputStream createOutputStream(String fieldValue) throws IOException {
          Configuration conf = ctx.getConfiguration();
          TaskID taskId = ctx.getTaskAttemptID().getTaskID();
          Path path = new Path(fieldValue, fieldValue + '-'
                  + NumberFormat.getInstance().format(taskId.getId()));
          Path workOutputPath = ((FileOutputCommitter)getOutputCommitter(ctx)).getWorkPath();
          Path file = new Path(workOutputPath, path);
          FileSystem fs = file.getFileSystem(conf);               

        POStore store = new POStore(new OperatorKey());
        store.setSFile(new FileSpec(file, storeFuncSpec));
        PigOutputFormat.setLocation(jc, store);
        OutputCommitter oc;
        // create a simulated TaskAttemptContext
        TaskAttemptContext tac = new TaskAttemptContext(conf, new TaskAttemptID());
        PigOutputFormat.setLocation(tac, store);
        RecordWriter<?, ?> rw;
        try {
            of.checkOutputSpecs(jc);
            oc = of.getOutputCommitter(tac);
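
The same simulated-context pattern works with the stock Hadoop output formats, not just PigOutputFormat. Below is a rough, hypothetical sketch using TextOutputFormat (Hadoop 2.x new API assumed; the output directory /tmp/tac-demo and the attempt id string are made up for illustration):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.OutputCommitter;
    import org.apache.hadoop.mapreduce.RecordWriter;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    import org.apache.hadoop.mapreduce.TaskAttemptID;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
    import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

    public class SimulatedContextWrite {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        TextOutputFormat.setOutputPath(job, new Path("/tmp/tac-demo"));  // hypothetical dir

        // A well-formed attempt id keeps the committer's file naming sane.
        TaskAttemptID taskId =
            TaskAttemptID.forName("attempt_200707121733_0001_m_000000_0");
        TaskAttemptContext tac =
            new TaskAttemptContextImpl(job.getConfiguration(), taskId);

        TextOutputFormat<Text, NullWritable> of =
            new TextOutputFormat<Text, NullWritable>();
        of.checkOutputSpecs(job);            // fails if the directory already exists
        OutputCommitter oc = of.getOutputCommitter(tac);
        oc.setupJob(job);
        oc.setupTask(tac);

        RecordWriter<Text, NullWritable> rw = of.getRecordWriter(tac);
        rw.write(new Text("hello from a simulated task attempt"), NullWritable.get());
        rw.close(tac);

        if (oc.needsTaskCommit(tac)) {
          oc.commitTask(tac);                // promote output from the temp work path
        }
        oc.commitJob(job);
      }
    }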
