Package org.apache.hadoop.mapreduce

Examples of org.apache.hadoop.mapreduce.InputFormat
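
org.apache.hadoop.mapreduce.InputFormat describes the input of a job through two
methods: getSplits(JobContext) divides the input into InputSplits, and
createRecordReader(InputSplit, TaskAttemptContext) produces the RecordReader that
turns one split into key/value records. As a minimal sketch (the paths, job name
and class name are illustrative, not taken from the excerpts below), a driver
normally only selects which implementation the framework should instantiate:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class InputFormatDriver {
  public static void main(String[] args) throws Exception {
    Job job = new Job(new Configuration());
    job.setJobName("inputformat-example");
    job.setJarByClass(InputFormatDriver.class);

    // The InputFormat decides how the input is split and how records are read.
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path("/tmp/input"));
    FileOutputFormat.setOutputPath(job, new Path("/tmp/output"));

    // Identity, map-only job; TextInputFormat produces (byte offset, line) pairs.
    job.setMapperClass(Mapper.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

The excerpts that follow are fragments from several code bases; each one ends where
the original listing was cut off.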


  /** Writes a partition file for the given job, using the provided Sampler. */
  @SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
  public static <K,V> void writePartitionFile(Job job, Sampler<K,V> sampler)
      throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = job.getConfiguration();
    final InputFormat inf =
        ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    int numPartitions = job.getNumReduceTasks();
    K[] samples = sampler.getSample(inf, job);
    LOG.info("Using " + samples.length + " samples");
    RawComparator<K> comparator =
    // ... (remainder of writePartitionFile elided)
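
The method above pairs naturally with TotalOrderPartitioner: the keys sampled from
the job's InputFormat become the partition boundaries of a globally sorted output.
A hedged sketch of that wiring (sampling rate, sample count, partition-file path
and the Text key type are illustrative assumptions about the job):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderExample {
  public static void configureSampling(Job job) throws Exception {
    job.setNumReduceTasks(4);
    job.setPartitionerClass(TotalOrderPartitioner.class);

    // Where writePartitionFile stores the split points (illustrative path).
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(),
        new Path("/tmp/_partitions"));

    // Sample roughly 10% of the records, keeping at most 10000 keys.
    InputSampler.Sampler<Text, Text> sampler =
        new InputSampler.RandomSampler<Text, Text>(0.1, 10000);
    InputSampler.writePartitionFile(job, sampler);
  }
}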


    conf1.set(DUMMY_KEY, "STATE1");
    TaskAttemptContext context1 = new TaskAttemptContextImpl(conf1, taskId);

    // This will create a CombineFileRecordReader that itself contains a
    // DummyRecordReader.
    InputFormat inputFormat = new ChildRRInputFormat();

    Path [] files = { new Path("file1") };
    long [] lengths = { 1 };

    CombineFileSplit split = new CombineFileSplit(files, lengths);

    RecordReader rr = inputFormat.createRecordReader(split, context1);
    assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

    // Verify that the initial configuration is the one being used.
    // Right after construction the dummy key should have value "STATE1"
    assertEquals("Invalid initial dummy key value", "STATE1",
    // ... (assertion continues; remainder of the test elided)

    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskId);

    // This will create a CombineFileRecordReader that itself contains a
    // DummyRecordReader.
    InputFormat inputFormat = new ChildRRInputFormat();

    Path [] files = { new Path("file1"), new Path("file2") };
    long [] lengths = { 1, 1 };

    CombineFileSplit split = new CombineFileSplit(files, lengths);
    RecordReader rr = inputFormat.createRecordReader(split, context);
    assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

    // first initialize() call comes from MapTask. We'll do it here.
    rr.initialize(split, context);
    // ... (remainder of the test elided)
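
The two tests above exercise CombineFileRecordReader, which packs several small
files into one split and delegates each file to a per-file reader supplied by the
InputFormat. A minimal sketch of that pattern (the class names here are
illustrative, not the ChildRRInputFormat/DummyRecordReader used by the tests):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class CombinedInputFormat extends CombineFileInputFormat<LongWritable, Text> {

  @Override
  public RecordReader<LongWritable, Text> createRecordReader(
      InputSplit split, TaskAttemptContext context) throws IOException {
    // CombineFileRecordReader instantiates one PerFileReader per file in the split.
    return new CombineFileRecordReader<LongWritable, Text>(
        (CombineFileSplit) split, context, PerFileReader.class);
  }

  // The wrapped reader must expose a (CombineFileSplit, TaskAttemptContext, Integer)
  // constructor; the Integer is the index of the file inside the combined split.
  public static class PerFileReader extends RecordReader<LongWritable, Text> {
    private final CombineFileSplit split;
    private final int index;
    private final LongWritable key = new LongWritable();
    private final Text value = new Text();
    private boolean done = false;

    public PerFileReader(CombineFileSplit split, TaskAttemptContext context, Integer index) {
      this.split = split;
      this.index = index;
    }

    @Override
    public void initialize(InputSplit genericSplit, TaskAttemptContext context) { }

    @Override
    public boolean nextKeyValue() {
      if (done) {
        return false;
      }
      // Emit a single (file index, file path) record; a real reader would open
      // split.getPath(index) and stream the file's contents instead.
      key.set(index);
      value.set(split.getPath(index).toString());
      done = true;
      return true;
    }

    @Override
    public LongWritable getCurrentKey() { return key; }

    @Override
    public Text getCurrentValue() { return value; }

    @Override
    public float getProgress() { return done ? 1.0f : 0.0f; }

    @Override
    public void close() { }
  }
}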

    assertTrue(reader == underTest.reader);
  }
 
  public void testGetInputFormat() throws IOException
  {
    InputFormat f = underTest.getInputFormat();
    assertEquals(f.getClass(), SequenceFileInputFormat.class);
  }
  // ... (rest of the test class elided)

        PigInputFormat.mergeSplitSpecificConf(loadFunc, pigSplit, conf);
       
        // for backward compatibility
        PigInputFormat.sJob = conf;
       
        InputFormat inputFormat = loadFunc.getInputFormat();
       
        return new PigRecordReader(inputFormat, pigSplit, loadFunc, context);
    }
    // ... (rest of the class elided)
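
As the excerpt above shows, PigInputFormat does not read data itself: it asks the
script's LoadFunc for a Hadoop InputFormat and wraps the resulting reader in a
PigRecordReader. A minimal sketch of the loader side of that contract (MyTextLoader
is an illustrative name; it simply reuses TextInputFormat and wraps each line in a
one-field tuple):

import java.io.IOException;

import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.pig.LoadFunc;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

public class MyTextLoader extends LoadFunc {

  private RecordReader reader;
  private final TupleFactory tupleFactory = TupleFactory.getInstance();

  @Override
  public void setLocation(String location, Job job) throws IOException {
    FileInputFormat.setInputPaths(job, location);
  }

  @Override
  public InputFormat getInputFormat() throws IOException {
    // This is the object PigInputFormat asks for splits and record readers.
    return new TextInputFormat();
  }

  @Override
  public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
    this.reader = reader;
  }

  @Override
  public Tuple getNext() throws IOException {
    try {
      if (!reader.nextKeyValue()) {
        return null;  // end of this split
      }
      // TextInputFormat's value is the line of text; wrap it in a one-field tuple.
      return tupleFactory.newTuple(reader.getCurrentValue().toString());
    } catch (InterruptedException e) {
      throw new IOException(e);
    }
  }
}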

                // ... (the setLocation(..., inputSpecificJob) call is truncated here;
                // only its final argument remains)
                        inputSpecificJob);
                // The above setLocation call could write to the conf within
                // the inputSpecificJob - use this updated conf
               
                // get the InputFormat from it and ask for splits
                InputFormat inpFormat = loadFunc.getInputFormat();
                List<InputSplit> oneInputSplits = inpFormat.getSplits(
                        HadoopShims.createJobContext(inputSpecificJob.getConfiguration(),
                                jobcontext.getJobID()));
                List<InputSplit> oneInputPigSplits = getPigSplits(
                        oneInputSplits, i, inpTargets.get(i), fs.getDefaultBlockSize(), combinable, confClone);
                splits.addAll(oneInputPigSplits);
                // ... (remainder of the split-creation loop elided)

                            LoadFunc loader = (LoadFunc) PigContext
                                    .instantiateFuncFromSpec(ld.getLFile()
                                            .getFuncSpec());
                            Job job = new Job(conf);
                            loader.setLocation(location, job);
                            InputFormat inf = loader.getInputFormat();
                            List<InputSplit> splits = inf.getSplits(HadoopShims.cloneJobContext(job));
                            List<List<InputSplit>> results = MapRedUtil
                                    .getCombinePigSplits(splits,
                                            fs.getDefaultBlockSize(), conf);
                            numFiles += results.size();
                        } else {
                            // ... (else branch elided)

      writer.write("str1" + " " + "str2" + " " + "30" + " " + "4000" + "\n");
    }
    writer.close();

    Schema schema = new Schema("schema", Fields.parse("a:string, b:string, c:int, d:long"));
    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, ' ',
        TupleTextInputFormat.NO_QUOTE_CHARACTER, TupleTextInputFormat.NO_ESCAPE_CHARACTER,
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);

    Configuration conf = getConf();
    conf.setLong("mapred.min.split.size", 10 * 1024);
 
    // ... (remainder of the test elided)
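
The test above sets a lower bound on split size through the raw
mapred.min.split.size property. For input formats that extend the new-API
FileInputFormat, the same limits can be set through its static helpers; a small
sketch (the byte values are illustrative):

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class SplitSizeConfig {
  public static void limitSplitSizes(Job job) {
    FileInputFormat.setMinInputSplitSize(job, 10 * 1024L);        // at least 10 KB per split
    FileInputFormat.setMaxInputSplitSize(job, 64 * 1024 * 1024L); // at most 64 MB per split
  }
}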

  }

  @Test
  public void testInputCompression() throws Exception {
    Schema schema = new Schema("schema", Fields.parse("a:string, b:string, c:int, d:long"));
    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, ' ',
        TupleTextInputFormat.NO_QUOTE_CHARACTER, TupleTextInputFormat.NO_ESCAPE_CHARACTER,
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);

    Configuration conf = getConf();
    FileSystem fS = FileSystem.get(conf);
    // ... (remainder of the test elided)
