Package org.apache.hadoop.mapreduce

Examples of org.apache.hadoop.mapreduce.InputFormat


    conf1.set(DUMMY_KEY, "STATE1");
    TaskAttemptContext context1 = new TaskAttemptContextImpl(conf1, taskId);

    // This will create a CombineFileRecordReader that itself contains a
    // DummyRecordReader.
    InputFormat inputFormat = new ChildRRInputFormat();

    Path [] files = { new Path("file1") };
    long [] lengths = { 1 };

    CombineFileSplit split = new CombineFileSplit(files, lengths);

    RecordReader rr = inputFormat.createRecordReader(split, context1);
    assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

    // Verify that the initial configuration is the one being used.
    // Right after construction the dummy key should have value "STATE1"
    assertEquals("Invalid initial dummy key value", "STATE1",


    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskId);

    // This will create a CombineFileRecordReader that itself contains a
    // DummyRecordReader.
    InputFormat inputFormat = new ChildRRInputFormat();

    Path [] files = { new Path("file1"), new Path("file2") };
    long [] lengths = { 1, 1 };

    CombineFileSplit split = new CombineFileSplit(files, lengths);
    RecordReader rr = inputFormat.createRecordReader(split, context);
    assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);

    // first initialize() call comes from MapTask. We'll do it here.
    rr.initialize(split, context);
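
After initialize(), records are pulled from the reader the same way MapTask's run loop does. A minimal sketch of that loop (not part of the excerpt above) is:

    // Sketch only: drain the reader as the map task's run loop would.
    while (rr.nextKeyValue()) {
      Object key = rr.getCurrentKey();     // raw RecordReader, so Object here
      Object value = rr.getCurrentValue();
      // ... pass key/value to the mapper ...
    }
    rr.close();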

    Map<Path, String> formatMap = PangoolMultipleInputs.getInputFormatMap(job);
    Map<Path, String> mapperMap = PangoolMultipleInputs.getInputProcessorFileMap(job);

    for(Map.Entry<Path, String> entry : formatMap.entrySet()) {
      FileInputFormat.setInputPaths(jobCopy, entry.getKey());
      InputFormat inputFormat = InstancesDistributor.loadInstance(conf, InputFormat.class,
          entry.getValue(), true);
      PangoolMultipleInputs.setSpecificInputContext(jobCopy.getConfiguration(), entry.getValue());
      List<InputSplit> pathSplits = inputFormat.getSplits(jobCopy);
      for(InputSplit pathSplit : pathSplits) {
        splits.add(new TaggedInputSplit(pathSplit, conf, entry.getValue(), mapperMap
            .get(entry.getKey())));
      }
    }
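
The loop above asks each per-path InputFormat for its splits through getSplits(JobContext). Outside of Pangool, the same call looks like the sketch below; the input path is illustrative.

    // Sketch only: plain Hadoop equivalent of the getSplits() call above.
    Job plainJob = Job.getInstance(conf);
    FileInputFormat.setInputPaths(plainJob, new Path("/example/input"));
    InputFormat<LongWritable, Text> plainFormat = new TextInputFormat();
    List<InputSplit> plainSplits = plainFormat.getSplits(plainJob);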

        PigInputFormat.mergeSplitSpecificConf(loadFunc, pigSplit, conf);
       
        // for backward compatibility
        PigInputFormat.sJob = conf;
       
        InputFormat inputFormat = loadFunc.getInputFormat();
       
        List<Long> inpLimitLists =
                (ArrayList<Long>)ObjectSerializer.deserialize(
                        conf.get("pig.inpLimits"));
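
loadFunc.getInputFormat() above is the hook through which a Pig loader tells the runtime which Hadoop InputFormat to read with. A bare-bones LoadFunc wired to TextInputFormat might look like the sketch below; the class name and the omitted tuple logic are assumptions.

    // Sketch only -- imports assumed: org.apache.pig.LoadFunc, org.apache.pig.data.Tuple,
    // org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit,
    // org.apache.hadoop.mapreduce.lib.input.*
    public class SketchTextLoader extends LoadFunc {
      @Override
      public void setLocation(String location, Job job) throws IOException {
        FileInputFormat.setInputPaths(job, location);
      }
      @Override
      public InputFormat getInputFormat() throws IOException {
        return new TextInputFormat();
      }
      @Override
      public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
        // keep a reference to the reader here; omitted in this sketch
      }
      @Override
      public Tuple getNext() throws IOException {
        return null; // tuple construction omitted in this sketch
      }
    }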
       

                        inputSpecificJob);
                // The above setLocation call could write to the conf within
                // the inputSpecificJob - use this updated conf
               
                // get the InputFormat from it and ask for splits
                InputFormat inpFormat = loadFunc.getInputFormat();
                List<InputSplit> oneInputSplits = inpFormat.getSplits(
                        HadoopShims.createJobContext(inputSpecificJob.getConfiguration(),
                                jobcontext.getJobID()));
                List<InputSplit> oneInputPigSplits = getPigSplits(
                        oneInputSplits, i, inpTargets.get(i), fs.getDefaultBlockSize(), combinable, confClone);
                splits.addAll(oneInputPigSplits);
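
HadoopShims.createJobContext above papers over the Hadoop 1.x/2.x API difference. On Hadoop 2.x the direct equivalent is roughly the sketch below.

    // Sketch only: Hadoop 2.x equivalent of the shim call above.
    JobContext splitContext = new org.apache.hadoop.mapreduce.task.JobContextImpl(
        inputSpecificJob.getConfiguration(), jobcontext.getJobID());
    List<InputSplit> splitsForThisInput = inpFormat.getSplits(splitContext);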

                            LoadFunc loader = (LoadFunc) PigContext
                            .instantiateFuncFromSpec(ld.getLFile()
                                    .getFuncSpec());
                            Job job = new Job(conf);
                            loader.setLocation(location, job);
                            InputFormat inf = loader.getInputFormat();
                            List<InputSplit> splits = inf.getSplits(HadoopShims.cloneJobContext(job));
                            List<List<InputSplit>> results = MapRedUtil
                            .getCombinePigSplits(splits, fs
                                    .getDefaultBlockSize(), conf);
                            numFiles += results.size();
                        } else {
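
The branch above instantiates the loader from its FuncSpec, asks its InputFormat for splits, and counts the combined split groups. When only the total bytes behind the splits are needed, the plain InputSplit API is enough; a sketch:

    // Sketch only: summing the sizes of the splits obtained above.
    long totalBytes = 0;
    for (InputSplit s : splits) {
      totalBytes += s.getLength();  // getLength() may throw IOException/InterruptedException
    }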

    Map<Path, InputFormat> m = new HashMap<Path, InputFormat>();
    Configuration conf = job.getConfiguration();
    String[] pathMappings = conf.get(DIR_FORMATS).split(",");
    for (String pathMapping : pathMappings) {
      String[] split = pathMapping.split(";");
      InputFormat inputFormat;
      try {
       inputFormat = (InputFormat) ReflectionUtils.newInstance(conf
           .getClassByName(split[1]), conf);
      } catch (ClassNotFoundException e) {
       throw new RuntimeException(e);
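
The "path;class" pairs parsed above are the serialized form of per-path InputFormat assignments. Assuming this is the standard MultipleInputs machinery, they are written by addInputPath; a usage sketch with illustrative paths:

    // Sketch only: registering one InputFormat per input path.
    Job sketchJob = Job.getInstance(new Configuration());
    MultipleInputs.addInputPath(sketchJob, new Path("/data/plain"),
        TextInputFormat.class);
    MultipleInputs.addInputPath(sketchJob, new Path("/data/sequence"),
        SequenceFileInputFormat.class);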

    }

    for (Entry<Class<? extends InputFormat>, List<Path>> formatEntry :
        formatPaths.entrySet()) {
      Class<? extends InputFormat> formatClass = formatEntry.getKey();
      InputFormat format = (InputFormat) ReflectionUtils.newInstance(
         formatClass, conf);
      List<Path> paths = formatEntry.getValue();

      Map<Class<? extends Mapper>, List<Path>> mapperPaths
          = new HashMap<Class<? extends Mapper>, List<Path>>();

      // Now, for each set of paths that have a common InputFormat, build
      // a map of Mappers to the paths they're used for
      for (Path path : paths) {
       Class<? extends Mapper> mapperClass = mapperMap.get(path);
       if (!mapperPaths.containsKey(mapperClass)) {
         mapperPaths.put(mapperClass, new LinkedList<Path>());
       }

       mapperPaths.get(mapperClass).add(path);
      }

      // Now each set of paths that has a common InputFormat and Mapper can
      // be added to the same job, and split together.
      for (Entry<Class<? extends Mapper>, List<Path>> mapEntry :
          mapperPaths.entrySet()) {
       paths = mapEntry.getValue();
       Class<? extends Mapper> mapperClass = mapEntry.getKey();

       if (mapperClass == null) {
         try {
           mapperClass = job.getMapperClass();
         } catch (ClassNotFoundException e) {
           throw new IOException("Mapper class is not found", e);
         }
       }

       FileInputFormat.setInputPaths(jobCopy, paths.toArray(new Path[paths
           .size()]));

       // Get splits for each input path and tag with InputFormat
       // and Mapper types by wrapping in a TaggedInputSplit.
       List<InputSplit> pathSplits = format.getSplits(jobCopy);
       for (InputSplit pathSplit : pathSplits) {
         splits.add(new TaggedInputSplit(pathSplit, conf, format.getClass(),
             mapperClass));
       }
      }
    }
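
All of the grouping above exists to honor the per-path (InputFormat, Mapper) pairs registered up front. A short usage sketch of that registration follows; the paths and the LogMapper/EventMapper classes are hypothetical.

    // Sketch only: per-path InputFormat and Mapper registration
    // (LogMapper and EventMapper are hypothetical Mapper subclasses).
    Job sketchJob = Job.getInstance(new Configuration(), "multi-input");
    MultipleInputs.addInputPath(sketchJob, new Path("/in/logs"),
        TextInputFormat.class, LogMapper.class);
    MultipleInputs.addInputPath(sketchJob, new Path("/in/events"),
        SequenceFileInputFormat.class, EventMapper.class);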
