Package org.apache.hadoop.mapreduce

Examples of org.apache.hadoop.mapreduce.Job
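
All of the excerpts below share the same driver shape: build a Configuration, construct a Job, wire up the mapper/reducer and key/value classes, point FileInputFormat/FileOutputFormat at the paths, and block on waitForCompletion. For orientation, here is a minimal self-contained sketch of that shape; MyMapper and MyReducer are placeholders, not classes from the excerpts.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class MinimalDriver {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // Job.getInstance is the non-deprecated replacement for new Job(conf, name)
            Job job = Job.getInstance(conf, "example");
            job.setJarByClass(MinimalDriver.class);
            job.setMapperClass(MyMapper.class);     // placeholder
            job.setReducerClass(MyReducer.class);   // placeholder
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }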


        conf.set("mapred.output.compression.type", "BLOCK");
        conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
        conf.set(SetJoinMapper.INPUTS+".1", Joiner.on(",").join(o.left));
        conf.set(SetJoinMapper.INPUTS+".2", Joiner.on(",").join(o.right));

        Job job=new Job(conf,"diffFacts");
        job.setJarByClass(this.getClass());
        job.setMapperClass(TextSimpleJoinMapper.class);
        job.setReducerClass(DiffFactReducer.class);
        job.setGroupingComparatorClass(TaggedTextKeyGroupComparator.class);
        job.setPartitionerClass(TaggedKeyPartitioner.class);

        if(o.reducerCount<1) {
            o.reducerCount=1;
        }

        job.setNumReduceTasks(o.reducerCount);

        job.setMapOutputKeyClass(TaggedTextItem.class);
        job.setMapOutputValueClass(VIntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        for(String path: Iterables.concat(o.left, o.right)) {
            FileInputFormat.addInputPath(job, new Path(path));
        }

        FileOutputFormat.setOutputPath(job, new Path(o.output));
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        return job.waitForCompletion(true) ? 0 : 1;
    }
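
The join drivers on this page lean on a tagged-key pattern: the partitioner routes records by join key alone, and the grouping comparator groups reduce input by join key while the tag still takes part in the sort. The project's TaggedKeyPartitioner and TaggedTextKeyGroupComparator sources are not shown here; the following is only a sketch of how such a pair is typically written, assuming TaggedTextItem exposes its join key through a hypothetical getKey() accessor returning Text.

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;
    import org.apache.hadoop.mapreduce.Partitioner;

    class TaggedKeyPartitionerSketch<V> extends Partitioner<TaggedTextItem, V> {
        @Override
        public int getPartition(TaggedTextItem key, V value, int numPartitions) {
            // Hash the join key only, never the tag, so both sides of a key co-locate.
            return (key.getKey().hashCode() & Integer.MAX_VALUE) % numPartitions;
        }
    }

    class TaggedTextKeyGroupComparatorSketch extends WritableComparator {
        protected TaggedTextKeyGroupComparatorSketch() {
            super(TaggedTextItem.class, true); // true = instantiate keys for comparison
        }

        @Override
        @SuppressWarnings("rawtypes")
        public int compare(WritableComparable a, WritableComparable b) {
            // Group on the join key alone; the tag is ignored for grouping.
            return ((TaggedTextItem) a).getKey().compareTo(((TaggedTextItem) b).getKey());
        }
    }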


            conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
            conf.set(SetJoinMapper.INPUTS+".1",inputA);
            // Join the paths into one comma-separated property; setting it inside a
            // loop would overwrite the value each time, keeping only the last path.
            conf.set(SetJoinMapper.INPUTS+".2", Joiner.on(",").join(paths));

            Job job=new Job(conf,"fetchTriplesWithMatchingObjects");
            job.setJarByClass(this.getClass());
            job.setMapperClass(FetchTriplesWithMatchingObjectsMapper.class);
            job.setReducerClass(AcceptWithMatchingKeyReducer.class);
            job.setGroupingComparatorClass(TaggedTextKeyGroupComparator.class);
            job.setPartitionerClass(TaggedKeyPartitioner.class);

            if(reduceTasks==null) {
                reduceTasks=1;    // about right for AWS runs
            }

            job.setNumReduceTasks(reduceTasks);

            job.setMapOutputKeyClass(TaggedTextItem.class);
            job.setMapOutputValueClass(TaggedTextItem.class);
            job.setOutputKeyClass(NullWritable.class);
            job.setOutputValueClass(Text.class);


            FileInputFormat.addInputPath(job, new Path(inputA));
            for(String path:paths)
                FileInputFormat.addInputPath(job, new Path(path));

            FileOutputFormat.setOutputPath(job, new Path(output));
            FileOutputFormat.setCompressOutput(job, true);
            FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
            job.setOutputFormatClass(TextOutputFormat.class);

            return job.waitForCompletion(true) ? 0 : 1;
        } catch(Main.IncorrectUsageException iue) {
            return 2;
        }
    }

            conf.set("mapred.compress.map.output", "true");
            conf.set("mapred.output.compression.type", "BLOCK");
            conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");

            Job job=new Job(conf,getJobName());
            job.setSpeculativeExecution(false);
            job.setJarByClass(this.getClass());
            job.setMapperClass(getMapperClass());
            job.setReducerClass(Uniq.class);

            if(reduceTasks==null) {
                reduceTasks=29;    // about right for AWS runs
            }

            job.setNumReduceTasks(reduceTasks);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LongWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);

            for(String input:paths) {
                FileInputFormat.addInputPath(job, new Path(input));
            }

            FileOutputFormat.setOutputPath(job, new Path(output));
            FileOutputFormat.setCompressOutput(job, true);
            FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

            // Gotcha -- the FileOutputFormat settings above only write configuration
            // properties; the output format class set below is what reads them when
            // the job actually runs.

            job.setOutputFormatClass(TextOutputFormat.class);

            return job.waitForCompletion(true) ? 0 : 1;
        } catch(Main.IncorrectUsageException iue) {
            return 2;
        }
    }
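
The mapred.* keys used throughout these snippets are the old-API property names; Hadoop 2 and later still honor them through the deprecation table, but log warnings. For reference, the current equivalents are:

    // Current (Hadoop 2+) names for the deprecated keys used above:
    conf.setBoolean("mapreduce.map.output.compress", true);   // was mapred.compress.map.output
    conf.set("mapreduce.map.output.compress.codec",
            "org.apache.hadoop.io.compress.GzipCodec");       // was mapred.map.output.compression.codec
    conf.set("mapreduce.output.fileoutputformat.compress.type",
            "BLOCK");                                         // was mapred.output.compression.type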

            conf.set("mapred.output.compression.type", "BLOCK");
            conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
            conf.set(SetJoinMapper.INPUTS+".1",inputA);
            conf.set(SetJoinMapper.INPUTS+".2",inputB);

            Job job=new Job(conf,"setDifference");
            job.setJarByClass(this.getClass());
            job.setMapperClass(TextSimpleJoinMapper.class);
            job.setReducerClass(SetDifferenceReducer.class);
            job.setGroupingComparatorClass(TaggedTextKeyGroupComparator.class);
            job.setPartitionerClass(TaggedKeyPartitioner.class);

            if(reduceTasks==null) {
                reduceTasks=1;    // about right for AWS runs
            }

            job.setNumReduceTasks(reduceTasks);

            job.setMapOutputKeyClass(TaggedTextItem.class);
            job.setMapOutputValueClass(VIntWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(NullWritable.class);


            FileInputFormat.addInputPath(job, new Path(inputA));
            FileInputFormat.addInputPath(job, new Path(inputB));

            FileOutputFormat.setOutputPath(job, new Path(output));
            FileOutputFormat.setCompressOutput(job, true);
            FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

            // Gotcha -- the FileOutputFormat settings above only write configuration
            // properties; the output format class set below is what reads them when
            // the job actually runs.

            job.setOutputFormatClass(TextOutputFormat.class);

            return job.waitForCompletion(true) ? 0 : 1;
        } catch(Main.IncorrectUsageException iue) {
            return 2;
        }
    }
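
SetDifferenceReducer itself is not shown on this page. Given the tagging visible in the driver (input A registered under INPUTS+".1", input B under INPUTS+".2", VIntWritable tags as map output values), a set-difference reducer might look roughly like the sketch below; treating key.toString() as the way to extract the text is an assumption about TaggedTextItem.

    import java.io.IOException;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.VIntWritable;
    import org.apache.hadoop.mapreduce.Reducer;

    // Sketch only: emit a key when it occurred in input A but never in input B.
    class SetDifferenceReducerSketch
            extends Reducer<TaggedTextItem, VIntWritable, Text, NullWritable> {
        @Override
        protected void reduce(TaggedTextItem key, Iterable<VIntWritable> tags, Context context)
                throws IOException, InterruptedException {
            boolean seenInB = false;
            for (VIntWritable tag : tags) {
                if (tag.get() == 2) {   // tag 2 marks input B in the driver above
                    seenInB = true;
                    break;
                }
            }
            if (!seenInB) {
                context.write(new Text(key.toString()), NullWritable.get());
            }
        }
    }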

    @Override
    protected int execute(String[] args) throws Exception {
        Configuration conf = getConf();
        conf.set(StageConstants.PROP_BATCH_ID, getBatchId());
        conf.set(StageConstants.PROP_FLOW_ID, getFlowId());
        Job job = createJob(conf);
        return submit(job);
    }
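
execute() follows the standard Tool contract: the Configuration comes from getConf(), stage metadata is injected, and the exit code propagates to the caller. A minimal driver wiring such a stage into ToolRunner looks like this; the class name StageMain is a placeholder.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;

    public class StageMain extends Configured implements Tool {
        @Override
        public int run(String[] args) throws Exception {
            // delegate to the stage's execute(args) here
            return 0;
        }

        public static void main(String[] args) throws Exception {
            // ToolRunner parses generic options (-D key=value, -files, ...) into the Configuration
            System.exit(ToolRunner.run(new Configuration(), new StageMain(), args));
        }
    }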

    public Job createJob(Configuration conf) throws IOException, InterruptedException {
        if (conf == null) {
            throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
        }
        Job job = JobCompatibility.newJob(conf);
        VariableTable variables = getPathParser(job.getConfiguration());
        configureJobInfo(job, variables);
        configureStageInput(job, variables);
        configureStageOutput(job, variables);
        configureShuffle(job, variables);
        configureStageResource(job, variables);

    public static Job newJob(Configuration conf) throws IOException {
        if (conf == null) {
            throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
        }
        return new Job(conf);
    }
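
On current Hadoop versions the same factory can be written against the non-deprecated Job.getInstance, which copies the passed Configuration, so later changes must go through job.getConfiguration():

    public static Job newJob(Configuration conf) throws IOException {
        if (conf == null) {
            throw new IllegalArgumentException("conf must not be null");
        }
        // Job.getInstance(conf) clones conf; mutate job.getConfiguration() afterwards.
        return Job.getInstance(conf);
    }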

        assert name != null;
        assert formatClass != null;
        assert keyClass != null;
        assert valueClass != null;
        assert counters != null;
        Job job = JobCompatibility.newJob(context.getConfiguration());
        job.setOutputFormatClass(formatClass);
        job.setOutputKeyClass(keyClass);
        job.setOutputValueClass(valueClass);
        TaskAttemptContext localContext = JobCompatibility.newTaskAttemptContext(
                job.getConfiguration(),
                context.getTaskAttemptID());
        if (FileOutputFormat.class.isAssignableFrom(formatClass)) {
            setOutputFilePrefix(localContext, name);
        }
        OutputFormat<?, ?> format = ReflectionUtils.newInstance(

                getNextDirectory()));
        storage.getFileSystem().delete(getNextDirectory(), true);
    }

    private void update() throws IOException, InterruptedException {
        Job job = JobCompatibility.newJob(getConf());
        job.setJobName("TGC-UPDATE-" + storage.getPatchDirectory());

        List<StageInput> inputList = new ArrayList<StageInput>();
        inputList.add(new StageInput(
                storage.getHeadContents("*").toString(),
                TemporaryInputFormat.class,
                BaseMapper.class));
        inputList.add(new StageInput(
                storage.getPatchContents("*").toString(),
                TemporaryInputFormat.class,
                PatchMapper.class));
        StageInputDriver.set(job, inputList);
        job.setInputFormatClass(StageInputFormat.class);
        job.setMapperClass(StageInputMapper.class);
        job.setMapOutputKeyClass(PatchApplyKey.class);
        job.setMapOutputValueClass(modelClass);

        // no combiner is set here; it would have little effect in the normal case
        job.setReducerClass(PatchApplyReducer.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(modelClass);
        job.setPartitionerClass(PatchApplyKey.Partitioner.class);
        job.setSortComparatorClass(PatchApplyKey.SortComparator.class);
        job.setGroupingComparatorClass(PatchApplyKey.GroupComparator.class);

        TemporaryOutputFormat.setOutputPath(job, getNextDirectory());
        job.setOutputFormatClass(TemporaryOutputFormat.class);
        job.getConfiguration().setClass(
                "mapred.output.committer.class",
                LegacyBridgeOutputCommitter.class,
                org.apache.hadoop.mapred.OutputCommitter.class);

        LOG.info(MessageFormat.format("Applying patch: {0} / {1} -> {2}",
                storage.getPatchContents("*"),
                storage.getHeadContents("*"),
                getNextContents()));
        try {
            boolean succeed = job.waitForCompletion(true);
            LOG.info(MessageFormat.format("Applied patch: succeed={0}, {1} / {2} -> {3}",
                    succeed,
                    storage.getPatchContents("*"),
                    storage.getHeadContents("*"),
                    getNextContents()));

                false,
                storage.getConfiguration());
    }

    private void create() throws InterruptedException, IOException {
        Job job = JobCompatibility.newJob(getConf());
        job.setJobName("TGC-CREATE-" + storage.getPatchDirectory());

        List<StageInput> inputList = new ArrayList<StageInput>();
        inputList.add(new StageInput(
                storage.getPatchContents("*").toString(),
                TemporaryInputFormat.class,
                DeleteMapper.class));
        StageInputDriver.set(job, inputList);
        job.setInputFormatClass(StageInputFormat.class);
        job.setMapperClass(StageInputMapper.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(modelClass);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(modelClass);

        TemporaryOutputFormat.setOutputPath(job, getNextDirectory());
        job.setOutputFormatClass(TemporaryOutputFormat.class);
        job.getConfiguration().setClass(
                "mapred.output.committer.class",
                LegacyBridgeOutputCommitter.class,
                org.apache.hadoop.mapred.OutputCommitter.class);

        job.setNumReduceTasks(0);

        LOG.info(MessageFormat.format("Applying patch: {0} / (empty) -> {2}",
                storage.getPatchContents("*"),
                storage.getHeadContents("*"),
                getNextContents()));
        try {
            boolean succeed = job.waitForCompletion(true);
            LOG.info(MessageFormat.format("Applied patch: succeed={0}, {1} / (empty) -> {3}",
                    succeed,
                    storage.getPatchContents("*"),
                    storage.getHeadContents("*"),
                    getNextContents()));
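
Both update() and create() log only the boolean returned by waitForCompletion. When more detail is wanted, per-job statistics are available through the counters API after the job finishes; a small helper sketch using the standard TaskCounter enum:

    import java.io.IOException;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.TaskCounter;

    static long mapOutputRecords(Job job) throws IOException {
        // Only meaningful once the job has completed.
        return job.getCounters()
                .findCounter(TaskCounter.MAP_OUTPUT_RECORDS)
                .getValue();
    }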
