Package com.tinkerpop.gremlin.giraph.hdfs

Examples of com.tinkerpop.gremlin.giraph.hdfs.GremlinWritableIterator


        mapReduce.storeState(apacheConfiguration);
        ConfUtil.mergeApacheIntoHadoopConfiguration(apacheConfiguration, newConfiguration);
        if (!mapReduce.doStage(MapReduce.Stage.MAP)) {
            final Path memoryPath = new Path(configuration.get(Constants.GREMLIN_GIRAPH_OUTPUT_LOCATION) + "/" + mapReduce.getMemoryKey());
            if (newConfiguration.getClass(Constants.GREMLIN_GIRAPH_MEMORY_OUTPUT_FORMAT_CLASS, SequenceFileOutputFormat.class, OutputFormat.class).equals(SequenceFileOutputFormat.class))
                mapReduce.addResultToMemory(memory, new GremlinWritableIterator(configuration, memoryPath));
            else
                GiraphGraphComputer.LOGGER.warn(SEQUENCE_WARNING);
        } else {
            final Optional<Comparator<?>> mapSort = mapReduce.getMapKeySort();
            final Optional<Comparator<?>> reduceSort = mapReduce.getReduceKeySort();

            newConfiguration.setClass(Constants.GRELMIN_GIRAPH_MAP_REDUCE_CLASS, mapReduce.getClass(), MapReduce.class);
            final Job job = new Job(newConfiguration, mapReduce.toString());
            GiraphGraphComputer.LOGGER.info(Constants.GIRAPH_GREMLIN_JOB_PREFIX + mapReduce.toString());
            job.setJarByClass(GiraphGraph.class);
            if (mapSort.isPresent()) job.setSortComparatorClass(GremlinWritableComparator.GremlinWritableMapComparator.class);
            job.setMapperClass(GiraphMap.class);
            if (mapReduce.doStage(MapReduce.Stage.REDUCE)) {
                if (mapReduce.doStage(MapReduce.Stage.COMBINE))
                    job.setCombinerClass(GiraphCombine.class);
                job.setReducerClass(GiraphReduce.class);
            } else {
                if (mapSort.isPresent()) {
                    job.setReducerClass(Reducer.class);
                } else {
                    job.setNumReduceTasks(0);
                }
            }
            job.setMapOutputKeyClass(GremlinWritable.class);
            job.setMapOutputValueClass(GremlinWritable.class);
            job.setOutputKeyClass(GremlinWritable.class);
            job.setOutputValueClass(GremlinWritable.class);
            job.setInputFormatClass(ConfUtil.getInputFormatFromVertexInputFormat((Class) newConfiguration.getClass(Constants.GIRAPH_VERTEX_INPUT_FORMAT_CLASS, VertexInputFormat.class)));
            job.setOutputFormatClass(newConfiguration.getClass(Constants.GREMLIN_GIRAPH_MEMORY_OUTPUT_FORMAT_CLASS, SequenceFileOutputFormat.class, OutputFormat.class)); // TODO: Make this configurable
            // if there is no vertex program, then grab the graph from the input location
            final Path graphPath = configuration.get(VertexProgram.VERTEX_PROGRAM, null) != null ?
                    new Path(newConfiguration.get(Constants.GREMLIN_GIRAPH_OUTPUT_LOCATION) + "/" + Constants.SYSTEM_G) :
                    new Path(newConfiguration.get(Constants.GREMLIN_GIRAPH_INPUT_LOCATION));
            Path memoryPath = new Path(newConfiguration.get(Constants.GREMLIN_GIRAPH_OUTPUT_LOCATION) + "/" + (reduceSort.isPresent() ? mapReduce.getMemoryKey() + "-temp" : mapReduce.getMemoryKey()));
            if (FileSystem.get(newConfiguration).exists(memoryPath)) {
                FileSystem.get(newConfiguration).delete(memoryPath, true);
            }
            FileInputFormat.setInputPaths(job, graphPath);
            FileOutputFormat.setOutputPath(job, memoryPath);
            job.waitForCompletion(true);


            // if there is a reduce sort, we need to run another identity MapReduce job
            if (reduceSort.isPresent()) {
                final Job reduceSortJob = new Job(newConfiguration, "ReduceKeySort");
                reduceSortJob.setSortComparatorClass(GremlinWritableComparator.GremlinWritableReduceComparator.class);
                reduceSortJob.setMapperClass(Mapper.class);
                reduceSortJob.setReducerClass(Reducer.class);
                reduceSortJob.setMapOutputKeyClass(GremlinWritable.class);
                reduceSortJob.setMapOutputValueClass(GremlinWritable.class);
                reduceSortJob.setOutputKeyClass(GremlinWritable.class);
                reduceSortJob.setOutputValueClass(GremlinWritable.class);
                reduceSortJob.setInputFormatClass(SequenceFileInputFormat.class); // TODO: require this hard coded? If so, ERROR messages needed.
                reduceSortJob.setOutputFormatClass(newConfiguration.getClass(Constants.GREMLIN_GIRAPH_MEMORY_OUTPUT_FORMAT_CLASS, SequenceFileOutputFormat.class, OutputFormat.class));
                FileInputFormat.setInputPaths(reduceSortJob, memoryPath);
                final Path sortedMemoryPath = new Path(newConfiguration.get(Constants.GREMLIN_GIRAPH_OUTPUT_LOCATION) + "/" + mapReduce.getMemoryKey());
                FileOutputFormat.setOutputPath(reduceSortJob, sortedMemoryPath);
                reduceSortJob.waitForCompletion(true);
                FileSystem.get(newConfiguration).delete(memoryPath, true); // delete the temporary memory path
                memoryPath = sortedMemoryPath;
            }

            // If it's not a SequenceFile, there is no certain way to convert it to the necessary Java objects.
            // To get the results you have to look through the HDFS directory structure. Oh the horror.
            if (newConfiguration.getClass(Constants.GREMLIN_GIRAPH_MEMORY_OUTPUT_FORMAT_CLASS, SequenceFileOutputFormat.class, OutputFormat.class).equals(SequenceFileOutputFormat.class))
                mapReduce.addResultToMemory(memory, new GremlinWritableIterator(configuration, memoryPath));
            else
                GiraphGraphComputer.LOGGER.warn(SEQUENCE_WARNING);
        }
    }
View Full Code Here

TOP

Related Classes of com.tinkerpop.gremlin.giraph.hdfs.GremlinWritableIterator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.