// Store the MapReduce's state in apacheConfiguration and merge it into the Hadoop configuration
// that is handed to the job(s) below.
mapReduce.storeState(apacheConfiguration);
ConfUtil.mergeApacheIntoHadoopConfiguration(apacheConfiguration, newConfiguration);
// Resolved once (after the merge above): the output format used for the memory results,
// defaulting to SequenceFileOutputFormat when the key is not set.
final Class<? extends OutputFormat> memoryOutputFormatClass = newConfiguration.getClass(Constants.GREMLIN_GIRAPH_MEMORY_OUTPUT_FORMAT_CLASS, SequenceFileOutputFormat.class, OutputFormat.class);
// Only SequenceFile output is read back into memory; any other format just triggers a warning.
final boolean memoryIsSequenceFile = memoryOutputFormatClass.equals(SequenceFileOutputFormat.class);
if (!mapReduce.doStage(MapReduce.Stage.MAP)) {
    // No map stage: no job is run, the results are read straight from the memory key's output location.
    final Path memoryPath = new Path(configuration.get(Constants.GREMLIN_GIRAPH_OUTPUT_LOCATION) + "/" + mapReduce.getMemoryKey());
    if (memoryIsSequenceFile) {
        mapReduce.addResultToMemory(memory, new GremlinWritableIterator(configuration, memoryPath));
    } else {
        GiraphGraphComputer.LOGGER.warn(SEQUENCE_WARNING);
    }
} else {
    final Optional<Comparator<?>> mapSort = mapReduce.getMapKeySort();
    final Optional<Comparator<?>> reduceSort = mapReduce.getReduceKeySort();
    newConfiguration.setClass(Constants.GRELMIN_GIRAPH_MAP_REDUCE_CLASS, mapReduce.getClass(), MapReduce.class);
    final Job job = new Job(newConfiguration, mapReduce.toString());
    GiraphGraphComputer.LOGGER.info(Constants.GIRAPH_GREMLIN_JOB_PREFIX + mapReduce.toString());
    job.setJarByClass(GiraphGraph.class);
    if (mapSort.isPresent()) {
        job.setSortComparatorClass(GremlinWritableComparator.GremlinWritableMapComparator.class);
    }
    job.setMapperClass(GiraphMap.class);
    if (mapReduce.doStage(MapReduce.Stage.REDUCE)) {
        if (mapReduce.doStage(MapReduce.Stage.COMBINE)) {
            job.setCombinerClass(GiraphCombine.class);
        }
        job.setReducerClass(GiraphReduce.class);
    } else if (mapSort.isPresent()) {
        // no reduce stage, but a map key sort was requested: the base Reducer class is used as the reducer
        job.setReducerClass(Reducer.class);
    } else {
        job.setNumReduceTasks(0);
    }
    job.setMapOutputKeyClass(GremlinWritable.class);
    job.setMapOutputValueClass(GremlinWritable.class);
    job.setOutputKeyClass(GremlinWritable.class);
    job.setOutputValueClass(GremlinWritable.class);
    job.setInputFormatClass(ConfUtil.getInputFormatFromVertexInputFormat((Class) newConfiguration.getClass(Constants.GIRAPH_VERTEX_INPUT_FORMAT_CLASS, VertexInputFormat.class)));
    job.setOutputFormatClass(memoryOutputFormatClass);
    // if a vertex program is configured, read the graph it wrote under the output location;
    // otherwise grab the graph from the input location
    final Path graphPath = configuration.get(VertexProgram.VERTEX_PROGRAM, null) != null ?
            new Path(newConfiguration.get(Constants.GREMLIN_GIRAPH_OUTPUT_LOCATION) + "/" + Constants.SYSTEM_G) :
            new Path(newConfiguration.get(Constants.GREMLIN_GIRAPH_INPUT_LOCATION));
    // with a reduce key sort, this job writes to a "-temp" path that the sort job below consumes
    Path memoryPath = new Path(newConfiguration.get(Constants.GREMLIN_GIRAPH_OUTPUT_LOCATION) + "/" + (reduceSort.isPresent() ? mapReduce.getMemoryKey() + "-temp" : mapReduce.getMemoryKey()));
    final FileSystem fileSystem = FileSystem.get(newConfiguration);
    if (fileSystem.exists(memoryPath)) {
        fileSystem.delete(memoryPath, true);
    }
    FileInputFormat.setInputPaths(job, graphPath);
    FileOutputFormat.setOutputPath(job, memoryPath);
    // waitForCompletion reports failure through its return value; do not go on to read
    // (or sort) the output of a job that did not succeed
    if (!job.waitForCompletion(true)) {
        throw new IllegalStateException("The MapReduce job failed: " + mapReduce.toString());
    }
    // if there is a reduce sort, we need to run another identity MapReduce job
    if (reduceSort.isPresent()) {
        final Job reduceSortJob = new Job(newConfiguration, "ReduceKeySort");
        reduceSortJob.setSortComparatorClass(GremlinWritableComparator.GremlinWritableReduceComparator.class);
        reduceSortJob.setMapperClass(Mapper.class);
        reduceSortJob.setReducerClass(Reducer.class);
        reduceSortJob.setMapOutputKeyClass(GremlinWritable.class);
        reduceSortJob.setMapOutputValueClass(GremlinWritable.class);
        reduceSortJob.setOutputKeyClass(GremlinWritable.class);
        reduceSortJob.setOutputValueClass(GremlinWritable.class);
        reduceSortJob.setInputFormatClass(SequenceFileInputFormat.class); // TODO: require this hard coded? If so, ERROR messages needed.
        reduceSortJob.setOutputFormatClass(memoryOutputFormatClass);
        FileInputFormat.setInputPaths(reduceSortJob, memoryPath);
        final Path sortedMemoryPath = new Path(newConfiguration.get(Constants.GREMLIN_GIRAPH_OUTPUT_LOCATION) + "/" + mapReduce.getMemoryKey());
        // clear a stale final output directory, mirroring the cleanup done above for the first job's
        // output path; an already-existing output directory would make the sort job fail
        if (fileSystem.exists(sortedMemoryPath)) {
            fileSystem.delete(sortedMemoryPath, true);
        }
        FileOutputFormat.setOutputPath(reduceSortJob, sortedMemoryPath);
        if (!reduceSortJob.waitForCompletion(true)) {
            throw new IllegalStateException("The ReduceKeySort job failed for MapReduce: " + mapReduce.toString());
        }
        fileSystem.delete(memoryPath, true); // delete the temporary memory path
        memoryPath = sortedMemoryPath;
    }
    // if it's not a SequenceFile, there is no certain way to convert the output to the necessary Java objects;
    // to get the results you have to look through the HDFS directory structure yourself.
    if (memoryIsSequenceFile) {
        mapReduce.addResultToMemory(memory, new GremlinWritableIterator(configuration, memoryPath));
    } else {
        GiraphGraphComputer.LOGGER.warn(SEQUENCE_WARNING);
    }
}
}