break;
}
}
if (!hasInputs) {
LOG.warn("No input sources for pipeline, nothing to do...");
return new MRExecutor(conf, jarClass, outputs, toMaterialize);
}
// Create a new graph that splits up dependent GBK (groupByKey) nodes.
Graph graph = prepareFinalGraph(baseGraph);
// Break the graph up into connected components.
List<List<Vertex>> components = graph.connectedComponents();
// For each component, we will create one or more job prototypes,
// depending on its profile.
// For dependency handling, we only need to care about which
// job prototype a particular GBK is assigned to.
Multimap<Vertex, JobPrototype> newAssignments = HashMultimap.create();
for (List<Vertex> component : components) {
newAssignments.putAll(constructJobPrototypes(component, components.size()));
}
// Add the job dependency information: each job prototype depends on the
// prototypes assigned to its parent vertices in the graph.
for (Map.Entry<Vertex, JobPrototype> e : newAssignments.entries()) {
JobPrototype current = e.getValue();
List<Vertex> parents = graph.getParents(e.getKey());
for (Vertex parent : parents) {
for (JobPrototype parentJobProto : newAssignments.get(parent)) {
current.addDependency(parentJobProto);
}
}
}
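// Snapshot the target-to-prototype assignments from earlier stages before this
// stage's outputs are added below, so that source-reading vertices only pick up
// dependencies on jobs that actually precede them.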
ImmutableMultimap<Target, JobPrototype> previousStages = ImmutableMultimap.copyOf(assignments);
for (Map.Entry<Vertex, JobPrototype> e : newAssignments.entries()) {
if (e.getKey().isOutput()) {
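// An output vertex wraps a PCollection that is written to one or more targets.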
PCollectionImpl<?> pcollect = e.getKey().getPCollection();
JobPrototype current = e.getValue();
// Add in implicit dependencies via SourceTargets that are read into memory
for (Target pt : pcollect.getTargetDependencies()) {
for (JobPrototype parentJobProto : assignments.get(pt)) {
current.addDependency(parentJobProto);
}
}
// Add this to the set of output assignments
for (Target t : outputs.get(pcollect)) {
assignments.put(t, e.getValue());
}
} else {
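// A non-output vertex may still read a Source that doubles as a Target; if an
// earlier stage wrote to that Target, this job must run after it.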
Source source = e.getKey().getSource();
if (source instanceof Target) {
JobPrototype current = e.getValue();
Collection<JobPrototype> parentJobPrototypes = previousStages.get((Target) source);
// ImmutableMultimap.get returns an empty collection (never null) when there
// are no mappings, so we can iterate over the result directly.
for (JobPrototype parentJobProto : parentJobPrototypes) {
current.addDependency(parentJobProto);
}
}
}
}
// Remove completed outputs and mark materialized output locations
// for subsequent job processing.
for (PCollectionImpl<?> output : currentStage) {
if (toMaterialize.containsKey(output)) {
MaterializableIterable mi = toMaterialize.get(output);
if (mi.isSourceTarget()) {
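// Record the SourceTarget location on the PCollection so that later stages
// can read the materialized data directly rather than recomputing it.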
output.materializeAt((SourceTarget) mi.getSource());
}
}
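// This output has now been planned, so drop it from the outstanding dependency set.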
targetDeps.remove(output);
}
}
// Finally, construct the jobs from the prototypes and return.
DotfileWriter dotfileWriter = new DotfileWriter();
MRExecutor exec = new MRExecutor(conf, jarClass, outputs, toMaterialize);
for (JobPrototype proto : Sets.newHashSet(assignments.values())) {
dotfileWriter.addJobPrototype(proto);
exec.addJob(proto.getCrunchJob(jarClass, conf, pipeline, lastJobID));
}
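// Render the plan as a Graphviz dotfile and expose it through the
// Configuration so the planned pipeline can be inspected for debugging.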
String planDotFile = dotfileWriter.buildDotfile();
exec.setPlanDotFile(planDotFile);
conf.set(PlanningParameters.PIPELINE_PLAN_DOTFILE, planDotFile);
return exec;
}