HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
for (Vertex v : component) {
if (v.isInput()) {
for (Edge e : v.getOutgoingEdges()) {
for (NodePath nodePath : e.getNodePaths()) {
PCollectionImpl target = nodePath.tail();
for (Target t : outputs.get(target)) {
outputPaths.put(t, nodePath);
}
}
}
}
}
if (outputPaths.isEmpty()) {
throw new IllegalStateException("No outputs?");
}
JobPrototype prototype = JobPrototype.createMapOnlyJob(
++lastJobID, outputPaths, pipeline.createTempPath());
for (Vertex v : component) {
assignment.put(v, prototype);
}
} else {
Set<Edge> usedEdges = Sets.newHashSet();
for (Vertex g : gbks) {
Set<NodePath> inputs = Sets.newHashSet();
HashMultimap<Target, NodePath> mapSideOutputPaths = HashMultimap.create();
for (Edge e : g.getIncomingEdges()) {
inputs.addAll(e.getNodePaths());
usedEdges.add(e);
if (e.getHead().isInput()) {
for (Edge ep : e.getHead().getOutgoingEdges()) {
if (ep.getTail().isOutput() && !usedEdges.contains(ep)) { // map-side output
for (Target t : outputs.get(ep.getTail().getPCollection())) {
mapSideOutputPaths.putAll(t, ep.getNodePaths());
}
usedEdges.add(ep);
}
}
}
}
JobPrototype prototype = JobPrototype.createMapReduceJob(
++lastJobID, (PGroupedTableImpl) g.getPCollection(), inputs, pipeline.createTempPath());
prototype.addMapSideOutputs(mapSideOutputPaths);
assignment.put(g, prototype);
for (Edge e : g.getIncomingEdges()) {
assignment.put(e.getHead(), prototype);
if (e.getHead().isInput()) {
for (Edge ep : e.getHead().getOutgoingEdges()) {
if (ep.getTail().isOutput() && !assignment.containsKey(ep.getTail())) { // map-side output
assignment.put(ep.getTail(), prototype);
}
}
}
}
HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
for (Edge e : g.getOutgoingEdges()) {
Vertex output = e.getTail();
for (Target t : outputs.get(output.getPCollection())) {
outputPaths.putAll(t, e.getNodePaths());
}
assignment.put(output, prototype);
usedEdges.add(e);
}
prototype.addReducePaths(outputPaths);
}
// Check for any un-assigned vertices, which should be map-side outputs
// that we will need to run in a map-only job.
HashMultimap<Target, NodePath> outputPaths = HashMultimap.create();
Set<Vertex> orphans = Sets.newHashSet();
for (Vertex v : component) {
// Check if this vertex has multiple inputs but only a subset of
// them have already been assigned
boolean vertexHasUnassignedIncomingEdges = false;
if (v.isOutput()) {
for (Edge e : v.getIncomingEdges()) {
if (!usedEdges.contains(e)) {
vertexHasUnassignedIncomingEdges = true;
}
}
}
if (v.isOutput() && (vertexHasUnassignedIncomingEdges || !assignment.containsKey(v))) {
orphans.add(v);
for (Edge e : v.getIncomingEdges()) {
if (vertexHasUnassignedIncomingEdges && usedEdges.contains(e)) {
// We've already dealt with this incoming edge
continue;
}
orphans.add(e.getHead());
for (NodePath nodePath : e.getNodePaths()) {
PCollectionImpl target = nodePath.tail();
for (Target t : outputs.get(target)) {
outputPaths.put(t, nodePath);
}
}
}