}
Set<Group> initialGroups = new HashSet<Group>();
for(List<Node> colocate: _colocate.values()) {
Group g = new Group(graph, colocate);
boltNodes.removeAll(colocate);
initialGroups.add(g);
}
for(Node n: boltNodes) {
initialGroups.add(new Group(graph, n));
}
GraphGrouper grouper = new GraphGrouper(graph, initialGroups);
grouper.mergeFully();
Collection<Group> mergedGroups = grouper.getAllGroups();
// add identity partitions between groups
for(IndexedEdge<Node> e: new HashSet<IndexedEdge>(graph.edgeSet())) {
if(!(e.source instanceof PartitionNode) && !(e.target instanceof PartitionNode)) {
Group g1 = grouper.nodeGroup(e.source);
Group g2 = grouper.nodeGroup(e.target);
// g1 being null means the source is a spout node
if(g1==null && !(e.source instanceof SpoutNode))
throw new RuntimeException("Planner exception: Null source group must indicate a spout node at this phase of planning");
if(g1==null || !g1.equals(g2)) {
graph.removeEdge(e);
PartitionNode pNode = makeIdentityPartition(e.source);
graph.addVertex(pNode);
graph.addEdge(e.source, pNode, new IndexedEdge(e.source, pNode, 0));
graph.addEdge(pNode, e.target, new IndexedEdge(pNode, e.target, e.index));
}
}
}
// if one group subscribes to the same stream with same partitioning multiple times,
// merge those together (otherwise can end up with many output streams created for that partitioning
// if need to split into multiple output streams because of same input having different
// partitioning to the group)
// this is because can't currently merge splitting logic into a spout
// not the most kosher algorithm here, since the grouper indexes are being trounced via the adding of nodes to random groups, but it
// works out
List<Node> forNewGroups = new ArrayList<Node>();
for(Group g: mergedGroups) {
for(PartitionNode n: extraPartitionInputs(g)) {
Node idNode = makeIdentityNode(n.allOutputFields);
Node newPartitionNode = new PartitionNode(idNode.streamId, n.name, idNode.allOutputFields, n.thriftGrouping);
Node parentNode = TridentUtils.getParent(graph, n);
Set<IndexedEdge> outgoing = graph.outgoingEdgesOf(n);
graph.removeVertex(n);
graph.addVertex(idNode);
graph.addVertex(newPartitionNode);
addEdge(graph, parentNode, idNode, 0);
addEdge(graph, idNode, newPartitionNode, 0);
for(IndexedEdge e: outgoing) {
addEdge(graph, newPartitionNode, e.target, e.index);
}
Group parentGroup = grouper.nodeGroup(parentNode);
if(parentGroup==null) {
forNewGroups.add(idNode);
} else {
parentGroup.nodes.add(idNode);
}
}
}
// TODO: in the future, want a way to include this logic in the spout itself,
// or make it unecessary by having storm include metadata about which grouping a tuple
// came from
for(Node n: forNewGroups) {
grouper.addGroup(new Group(graph, n));
}
// add in spouts as groups so we can get parallelisms
for(Node n: spoutNodes) {
grouper.addGroup(new Group(graph, n));
}
grouper.reindex();
mergedGroups = grouper.getAllGroups();