// not the most kosher algorithm here, since the grouper indexes are being trounced via the adding of nodes to random groups, but it
// works out
List<Node> forNewGroups = new ArrayList<Node>();
for(Group g: mergedGroups) {
for(PartitionNode n: extraPartitionInputs(g)) {
Node idNode = makeIdentityNode(n.allOutputFields);
Node newPartitionNode = new PartitionNode(idNode.streamId, n.name, idNode.allOutputFields, n.thriftGrouping);
Node parentNode = TridentUtils.getParent(graph, n);
Set<IndexedEdge> outgoing = graph.outgoingEdgesOf(n);
graph.removeVertex(n);
graph.addVertex(idNode);
graph.addVertex(newPartitionNode);
addEdge(graph, parentNode, idNode, 0);
addEdge(graph, idNode, newPartitionNode, 0);
for(IndexedEdge e: outgoing) {
addEdge(graph, newPartitionNode, e.target, e.index);
}
Group parentGroup = grouper.nodeGroup(parentNode);
if(parentGroup==null) {
forNewGroups.add(idNode);
} else {
parentGroup.nodes.add(idNode);
}
}
}
// TODO: in the future, want a way to include this logic in the spout itself,
// or make it unecessary by having storm include metadata about which grouping a tuple
// came from
for(Node n: forNewGroups) {
grouper.addGroup(new Group(graph, n));
}
// add in spouts as groups so we can get parallelisms
for(Node n: spoutNodes) {
grouper.addGroup(new Group(graph, n));
}
grouper.reindex();
mergedGroups = grouper.getAllGroups();
Map<Node, String> batchGroupMap = new HashMap();
List<Set<Node>> connectedComponents = new ConnectivityInspector<Node, IndexedEdge>(graph).connectedSets();
for(int i=0; i<connectedComponents.size(); i++) {
String groupId = "bg" + i;
for(Node n: connectedComponents.get(i)) {
batchGroupMap.put(n, groupId);
}
}
// System.out.println("GRAPH:");
// System.out.println(graph);
Map<Group, Integer> parallelisms = getGroupParallelisms(graph, grouper, mergedGroups);
TridentTopologyBuilder builder = new TridentTopologyBuilder();
Map<Node, String> spoutIds = genSpoutIds(spoutNodes);
Map<Group, String> boltIds = genBoltIds(mergedGroups);
for(SpoutNode sn: spoutNodes) {
Integer parallelism = parallelisms.get(grouper.nodeGroup(sn));
if(sn.type == SpoutNode.SpoutType.DRPC) {
builder.setBatchPerTupleSpout(spoutIds.get(sn), sn.streamId,
(IRichSpout) sn.spout, parallelism, batchGroupMap.get(sn));
} else {
ITridentSpout s;
if(sn.spout instanceof IBatchSpout) {
s = new BatchSpoutExecutor((IBatchSpout)sn.spout);
} else if(sn.spout instanceof ITridentSpout) {
s = (ITridentSpout) sn.spout;
} else {
throw new RuntimeException("Regular rich spouts not supported yet... try wrapping in a RichSpoutBatchExecutor");
// TODO: handle regular rich spout without batches (need lots of updates to support this throughout)
}
builder.setSpout(spoutIds.get(sn), sn.streamId, sn.txId, s, parallelism, batchGroupMap.get(sn));
}
}
for(Group g: mergedGroups) {
if(!isSpoutGroup(g)) {
Integer p = parallelisms.get(g);
Map<String, String> streamToGroup = getOutputStreamBatchGroups(g, batchGroupMap);
BoltDeclarer d = builder.setBolt(boltIds.get(g), new SubtopologyBolt(graph, g.nodes, batchGroupMap), p,
committerBatches(g, batchGroupMap), streamToGroup);
Collection<PartitionNode> inputs = uniquedSubscriptions(externalGroupInputs(g));
for(PartitionNode n: inputs) {
Node parent = TridentUtils.getParent(graph, n);
String componentId;
if(parent instanceof SpoutNode) {
componentId = spoutIds.get(parent);
} else {
componentId = boltIds.get(grouper.nodeGroup(parent));