RoutingNode node = routingNodes.node(discoNode.id());
if (node == null) {
continue;
}
Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
if (decision.type() == Decision.Type.THROTTLE) {
throttledNodes.add(discoNode);
} else if (decision.type() == Decision.Type.NO) {
noNodes.add(discoNode);
} else {
if (logger.isDebugEnabled()) {
logger.debug("[{}][{}]: allocating [{}] to [{}] on primary allocation", shard.index(), shard.id(), shard, discoNode);
}
// we found a match
changed = true;
// make sure we create one with the version from the recovered state
allocation.routingNodes().assign(new MutableShardRouting(shard, highestVersion), node.nodeId());
unassignedIterator.remove();
// found a node, so no throttling, no "no", and break out of the loop
throttledNodes.clear();
noNodes.clear();
break;
}
}
if (throttledNodes.isEmpty()) {
// if we have a node that we "can't" allocate to, force allocation, since this is our master data!
if (!noNodes.isEmpty()) {
DiscoveryNode discoNode = noNodes.iterator().next();
RoutingNode node = routingNodes.node(discoNode.id());
if (logger.isDebugEnabled()) {
logger.debug("[{}][{}]: forcing allocating [{}] to [{}] on primary allocation", shard.index(), shard.id(), shard, discoNode);
}
// we found a match
changed = true;
// make sure we create one with the version from the recovered state
allocation.routingNodes().assign(new MutableShardRouting(shard, highestVersion), node.nodeId());
unassignedIterator.remove();
}
} else {
if (logger.isDebugEnabled()) {
logger.debug("[{}][{}]: throttling allocation [{}] to [{}] on primary allocation", shard.index(), shard.id(), shard, throttledNodes);
}
// we are throttling this, but we have enough to allocate to this node, ignore it for now
unassignedIterator.remove();
routingNodes.ignoredUnassigned().add(shard);
}
}
if (!routingNodes.hasUnassigned()) {
return changed;
}
// Now, handle replicas, try to assign them to nodes that are similar to the one the primary was allocated on
unassignedIterator = routingNodes.unassigned().iterator();
while (unassignedIterator.hasNext()) {
MutableShardRouting shard = unassignedIterator.next();
// pre-check if it can be allocated to any node that currently exists, so we won't list the store for it for nothing
boolean canBeAllocatedToAtLeastOneNode = false;
for (ObjectCursor<DiscoveryNode> cursor : nodes.dataNodes().values()) {
RoutingNode node = routingNodes.node(cursor.value.id());
if (node == null) {
continue;
}
// if we can't allocate it on a node, ignore it, for example, this handles
// cases for only allocating a replica after a primary
Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
if (decision.type() == Decision.Type.YES) {
canBeAllocatedToAtLeastOneNode = true;
break;
}
}
if (!canBeAllocatedToAtLeastOneNode) {
continue;
}
Map<DiscoveryNode, TransportNodesListShardStoreMetaData.StoreFilesMetaData> shardStores = buildShardStores(nodes, shard);
long lastSizeMatched = 0;
DiscoveryNode lastDiscoNodeMatched = null;
RoutingNode lastNodeMatched = null;
for (Map.Entry<DiscoveryNode, TransportNodesListShardStoreMetaData.StoreFilesMetaData> nodeStoreEntry : shardStores.entrySet()) {
DiscoveryNode discoNode = nodeStoreEntry.getKey();
TransportNodesListShardStoreMetaData.StoreFilesMetaData storeFilesMetaData = nodeStoreEntry.getValue();
logger.trace("{}: checking node [{}]", shard, discoNode);
if (storeFilesMetaData == null) {
// already allocated on that node...
continue;
}
RoutingNode node = routingNodes.node(discoNode.id());
if (node == null) {
continue;
}
// check if we can allocate on that node...
// we only check for NO, since if this node is THROTTLING and it has enough "same data"
// then we will try and assign it next time
Decision decision = allocation.deciders().canAllocate(shard, node, allocation);
if (decision.type() == Decision.Type.NO) {
continue;
}
// if it is already allocated, we can't assign to it...
if (storeFilesMetaData.allocated()) {
continue;
}
if (!shard.primary()) {
MutableShardRouting primaryShard = routingNodes.activePrimary(shard);
if (primaryShard != null) {
assert primaryShard.active();
DiscoveryNode primaryNode = nodes.get(primaryShard.currentNodeId());
if (primaryNode != null) {
TransportNodesListShardStoreMetaData.StoreFilesMetaData primaryNodeStore = shardStores.get(primaryNode);
if (primaryNodeStore != null && primaryNodeStore.allocated()) {
long sizeMatched = 0;
for (StoreFileMetaData storeFileMetaData : storeFilesMetaData) {
if (primaryNodeStore.fileExists(storeFileMetaData.name()) && primaryNodeStore.file(storeFileMetaData.name()).isSame(storeFileMetaData)) {
sizeMatched += storeFileMetaData.length();
}
}
if (sizeMatched > lastSizeMatched) {
lastSizeMatched = sizeMatched;
lastDiscoNodeMatched = discoNode;
lastNodeMatched = node;
}
}
}
}
}
}
if (lastNodeMatched != null) {
// we only check on THROTTLE since we checked before before on NO
Decision decision = allocation.deciders().canAllocate(shard, lastNodeMatched, allocation);
if (decision.type() == Decision.Type.THROTTLE) {
if (logger.isTraceEnabled()) {
logger.debug("[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store with total_size [{}]", shard.index(), shard.id(), shard, lastDiscoNodeMatched, new ByteSizeValue(lastSizeMatched));
}
// we are throttling this, but we have enough to allocate to this node, ignore it for now
unassignedIterator.remove();