}
@Override public boolean allocateUnassigned(NodeAllocations nodeAllocations, RoutingAllocation allocation) {
boolean changed = false;
DiscoveryNodes nodes = allocation.nodes();
RoutingNodes routingNodes = allocation.routingNodes();
if (nodes.dataNodes().isEmpty()) {
return changed;
}
if (!routingNodes.hasUnassigned()) {
return changed;
}
Iterator<MutableShardRouting> unassignedIterator = routingNodes.unassigned().iterator();
while (unassignedIterator.hasNext()) {
MutableShardRouting shard = unassignedIterator.next();
// pre-check if it can be allocated to any node that currently exists, so we won't list the store for it for nothing
boolean canBeAllocatedToAtLeastOneNode = false;
for (DiscoveryNode discoNode : nodes.dataNodes().values()) {
RoutingNode node = routingNodes.node(discoNode.id());
if (node == null) {
continue;
}
// if its THROTTLING, we are not going to allocate it to this node, so ignore it as well
if (nodeAllocations.canAllocate(shard, node, allocation).allocate()) {
canBeAllocatedToAtLeastOneNode = true;
break;
}
}
if (!canBeAllocatedToAtLeastOneNode) {
continue;
}
Map<DiscoveryNode, TransportNodesListShardStoreMetaData.StoreFilesMetaData> shardStores = buildShardStores(nodes, shard);
long lastSizeMatched = 0;
DiscoveryNode lastDiscoNodeMatched = null;
RoutingNode lastNodeMatched = null;
for (Map.Entry<DiscoveryNode, TransportNodesListShardStoreMetaData.StoreFilesMetaData> nodeStoreEntry : shardStores.entrySet()) {
DiscoveryNode discoNode = nodeStoreEntry.getKey();
TransportNodesListShardStoreMetaData.StoreFilesMetaData storeFilesMetaData = nodeStoreEntry.getValue();
logger.trace("{}: checking node [{}]", shard, discoNode);
if (storeFilesMetaData == null) {
// already allocated on that node...
continue;
}
RoutingNode node = routingNodes.node(discoNode.id());
if (node == null) {
continue;
}
// check if we can allocate on that node...
// we only check for NO, since if this node is THROTTLING and it has enough "same data"
// then we will try and assign it next time
if (nodeAllocations.canAllocate(shard, node, allocation) == Decision.NO) {
continue;
}
// if it is already allocated, we can't assign to it...
if (storeFilesMetaData.allocated()) {
continue;
}
// if its a primary, it will be recovered from the gateway, find one that is closet to it
if (shard.primary()) {
try {
CommitPoint commitPoint = cachedCommitPoints.get(shard.shardId());
if (commitPoint == null) {
commitPoint = ((BlobStoreGateway) ((InternalNode) this.node).injector().getInstance(Gateway.class)).findCommitPoint(shard.index(), shard.id());
if (commitPoint != null) {
cachedCommitPoints.put(shard.shardId(), commitPoint);
} else {
cachedCommitPoints.put(shard.shardId(), CommitPoint.NULL);
}
} else if (commitPoint == CommitPoint.NULL) {
commitPoint = null;
}
if (commitPoint == null) {
break;
}
long sizeMatched = 0;
for (StoreFileMetaData storeFileMetaData : storeFilesMetaData) {
CommitPoint.FileInfo fileInfo = commitPoint.findPhysicalIndexFile(storeFileMetaData.name());
if (fileInfo != null) {
if (fileInfo.isSame(storeFileMetaData)) {
logger.trace("{}: [{}] reusing file since it exists on remote node and on gateway", shard, storeFileMetaData.name());
sizeMatched += storeFileMetaData.length();
} else {
logger.trace("{}: [{}] ignore file since it exists on remote node and on gateway but is different", shard, storeFileMetaData.name());
}
} else {
logger.trace("{}: [{}] exists on remote node, does not exists on gateway", shard, storeFileMetaData.name());
}
}
if (sizeMatched > lastSizeMatched) {
lastSizeMatched = sizeMatched;
lastDiscoNodeMatched = discoNode;
lastNodeMatched = node;
logger.trace("{}: node elected for pre_allocation [{}], total_size_matched [{}]", shard, discoNode, new ByteSizeValue(sizeMatched));
} else {
logger.trace("{}: node ignored for pre_allocation [{}], total_size_matched [{}] smaller than last_size_matched [{}]", shard, discoNode, new ByteSizeValue(sizeMatched), new ByteSizeValue(lastSizeMatched));
}
} catch (Exception e) {
// failed, log and try and allocate based on size
logger.debug("Failed to guess allocation of primary based on gateway for " + shard, e);
}
} else {
// if its backup, see if there is a primary that *is* allocated, and try and assign a location that is closest to it
// note, since we replicate operations, this might not be the same (different flush intervals)
MutableShardRouting primaryShard = routingNodes.findPrimaryForReplica(shard);
if (primaryShard != null && primaryShard.active()) {
DiscoveryNode primaryNode = nodes.get(primaryShard.currentNodeId());
if (primaryNode != null) {
TransportNodesListShardStoreMetaData.StoreFilesMetaData primaryNodeStore = shardStores.get(primaryNode);
if (primaryNodeStore != null && primaryNodeStore.allocated()) {
long sizeMatched = 0;