Skip to content

Commit 791d35f

Browse files
committed
Reduce state size in awareness allocation decider (#70063)
Today the awareness allocation decider computes the number of nodes and shards in each zone, even though it only needs to know the names of the zones and the number of shards in the current zone. This commit drops the unnecessary state in the computation.
1 parent 4939388 commit 791d35f

File tree

3 files changed

+50
-58
lines changed

3 files changed

+50
-58
lines changed

server/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
package org.elasticsearch.cluster.routing;
1010

11-
import com.carrotsearch.hppc.ObjectIntHashMap;
1211
import com.carrotsearch.hppc.cursors.ObjectCursor;
1312
import org.apache.logging.log4j.Logger;
1413
import org.apache.lucene.util.CollectionUtil;
@@ -19,6 +18,7 @@
1918
import org.elasticsearch.cluster.node.DiscoveryNode;
2019
import org.elasticsearch.cluster.routing.UnassignedInfo.AllocationStatus;
2120
import org.elasticsearch.cluster.routing.allocation.ExistingShardsAllocator;
21+
import org.elasticsearch.cluster.service.MasterService;
2222
import org.elasticsearch.common.Nullable;
2323
import org.elasticsearch.common.Randomness;
2424
import org.elasticsearch.common.collect.Tuple;
@@ -37,9 +37,12 @@
3737
import java.util.ListIterator;
3838
import java.util.Map;
3939
import java.util.NoSuchElementException;
40+
import java.util.Objects;
4041
import java.util.Queue;
4142
import java.util.Set;
4243
import java.util.function.Predicate;
44+
import java.util.stream.Collectors;
45+
import java.util.stream.StreamSupport;
4346

4447
/**
4548
* {@link RoutingNodes} represents a copy the routing information contained in the {@link ClusterState cluster state}.
@@ -70,7 +73,7 @@ public class RoutingNodes implements Iterable<RoutingNode> {
7073

7174
private int relocatingShards = 0;
7275

73-
private final Map<String, ObjectIntHashMap<String>> nodesPerAttributeNames = new HashMap<>();
76+
private final Map<String, Set<String>> attributeValuesByAttribute = new HashMap<>();
7477
private final Map<String, Recoveries> recoveriesPerNode = new HashMap<>();
7578

7679
public RoutingNodes(ClusterState clusterState) {
@@ -229,20 +232,12 @@ public RoutingNode node(String nodeId) {
229232
return nodesToShards.get(nodeId);
230233
}
231234

232-
public ObjectIntHashMap<String> nodesPerAttributesCounts(String attributeName) {
233-
ObjectIntHashMap<String> nodesPerAttributesCounts = nodesPerAttributeNames.get(attributeName);
234-
if (nodesPerAttributesCounts != null) {
235-
return nodesPerAttributesCounts;
236-
}
237-
nodesPerAttributesCounts = new ObjectIntHashMap<>();
238-
for (RoutingNode routingNode : this) {
239-
String attrValue = routingNode.node().getAttributes().get(attributeName);
240-
if (attrValue != null) {
241-
nodesPerAttributesCounts.addTo(attrValue, 1);
242-
}
243-
}
244-
nodesPerAttributeNames.put(attributeName, nodesPerAttributesCounts);
245-
return nodesPerAttributesCounts;
235+
public Set<String> getAttributeValues(String attributeName) {
236+
// Only ever accessed on the master service thread so no need for synchronization
237+
assert MasterService.isMasterUpdateThread() || Thread.currentThread().getName().startsWith("TEST-")
238+
: Thread.currentThread().getName() + " should be the master service thread";
239+
return attributeValuesByAttribute.computeIfAbsent(attributeName, ignored -> StreamSupport.stream(this.spliterator(), false)
240+
.map(r -> r.node().getAttributes().get(attributeName)).filter(Objects::nonNull).collect(Collectors.toSet()));
246241
}
247242

248243
/**

server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java

Lines changed: 37 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
package org.elasticsearch.cluster.routing.allocation.decider;
1010

11-
import com.carrotsearch.hppc.ObjectIntHashMap;
1211
import org.elasticsearch.cluster.metadata.IndexMetadata;
1312
import org.elasticsearch.cluster.routing.RoutingNode;
1413
import org.elasticsearch.cluster.routing.ShardRouting;
@@ -22,8 +21,9 @@
2221
import java.util.HashMap;
2322
import java.util.List;
2423
import java.util.Map;
24+
import java.util.Set;
2525
import java.util.function.Function;
26-
import java.util.stream.StreamSupport;
26+
import java.util.stream.Stream;
2727

2828
import static java.util.Collections.emptyList;
2929
import static java.util.stream.Collectors.toList;
@@ -133,70 +133,67 @@ private Decision underCapacity(ShardRouting shardRouting, RoutingNode node, Rout
133133
}
134134

135135
final boolean debug = allocation.debugDecision();
136-
IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(shardRouting.index());
136+
final IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(shardRouting.index());
137137

138138
if (INDEX_AUTO_EXPAND_REPLICAS_SETTING.get(indexMetadata.getSettings()).expandToAllNodes()) {
139139
return YES_AUTO_EXPAND_ALL;
140140
}
141141

142-
int shardCount = indexMetadata.getNumberOfReplicas() + 1; // 1 for primary
142+
final int shardCount = indexMetadata.getNumberOfReplicas() + 1; // 1 for primary
143143
for (String awarenessAttribute : awarenessAttributes) {
144144
// the node the shard exists on must be associated with an awareness attribute
145145
if (node.node().getAttributes().containsKey(awarenessAttribute) == false) {
146146
return debug ? debugNoMissingAttribute(awarenessAttribute, awarenessAttributes) : Decision.NO;
147147
}
148148

149-
// build attr_value -> nodes map
150-
ObjectIntHashMap<String> nodesPerAttribute = allocation.routingNodes().nodesPerAttributesCounts(awarenessAttribute);
149+
final Set<String> actualAttributeValues = allocation.routingNodes().getAttributeValues(awarenessAttribute);
150+
final String targetAttributeValue = node.node().getAttributes().get(awarenessAttribute);
151+
assert targetAttributeValue != null : "attribute [" + awarenessAttribute + "] missing on " + node.node();
152+
assert actualAttributeValues.contains(targetAttributeValue)
153+
: "attribute [" + awarenessAttribute + "] on " + node.node() + " is not in " + actualAttributeValues;
154+
155+
int shardsForTargetAttributeValue = 0;
156+
// Will be the count of shards on nodes with attribute `awarenessAttribute` matching the one on `node`.
151157

152-
// build the count of shards per attribute value
153-
ObjectIntHashMap<String> shardPerAttribute = new ObjectIntHashMap<>();
154158
for (ShardRouting assignedShard : allocation.routingNodes().assignedShards(shardRouting.shardId())) {
155159
if (assignedShard.started() || assignedShard.initializing()) {
156160
// Note: this also counts relocation targets as that will be the new location of the shard.
157161
// Relocation sources should not be counted as the shard is moving away
158-
RoutingNode routingNode = allocation.routingNodes().node(assignedShard.currentNodeId());
159-
shardPerAttribute.addTo(routingNode.node().getAttributes().get(awarenessAttribute), 1);
162+
final RoutingNode assignedNode = allocation.routingNodes().node(assignedShard.currentNodeId());
163+
if (targetAttributeValue.equals(assignedNode.node().getAttributes().get(awarenessAttribute))) {
164+
shardsForTargetAttributeValue += 1;
165+
}
160166
}
161167
}
162168

163169
if (moveToNode) {
164170
if (shardRouting.assignedToNode()) {
165-
String nodeId = shardRouting.relocating() ? shardRouting.relocatingNodeId() : shardRouting.currentNodeId();
166-
if (node.nodeId().equals(nodeId) == false) {
167-
// we work on different nodes, move counts around
168-
shardPerAttribute.putOrAdd(allocation.routingNodes().node(nodeId).node().getAttributes().get(awarenessAttribute),
169-
0, -1);
170-
shardPerAttribute.addTo(node.node().getAttributes().get(awarenessAttribute), 1);
171-
}
171+
final RoutingNode currentNode = allocation.routingNodes().node(
172+
shardRouting.relocating() ? shardRouting.relocatingNodeId() : shardRouting.currentNodeId());
173+
if (targetAttributeValue.equals(currentNode.node().getAttributes().get(awarenessAttribute)) == false) {
174+
shardsForTargetAttributeValue += 1;
175+
} // else this shard is already on a node in the same zone as the target node, so moving it doesn't change the count
172176
} else {
173-
shardPerAttribute.addTo(node.node().getAttributes().get(awarenessAttribute), 1);
177+
shardsForTargetAttributeValue += 1;
174178
}
175179
}
176180

177-
int numberOfAttributes = nodesPerAttribute.size();
178-
List<String> fullValues = forcedAwarenessAttributes.get(awarenessAttribute);
179-
if (fullValues != null) {
180-
for (String fullValue : fullValues) {
181-
if (shardPerAttribute.containsKey(fullValue) == false) {
182-
numberOfAttributes++;
183-
}
184-
}
185-
}
186-
// TODO should we remove ones that are not part of full list?
181+
final List<String> forcedValues = forcedAwarenessAttributes.get(awarenessAttribute);
182+
final int valueCount = forcedValues == null
183+
? actualAttributeValues.size()
184+
: Math.toIntExact(Stream.concat(actualAttributeValues.stream(), forcedValues.stream()).distinct().count());
187185

188-
final int currentNodeCount = shardPerAttribute.get(node.node().getAttributes().get(awarenessAttribute));
189-
final int maximumNodeCount = (shardCount + numberOfAttributes - 1) / numberOfAttributes; // ceil(shardCount/numberOfAttributes)
190-
if (currentNodeCount > maximumNodeCount) {
186+
final int maximumShardsPerAttributeValue = (shardCount + valueCount - 1) / valueCount; // ceil(shardCount/valueCount)
187+
if (shardsForTargetAttributeValue > maximumShardsPerAttributeValue) {
191188
return debug ? debugNoTooManyCopies(
192189
shardCount,
193190
awarenessAttribute,
194191
node.node().getAttributes().get(awarenessAttribute),
195-
numberOfAttributes,
196-
StreamSupport.stream(nodesPerAttribute.keys().spliterator(), false).map(c -> c.value).sorted().collect(toList()),
197-
fullValues == null ? null : fullValues.stream().sorted().collect(toList()),
198-
currentNodeCount,
199-
maximumNodeCount)
192+
valueCount,
193+
actualAttributeValues.stream().sorted().collect(toList()),
194+
forcedValues == null ? null : forcedValues.stream().sorted().collect(toList()),
195+
shardsForTargetAttributeValue,
196+
maximumShardsPerAttributeValue)
200197
: Decision.NO;
201198
}
202199
}
@@ -211,8 +208,8 @@ private static Decision debugNoTooManyCopies(
211208
int numberOfAttributes,
212209
List<String> realAttributes,
213210
List<String> forcedAttributes,
214-
int currentNodeCount,
215-
int maximumNodeCount) {
211+
int actualShardCount,
212+
int maximumShardCount) {
216213
return Decision.single(Decision.Type.NO, NAME,
217214
"there are [%d] copies of this shard and [%d] values for attribute [%s] (%s from nodes in the cluster and %s) so there " +
218215
"may be at most [%d] copies of this shard allocated to nodes with each value, but (including this copy) there " +
@@ -222,8 +219,8 @@ private static Decision debugNoTooManyCopies(
222219
attributeName,
223220
realAttributes,
224221
forcedAttributes == null ? "no forced awareness" : forcedAttributes + " from forced awareness",
225-
maximumNodeCount,
226-
currentNodeCount,
222+
maximumShardCount,
223+
actualShardCount,
227224
attributeName,
228225
attributeValue);
229226
}

server/src/main/java/org/elasticsearch/cluster/service/MasterService.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,8 @@ ClusterState state() {
173173
return clusterStateSupplier.get();
174174
}
175175

176-
private static boolean isMasterUpdateThread() {
177-
return Thread.currentThread().getName().contains(MASTER_UPDATE_THREAD_NAME);
176+
public static boolean isMasterUpdateThread() {
177+
return Thread.currentThread().getName().contains('[' + MASTER_UPDATE_THREAD_NAME + ']');
178178
}
179179

180180
public static boolean assertMasterUpdateThread() {

0 commit comments

Comments
 (0)