From 4b23f4be35e69907e41661db7ba88a62b53a2058 Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Tue, 13 Apr 2021 15:34:57 -0600 Subject: [PATCH 01/13] First cut of a node shutdown allocation decider --- .../NodeShutdownAllocationDecider.java | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java new file mode 100644 index 0000000000000..099ff3589a992 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java @@ -0,0 +1,93 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.cluster.routing.allocation.decider; + +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.metadata.NodesShutdownMetadata; +import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; + +/** + * An allocation decider that prevents shards from being allocated to a + * node that is in the process of shutting down. + * + * In short: No shards can be allocated to, or remain on, a node which is + * shutting down for removal. Primary shards cannot be allocated to, or remain + * on, a node which is shutting down for restart. + */ +public class NodeShutdownAllocationDecider extends AllocationDecider { + + private static final String NAME = "node_shutdown"; + + /** + * Determines if a shard can be allocated to a particular node, based on whether that node is shutting down or not. + */ + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + final SingleNodeShutdownMetadata thisNodeShutdownMetadata = getNodeShutdownMetadata(allocation.metadata(), node.nodeId()); + + if (thisNodeShutdownMetadata == null) { + // There's no shutdown metadata for this node, return yes. + return allocation.decision(Decision.YES, NAME, "this node is not currently shutting down"); + } + + if (SingleNodeShutdownMetadata.Type.REMOVE.equals(thisNodeShutdownMetadata.getType())) { + return allocation.decision(Decision.NO, NAME, "node [%s] is preparing to be removed from the cluster", node.nodeId()); + } + + if (shardRouting.primary() && SingleNodeShutdownMetadata.Type.RESTART.equals(thisNodeShutdownMetadata.getType())) { + return allocation.decision(Decision.NO, NAME, "node [%s] is preparing to be restarted", node.nodeId()); + } + + return Decision.YES; + } + + /** + * Applies the same rules as {@link NodeShutdownAllocationDecider#canAllocate(ShardRouting, RoutingNode, RoutingAllocation)} to + * determine if shards can remain on their current node. + */ + @Override + public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return this.canAllocate(shardRouting, node, allocation); + } + + /** + * Prevents indices from being auto-expanded to nodes which are in the process of shutting down, regardless of whether they're shutting + * down for restart or removal. + */ + @Override + public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNode node, RoutingAllocation allocation) { + SingleNodeShutdownMetadata thisNodeShutdownMetadata = getNodeShutdownMetadata(allocation.metadata(), node.getId()); + + if (thisNodeShutdownMetadata == null) { + return allocation.decision(Decision.YES, NAME, "no nodes are currently shutting down"); + } else if (SingleNodeShutdownMetadata.Type.RESTART.equals(thisNodeShutdownMetadata.getType())){ + return allocation.decision(Decision.NO, NAME, "node [%s] is preparing to be restarted", node.getId()); + } else if (SingleNodeShutdownMetadata.Type.REMOVE.equals(thisNodeShutdownMetadata.getType())) { + return allocation.decision(Decision.NO, NAME, "node [%s] is preparing to be removed from the cluster", node.getId()); + } else { + assert false : "node shutdown type should be either REMOVE or RESTART"; + return Decision.ALWAYS; + } + } + + private static SingleNodeShutdownMetadata getNodeShutdownMetadata(Metadata metadata, String nodeId) { + NodesShutdownMetadata nodesShutdownMetadata = metadata.custom(NodesShutdownMetadata.TYPE); + if (nodesShutdownMetadata == null || nodesShutdownMetadata.getPerNodeInfo() == null) { + // There are no nodes in the process of shutting down, return null. + return null; + } + + return nodesShutdownMetadata.getPerNodeInfo().get(nodeId); + } +} From 23b8eff4fb51bc9bbf8b9edb9b80cef241bb1c01 Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Mon, 19 Apr 2021 14:29:15 -0600 Subject: [PATCH 02/13] Fix compilation after master merge --- .../allocation/decider/NodeShutdownAllocationDecider.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java index 099ff3589a992..ad59082aa5086 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java @@ -83,11 +83,11 @@ public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNod private static SingleNodeShutdownMetadata getNodeShutdownMetadata(Metadata metadata, String nodeId) { NodesShutdownMetadata nodesShutdownMetadata = metadata.custom(NodesShutdownMetadata.TYPE); - if (nodesShutdownMetadata == null || nodesShutdownMetadata.getPerNodeInfo() == null) { + if (nodesShutdownMetadata == null || nodesShutdownMetadata.getAllNodeMetdataMap() == null) { // There are no nodes in the process of shutting down, return null. return null; } - return nodesShutdownMetadata.getPerNodeInfo().get(nodeId); + return nodesShutdownMetadata.getAllNodeMetdataMap().get(nodeId); } } From 2c960353f2ee428929c2b88e0af11402ba190763 Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Wed, 21 Apr 2021 16:31:49 -0600 Subject: [PATCH 03/13] Actually register the allocation decider --- .../src/main/java/org/elasticsearch/cluster/ClusterModule.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java index 27fad07235e9f..9d55dd448959b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -38,6 +38,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.NodeShutdownAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.RebalanceOnlyWhenActiveAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ReplicaAfterPrimaryActiveAllocationDecider; @@ -206,6 +207,7 @@ public static Collection createAllocationDeciders(Settings se addAllocationDecider(deciders, new ThrottlingAllocationDecider(settings, clusterSettings)); addAllocationDecider(deciders, new ShardsLimitAllocationDecider(settings, clusterSettings)); addAllocationDecider(deciders, new AwarenessAllocationDecider(settings, clusterSettings)); + addAllocationDecider(deciders, new NodeShutdownAllocationDecider()); clusterPlugins.stream() .flatMap(p -> p.createAllocationDeciders(settings, clusterSettings).stream()) From d13ba368cd66812beda3dbfc7efd3f171507f5df Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Wed, 21 Apr 2021 16:33:32 -0600 Subject: [PATCH 04/13] Don't prevent shards from being allocated to restarting nodes. --- .../allocation/decider/NodeShutdownAllocationDecider.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java index ad59082aa5086..4e2f8380c804e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java @@ -16,6 +16,7 @@ import org.elasticsearch.cluster.routing.RoutingNode; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.common.Nullable; /** * An allocation decider that prevents shards from being allocated to a @@ -45,10 +46,6 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing return allocation.decision(Decision.NO, NAME, "node [%s] is preparing to be removed from the cluster", node.nodeId()); } - if (shardRouting.primary() && SingleNodeShutdownMetadata.Type.RESTART.equals(thisNodeShutdownMetadata.getType())) { - return allocation.decision(Decision.NO, NAME, "node [%s] is preparing to be restarted", node.nodeId()); - } - return Decision.YES; } @@ -71,8 +68,6 @@ public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNod if (thisNodeShutdownMetadata == null) { return allocation.decision(Decision.YES, NAME, "no nodes are currently shutting down"); - } else if (SingleNodeShutdownMetadata.Type.RESTART.equals(thisNodeShutdownMetadata.getType())){ - return allocation.decision(Decision.NO, NAME, "node [%s] is preparing to be restarted", node.getId()); } else if (SingleNodeShutdownMetadata.Type.REMOVE.equals(thisNodeShutdownMetadata.getType())) { return allocation.decision(Decision.NO, NAME, "node [%s] is preparing to be removed from the cluster", node.getId()); } else { @@ -81,6 +76,7 @@ public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNod } } + @Nullable private static SingleNodeShutdownMetadata getNodeShutdownMetadata(Metadata metadata, String nodeId) { NodesShutdownMetadata nodesShutdownMetadata = metadata.custom(NodesShutdownMetadata.TYPE); if (nodesShutdownMetadata == null || nodesShutdownMetadata.getAllNodeMetdataMap() == null) { From c154d0b971a1de6aedd16c2f6a5ba43080c35f6b Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Wed, 21 Apr 2021 16:40:07 -0600 Subject: [PATCH 05/13] Adjust message wording --- .../allocation/decider/NodeShutdownAllocationDecider.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java index 4e2f8380c804e..9d11633c99cab 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java @@ -67,9 +67,12 @@ public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNod SingleNodeShutdownMetadata thisNodeShutdownMetadata = getNodeShutdownMetadata(allocation.metadata(), node.getId()); if (thisNodeShutdownMetadata == null) { - return allocation.decision(Decision.YES, NAME, "no nodes are currently shutting down"); + return allocation.decision(Decision.YES, NAME, "node [%s] is not preparing for removal from the cluster"); + } else if (SingleNodeShutdownMetadata.Type.RESTART.equals(thisNodeShutdownMetadata.getType())){ + return allocation.decision(Decision.YES, NAME, "node [%s] is preparing for restart but will remain in the cluster", + node.getId()); } else if (SingleNodeShutdownMetadata.Type.REMOVE.equals(thisNodeShutdownMetadata.getType())) { - return allocation.decision(Decision.NO, NAME, "node [%s] is preparing to be removed from the cluster", node.getId()); + return allocation.decision(Decision.NO, NAME, "node [%s] is preparing for removal from the cluster", node.getId()); } else { assert false : "node shutdown type should be either REMOVE or RESTART"; return Decision.ALWAYS; From 091499845acc34c7099dbda40e26d71bc3e4bf9d Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Thu, 22 Apr 2021 16:58:52 -0600 Subject: [PATCH 06/13] Add integration test --- .../xpack/shutdown/NodeShutdownIT.java | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/x-pack/plugin/shutdown/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownIT.java b/x-pack/plugin/shutdown/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownIT.java index b99571e69db75..e56d2c3e36327 100644 --- a/x-pack/plugin/shutdown/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownIT.java +++ b/x-pack/plugin/shutdown/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownIT.java @@ -11,15 +11,21 @@ import org.elasticsearch.common.settings.SecureString; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.ThreadContext; +import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.test.rest.ESRestTestCase; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.concurrent.TimeUnit; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.notNullValue; public class NodeShutdownIT extends ESRestTestCase { @@ -58,6 +64,84 @@ public void testCRUD() throws Exception { assertNoShuttingDownNodes(nodeIdToShutdown); } + /** + * A very basic smoke test to make sure the allocation decider is working. + */ + @SuppressWarnings("unchecked") + public void testAllocationPreventedForRemoval() throws Exception { + Request nodesRequest = new Request("GET", "_nodes"); + Map nodesResponse = responseAsMap(client().performRequest(nodesRequest)); + Map nodesObject = (Map) nodesResponse.get("nodes"); + + String nodeIdToShutdown = randomFrom(nodesObject.keySet()); + String reason = "testing node shutdown allocation rules"; + String type = "REMOVE"; + + // Put a shutdown request + Request putShutdown = new Request("PUT", "_nodes/" + nodeIdToShutdown + "/shutdown"); + putShutdown.setJsonEntity("{\"type\": \"" + type + "\", \"reason\": \"" + reason + "\"}"); + assertOK(client().performRequest(putShutdown)); + + // Create an index with 1s/2r + final String indexName = "test-idx"; + Request createIndexRequest = new Request("PUT", indexName); + createIndexRequest.setJsonEntity("{\"settings\": {\"number_of_shards\": 1, \"number_of_replicas\": 3}}"); + assertOK(client().performRequest(createIndexRequest)); + + // Watch to ensure no shards gets allocated to the node that's shutting down + Request checkShardsRequest = new Request("GET", "_cat/shards/" + indexName); + checkShardsRequest.addParameter("format", "json"); + checkShardsRequest.addParameter("h", "index,shard,prirep,id,state"); + + assertBusy(() -> { + List shardsResponse = entityAsList(client().performRequest(checkShardsRequest)); + int startedShards = 0; + int unassignedShards = 0; + for (Object shard : shardsResponse) { + Map shardMap = (Map) shard; + assertThat( + "no shards should be assigned to a node shutting down for removal", + shardMap.get("id"), + not(equalTo(nodeIdToShutdown)) + ); + + if (shardMap.get("id") == null) { + unassignedShards++; + } else if (nodeIdToShutdown.equals(shardMap.get("id")) == false) { + assertThat("all other shards should be started", shardMap.get("state"), equalTo("STARTED")); + startedShards++; + } + } + assertThat(unassignedShards, equalTo(1)); + assertThat(startedShards, equalTo(3)); + }); + // Now that we know all shards of the test index are assigned except one, + // make sure it's unassigned because of the allocation decider. + + Request allocationExplainRequest = new Request("GET", "_cluster/allocation/explain"); + allocationExplainRequest.setJsonEntity("{\"index\": \"" + indexName + "\", \"shard\": 0, \"primary\": false}"); + Map allocationExplainMap = entityAsMap(client().performRequest(allocationExplainRequest)); + List> decisions = (List>) allocationExplainMap.get("node_allocation_decisions"); + assertThat(decisions, notNullValue()); + + Optional> maybeDecision = decisions.stream() + .filter(decision -> nodeIdToShutdown.equals(decision.get("node_id"))) + .findFirst(); + assertThat("expected decisions for node, but not found", maybeDecision.isPresent(), is(true)); + + Map decision = maybeDecision.get(); + assertThat("node should have deciders", decision.containsKey("deciders"), is(true)); + + List> deciders = (List>) decision.get("deciders"); + assertThat( + "the node_shutdown allocation decider should have decided NO", + deciders.stream() + .filter(decider -> "node_shutdown".equals(decider.get("decider"))) + .allMatch(decider -> "NO".equals(decider.get("decision"))), + is(true) + ); + } + @SuppressWarnings("unchecked") private void assertNoShuttingDownNodes(String nodeIdToShutdown) throws IOException { Request getShutdownStatus = new Request("GET", "_nodes/" + nodeIdToShutdown + "/shutdown"); From 28107d82082e988ed532f0fd46fbf8433c2ec12f Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Thu, 22 Apr 2021 17:16:50 -0600 Subject: [PATCH 07/13] Imports --- .../java/org/elasticsearch/xpack/shutdown/NodeShutdownIT.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/x-pack/plugin/shutdown/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownIT.java b/x-pack/plugin/shutdown/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownIT.java index e56d2c3e36327..9af10e692ecde 100644 --- a/x-pack/plugin/shutdown/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownIT.java +++ b/x-pack/plugin/shutdown/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/shutdown/NodeShutdownIT.java @@ -11,14 +11,12 @@ import org.elasticsearch.common.settings.SecureString; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.ThreadContext; -import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.test.rest.ESRestTestCase; import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.concurrent.TimeUnit; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; From 4090b7839c75cb2659073a2702ff78d70ea800eb Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Mon, 26 Apr 2021 17:31:05 -0600 Subject: [PATCH 08/13] Unit tests and minor cleanup per review --- .../NodeShutdownAllocationDecider.java | 53 +++-- .../NodeShutdownAllocationDeciderTests.java | 197 ++++++++++++++++++ 2 files changed, 238 insertions(+), 12 deletions(-) create mode 100644 server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDeciderTests.java diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java index 9d11633c99cab..c119998cac390 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java @@ -8,6 +8,8 @@ package org.elasticsearch.cluster.routing.allocation.decider; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.metadata.NodesShutdownMetadata; @@ -27,6 +29,7 @@ * on, a node which is shutting down for restart. */ public class NodeShutdownAllocationDecider extends AllocationDecider { + private static final Logger logger = LogManager.getLogger(NodeShutdownAllocationDecider.class); private static final String NAME = "node_shutdown"; @@ -42,11 +45,28 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing return allocation.decision(Decision.YES, NAME, "this node is not currently shutting down"); } - if (SingleNodeShutdownMetadata.Type.REMOVE.equals(thisNodeShutdownMetadata.getType())) { - return allocation.decision(Decision.NO, NAME, "node [%s] is preparing to be removed from the cluster", node.nodeId()); + switch (thisNodeShutdownMetadata.getType()) { + case REMOVE: + return allocation.decision(Decision.NO, NAME, "node [%s] is preparing to be removed from the cluster", node.nodeId()); + case RESTART: + return allocation.decision( + Decision.YES, + NAME, + "node [%s] is preparing to restart, but will remain in the cluster", + node.nodeId() + ); + default: + logger.error( + "found unrecognized node shutdown type [{}] while deciding allocation for [{}] shard [{}][{}] on node [{}]", + thisNodeShutdownMetadata.getType(), + shardRouting.primary() ? "primary" : "replica", + shardRouting.getIndexName(), + shardRouting.getId(), + node.nodeId() + ); + assert false : "node shutdown type not recognized: " + thisNodeShutdownMetadata.getType(); + return Decision.YES; } - - return Decision.YES; } /** @@ -68,14 +88,23 @@ public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNod if (thisNodeShutdownMetadata == null) { return allocation.decision(Decision.YES, NAME, "node [%s] is not preparing for removal from the cluster"); - } else if (SingleNodeShutdownMetadata.Type.RESTART.equals(thisNodeShutdownMetadata.getType())){ - return allocation.decision(Decision.YES, NAME, "node [%s] is preparing for restart but will remain in the cluster", - node.getId()); - } else if (SingleNodeShutdownMetadata.Type.REMOVE.equals(thisNodeShutdownMetadata.getType())) { - return allocation.decision(Decision.NO, NAME, "node [%s] is preparing for removal from the cluster", node.getId()); - } else { - assert false : "node shutdown type should be either REMOVE or RESTART"; - return Decision.ALWAYS; + } + + switch (thisNodeShutdownMetadata.getType()) { + case RESTART: + return allocation.decision(Decision.YES, NAME, "node [%s] is preparing to restart, but will remain in the cluster", + node.getId()); + case REMOVE: + return allocation.decision(Decision.NO, NAME, "node [%s] is preparing for removal from the cluster", node.getId()); + default: + logger.error( + "found unrecognized node shutdown type [{}] while deciding auto-expansion for index [{}] on node [{}]", + thisNodeShutdownMetadata.getType(), + indexMetadata.getIndex().getName(), + node.getId() + ); + assert false : "node shutdown type not recognized: " + thisNodeShutdownMetadata.getType(); + return Decision.YES; } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDeciderTests.java new file mode 100644 index 0000000000000..d95cfc7aec9d1 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDeciderTests.java @@ -0,0 +1,197 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.cluster.routing.allocation.decider; + +import org.elasticsearch.Version; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ESAllocationTestCase; +import org.elasticsearch.cluster.EmptyClusterInfoService; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.metadata.NodesShutdownMetadata; +import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodeRole; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.routing.RecoverySource; +import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.UnassignedInfo; +import org.elasticsearch.cluster.routing.allocation.AllocationService; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.elasticsearch.common.settings.ClusterSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.snapshots.EmptySnapshotsInfoService; +import org.elasticsearch.test.gateway.TestGatewayAllocator; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import static org.hamcrest.Matchers.equalTo; + +public class NodeShutdownAllocationDeciderTests extends ESAllocationTestCase { + private static final DiscoveryNode DATA_NODE = newNode("node-data", Collections.singleton(DiscoveryNodeRole.DATA_ROLE)); + private final ShardRouting shard = ShardRouting.newUnassigned( + new ShardId("myindex", "myindex", 0), + true, + RecoverySource.EmptyStoreRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "index created") + ); + private final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + private NodeShutdownAllocationDecider decider = new NodeShutdownAllocationDecider(); + private final AllocationDeciders allocationDeciders = new AllocationDeciders( + Arrays.asList( + decider, + new SameShardAllocationDecider(Settings.EMPTY, clusterSettings), + new ReplicaAfterPrimaryActiveAllocationDecider() + ) + ); + private final AllocationService service = new AllocationService( + allocationDeciders, + new TestGatewayAllocator(), + new BalancedShardsAllocator(Settings.EMPTY), + EmptyClusterInfoService.INSTANCE, + EmptySnapshotsInfoService.INSTANCE + ); + + private final String idxName = "test-idx"; + private final String idxUuid = "test-idx-uuid"; + private final IndexMetadata indexMetadata = IndexMetadata.builder(idxName) + .settings( + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_INDEX_UUID, idxUuid) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .build() + ) + .build(); + + public void testCanAllocateShardsToRestartingNode() { + ClusterState state = prepareState( + service.reroute(ClusterState.EMPTY_STATE, "initial state"), + SingleNodeShutdownMetadata.Type.RESTART + ); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, null, 0); + RoutingNode routingNode = new RoutingNode(DATA_NODE.getId(), DATA_NODE, shard); + allocation.debugDecision(true); + + Decision decision = decider.canAllocate(shard, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.YES)); + assertThat( + decision.getExplanation(), + equalTo("node [" + DATA_NODE.getId() + "] is preparing to restart, but will remain in the cluster") + ); + } + + + public void testCannotAllocateShardsToRemovingNode() { + ClusterState state = prepareState( + service.reroute(ClusterState.EMPTY_STATE, "initial state"), + SingleNodeShutdownMetadata.Type.REMOVE + ); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, null, 0); + RoutingNode routingNode = new RoutingNode(DATA_NODE.getId(), DATA_NODE, shard); + allocation.debugDecision(true); + + Decision decision = decider.canAllocate(shard, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.NO)); + assertThat( + decision.getExplanation(), + equalTo("node [" + DATA_NODE.getId() + "] is preparing to be removed from the cluster") + ); + } + + public void testShardsCanRemainOnRestartingNode() { + ClusterState state = prepareState( + service.reroute(ClusterState.EMPTY_STATE, "initial state"), + SingleNodeShutdownMetadata.Type.RESTART + ); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, null, 0); + RoutingNode routingNode = new RoutingNode(DATA_NODE.getId(), DATA_NODE, shard); + allocation.debugDecision(true); + + Decision decision = decider.canRemain(shard, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.YES)); + assertThat( + decision.getExplanation(), + equalTo("node [" + DATA_NODE.getId() + "] is preparing to restart, but will remain in the cluster") + ); + } + + public void testShardsCannotRemainOnRemovingNode() { + ClusterState state = prepareState( + service.reroute(ClusterState.EMPTY_STATE, "initial state"), + SingleNodeShutdownMetadata.Type.REMOVE + ); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, null, 0); + RoutingNode routingNode = new RoutingNode(DATA_NODE.getId(), DATA_NODE, shard); + allocation.debugDecision(true); + + Decision decision = decider.canRemain(shard, routingNode, allocation); + assertThat(decision.type(), equalTo(Decision.Type.NO)); + assertThat( + decision.getExplanation(), + equalTo("node [" + DATA_NODE.getId() + "] is preparing to be removed from the cluster") + ); + } + + public void testCanAutoExpandToRestartingNode() { + ClusterState state = prepareState( + service.reroute(ClusterState.EMPTY_STATE, "initial state"), + SingleNodeShutdownMetadata.Type.RESTART + ); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, null, 0); + allocation.debugDecision(true); + + Decision decision = decider.shouldAutoExpandToNode(indexMetadata, DATA_NODE, allocation); + assertThat(decision.type(), equalTo(Decision.Type.YES)); + assertThat( + decision.getExplanation(), + equalTo("node [" + DATA_NODE.getId() + "] is preparing to restart, but will remain in the cluster") + ); + } + + public void testCannotAutoExpandToRemovingNode() { + ClusterState state = prepareState( + service.reroute(ClusterState.EMPTY_STATE, "initial state"), + SingleNodeShutdownMetadata.Type.REMOVE + ); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, null, 0); + allocation.debugDecision(true); + + Decision decision = decider.shouldAutoExpandToNode(indexMetadata, DATA_NODE, allocation); + assertThat(decision.type(), equalTo(Decision.Type.NO)); + assertThat(decision.getExplanation(), equalTo("node [" + DATA_NODE.getId() + "] is preparing for removal from the cluster")); + } + + private ClusterState prepareState(ClusterState initialState, SingleNodeShutdownMetadata.Type shutdownType) { + Map nodesShutdownInfo = new HashMap<>(); + + final SingleNodeShutdownMetadata nodeShutdownMetadata = SingleNodeShutdownMetadata.builder() + .setNodeId(DATA_NODE.getId()) + .setType(shutdownType) + .setReason(this.getTestName()) + .setStartedAtMillis(1L) + .build(); + NodesShutdownMetadata nodesShutdownMetadata = new NodesShutdownMetadata(new HashMap<>()).putSingleNodeMetadata( + nodeShutdownMetadata + ); + return ClusterState.builder(initialState) + .nodes(DiscoveryNodes.builder().add(DATA_NODE).build()) + .metadata( + Metadata.builder().put(IndexMetadata.builder(indexMetadata)).putCustom(NodesShutdownMetadata.TYPE, nodesShutdownMetadata) + ) + .build(); + } +} From eb30c3d928bff274a391853c4f4ad41487bd3b3a Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Mon, 26 Apr 2021 17:31:54 -0600 Subject: [PATCH 09/13] Move shutdown allocation decider up in priority --- .../src/main/java/org/elasticsearch/cluster/ClusterModule.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java index 9d55dd448959b..915ca94fbac4e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -201,13 +201,13 @@ public static Collection createAllocationDeciders(Settings se addAllocationDecider(deciders, new NodeVersionAllocationDecider()); addAllocationDecider(deciders, new SnapshotInProgressAllocationDecider()); addAllocationDecider(deciders, new RestoreInProgressAllocationDecider()); + addAllocationDecider(deciders, new NodeShutdownAllocationDecider()); addAllocationDecider(deciders, new FilterAllocationDecider(settings, clusterSettings)); addAllocationDecider(deciders, new SameShardAllocationDecider(settings, clusterSettings)); addAllocationDecider(deciders, new DiskThresholdDecider(settings, clusterSettings)); addAllocationDecider(deciders, new ThrottlingAllocationDecider(settings, clusterSettings)); addAllocationDecider(deciders, new ShardsLimitAllocationDecider(settings, clusterSettings)); addAllocationDecider(deciders, new AwarenessAllocationDecider(settings, clusterSettings)); - addAllocationDecider(deciders, new NodeShutdownAllocationDecider()); clusterPlugins.stream() .flatMap(p -> p.createAllocationDeciders(settings, clusterSettings).stream()) From 0e6030a25796619c26a977654d807ec7f15c3406 Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Mon, 26 Apr 2021 17:33:26 -0600 Subject: [PATCH 10/13] Fix decider order test --- .../test/java/org/elasticsearch/cluster/ClusterModuleTests.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java b/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java index 9a219755f4848..b966d428ce0c6 100644 --- a/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.NodeShutdownAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.RebalanceOnlyWhenActiveAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ReplicaAfterPrimaryActiveAllocationDecider; @@ -193,6 +194,7 @@ public void testAllocationDeciderOrder() { NodeVersionAllocationDecider.class, SnapshotInProgressAllocationDecider.class, RestoreInProgressAllocationDecider.class, + NodeShutdownAllocationDecider.class, FilterAllocationDecider.class, SameShardAllocationDecider.class, DiskThresholdDecider.class, From b7ec81d4a5e1760ddb63e5d601ad26d791512dfa Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Wed, 28 Apr 2021 16:22:17 -0600 Subject: [PATCH 11/13] Change log messages to DEBUG to prevent log spam --- .../allocation/decider/NodeShutdownAllocationDecider.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java index c119998cac390..f9680012cc6be 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java @@ -56,7 +56,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing node.nodeId() ); default: - logger.error( + logger.debug( "found unrecognized node shutdown type [{}] while deciding allocation for [{}] shard [{}][{}] on node [{}]", thisNodeShutdownMetadata.getType(), shardRouting.primary() ? "primary" : "replica", @@ -97,7 +97,7 @@ public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNod case REMOVE: return allocation.decision(Decision.NO, NAME, "node [%s] is preparing for removal from the cluster", node.getId()); default: - logger.error( + logger.debug( "found unrecognized node shutdown type [{}] while deciding auto-expansion for index [{}] on node [{}]", thisNodeShutdownMetadata.getType(), indexMetadata.getIndex().getName(), From b479b423c4df911e4691cbc6f465f5eefcd3e7bf Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Wed, 28 Apr 2021 16:27:42 -0600 Subject: [PATCH 12/13] Wait to auto-expand when node is restarting --- .../allocation/decider/NodeShutdownAllocationDecider.java | 8 ++++++-- .../decider/NodeShutdownAllocationDeciderTests.java | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java index f9680012cc6be..d272abe4f6391 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java @@ -92,8 +92,12 @@ public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNod switch (thisNodeShutdownMetadata.getType()) { case RESTART: - return allocation.decision(Decision.YES, NAME, "node [%s] is preparing to restart, but will remain in the cluster", - node.getId()); + return allocation.decision( + Decision.NO, + NAME, + "node [%s] is preparing to restart, auto-expansion waiting until it is complete", + node.getId() + ); case REMOVE: return allocation.decision(Decision.NO, NAME, "node [%s] is preparing for removal from the cluster", node.getId()); default: diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDeciderTests.java index d95cfc7aec9d1..c74d5ee68e4ea 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDeciderTests.java @@ -146,7 +146,7 @@ public void testShardsCannotRemainOnRemovingNode() { ); } - public void testCanAutoExpandToRestartingNode() { + public void testCannotAutoExpandToRestartingNode() { ClusterState state = prepareState( service.reroute(ClusterState.EMPTY_STATE, "initial state"), SingleNodeShutdownMetadata.Type.RESTART @@ -155,10 +155,10 @@ public void testCanAutoExpandToRestartingNode() { allocation.debugDecision(true); Decision decision = decider.shouldAutoExpandToNode(indexMetadata, DATA_NODE, allocation); - assertThat(decision.type(), equalTo(Decision.Type.YES)); + assertThat(decision.type(), equalTo(Decision.Type.NO)); assertThat( decision.getExplanation(), - equalTo("node [" + DATA_NODE.getId() + "] is preparing to restart, but will remain in the cluster") + equalTo("node [" + DATA_NODE.getId() + "] is preparing to restart, auto-expansion waiting until it is complete") ); } From e804d7478695c8ecc5508351bc187bd1c93d0c49 Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Wed, 28 Apr 2021 16:31:40 -0600 Subject: [PATCH 13/13] Fix compilation after merge --- .../allocation/decider/NodeShutdownAllocationDecider.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java index d272abe4f6391..feaff722e1aa8 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDecider.java @@ -115,11 +115,11 @@ public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNod @Nullable private static SingleNodeShutdownMetadata getNodeShutdownMetadata(Metadata metadata, String nodeId) { NodesShutdownMetadata nodesShutdownMetadata = metadata.custom(NodesShutdownMetadata.TYPE); - if (nodesShutdownMetadata == null || nodesShutdownMetadata.getAllNodeMetdataMap() == null) { + if (nodesShutdownMetadata == null || nodesShutdownMetadata.getAllNodeMetadataMap() == null) { // There are no nodes in the process of shutting down, return null. return null; } - return nodesShutdownMetadata.getAllNodeMetdataMap().get(nodeId); + return nodesShutdownMetadata.getAllNodeMetadataMap().get(nodeId); } }