Skip to content

Commit b2a9328

Browse files
authored
Add target_node_name for REPLACE shutdown type (#77151)
This commit extends the `REPLACE` shutdown type to support a new `target_node_name` field, which will control the node used to replace the one that is to shut down. However, only the data layer is present as of this PR. Also adjusts the serialization/deserialization for the REPLACE type to handle mixed-version clusters.
1 parent 5b41c03 commit b2a9328

File tree

16 files changed

+235
-40
lines changed

16 files changed

+235
-40
lines changed

server/src/main/java/org/elasticsearch/cluster/metadata/SingleNodeShutdownMetadata.java

Lines changed: 59 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
package org.elasticsearch.cluster.metadata;
1010

11+
import org.apache.logging.log4j.message.ParameterizedMessage;
12+
import org.elasticsearch.Version;
1113
import org.elasticsearch.cluster.AbstractDiffable;
1214
import org.elasticsearch.cluster.Diffable;
1315
import org.elasticsearch.common.io.stream.StreamInput;
@@ -33,13 +35,16 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
3335
ToXContentObject,
3436
Diffable<SingleNodeShutdownMetadata> {
3537

38+
public static final Version REPLACE_SHUTDOWN_TYPE_ADDED_VERSION = Version.V_8_0_0;
39+
3640
public static final ParseField NODE_ID_FIELD = new ParseField("node_id");
3741
public static final ParseField TYPE_FIELD = new ParseField("type");
3842
public static final ParseField REASON_FIELD = new ParseField("reason");
3943
public static final String STARTED_AT_READABLE_FIELD = "shutdown_started";
4044
public static final ParseField STARTED_AT_MILLIS_FIELD = new ParseField(STARTED_AT_READABLE_FIELD + "millis");
4145
public static final ParseField ALLOCATION_DELAY_FIELD = new ParseField("allocation_delay");
4246
public static final ParseField NODE_SEEN_FIELD = new ParseField("node_seen");
47+
public static final ParseField TARGET_NODE_NAME_FIELD = new ParseField("target_node_name");
4348

4449
public static final ConstructingObjectParser<SingleNodeShutdownMetadata, Void> PARSER = new ConstructingObjectParser<>(
4550
"node_shutdown_info",
@@ -49,7 +54,8 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
4954
(String) a[2],
5055
(long) a[3],
5156
(boolean) a[4],
52-
(TimeValue) a[5]
57+
(TimeValue) a[5],
58+
(String) a[6]
5359
)
5460
);
5561

@@ -64,6 +70,7 @@ public class SingleNodeShutdownMetadata extends AbstractDiffable<SingleNodeShutd
6470
(p, c) -> TimeValue.parseTimeValue(p.textOrNull(), ALLOCATION_DELAY_FIELD.getPreferredName()), ALLOCATION_DELAY_FIELD,
6571
ObjectParser.ValueType.STRING_OR_NULL
6672
);
73+
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), TARGET_NODE_NAME_FIELD);
6774
}
6875

6976
public static SingleNodeShutdownMetadata parse(XContentParser parser) {
@@ -78,6 +85,7 @@ public static SingleNodeShutdownMetadata parse(XContentParser parser) {
7885
private final long startedAtMillis;
7986
private final boolean nodeSeen;
8087
@Nullable private final TimeValue allocationDelay;
88+
@Nullable private final String targetNodeName;
8189

8290
/**
8391
* @param nodeId The node ID that this shutdown metadata refers to.
@@ -91,7 +99,8 @@ private SingleNodeShutdownMetadata(
9199
String reason,
92100
long startedAtMillis,
93101
boolean nodeSeen,
94-
@Nullable TimeValue allocationDelay
102+
@Nullable TimeValue allocationDelay,
103+
@Nullable String targetNodeName
95104
) {
96105
this.nodeId = Objects.requireNonNull(nodeId, "node ID must not be null");
97106
this.type = Objects.requireNonNull(type, "shutdown type must not be null");
@@ -102,6 +111,13 @@ private SingleNodeShutdownMetadata(
102111
throw new IllegalArgumentException("shard allocation delay is only valid for RESTART-type shutdowns");
103112
}
104113
this.allocationDelay = allocationDelay;
114+
if (targetNodeName != null && type != Type.REPLACE) {
115+
throw new IllegalArgumentException(new ParameterizedMessage("target node name is only valid for REPLACE type shutdowns, " +
116+
"but was given type [{}] and target node name [{}]", type, targetNodeName).getFormattedMessage());
117+
} else if (targetNodeName == null && type == Type.REPLACE) {
118+
throw new IllegalArgumentException("target node name is required for REPLACE type shutdowns");
119+
}
120+
this.targetNodeName = targetNodeName;
105121
}
106122

107123
public SingleNodeShutdownMetadata(StreamInput in) throws IOException {
@@ -111,6 +127,11 @@ public SingleNodeShutdownMetadata(StreamInput in) throws IOException {
111127
this.startedAtMillis = in.readVLong();
112128
this.nodeSeen = in.readBoolean();
113129
this.allocationDelay = in.readOptionalTimeValue();
130+
if (in.getVersion().onOrAfter(REPLACE_SHUTDOWN_TYPE_ADDED_VERSION)) {
131+
this.targetNodeName = in.readOptionalString();
132+
} else {
133+
this.targetNodeName = null;
134+
}
114135
}
115136

116137
/**
@@ -148,6 +169,13 @@ public boolean getNodeSeen() {
148169
return nodeSeen;
149170
}
150171

172+
/**
173+
* @return The name of the node to be used as a replacement for this node, or {@code null} if this is not a REPLACE-type shutdown.
174+
*/
175+
public String getTargetNodeName() {
176+
return targetNodeName;
177+
}
178+
151179
/**
152180
* @return The amount of time shard reallocation should be delayed for shards on this node, so that they will not be automatically
153181
* reassigned while the node is restarting. Will be {@code null} for non-restart shutdowns.
@@ -165,11 +193,18 @@ public TimeValue getAllocationDelay() {
165193
@Override
166194
public void writeTo(StreamOutput out) throws IOException {
167195
out.writeString(nodeId);
168-
out.writeEnum(type);
196+
if (out.getVersion().before(REPLACE_SHUTDOWN_TYPE_ADDED_VERSION) && this.type == SingleNodeShutdownMetadata.Type.REPLACE) {
197+
out.writeEnum(SingleNodeShutdownMetadata.Type.REMOVE);
198+
} else {
199+
out.writeEnum(type);
200+
}
169201
out.writeString(reason);
170202
out.writeVLong(startedAtMillis);
171203
out.writeBoolean(nodeSeen);
172204
out.writeOptionalTimeValue(allocationDelay);
205+
if (out.getVersion().onOrAfter(REPLACE_SHUTDOWN_TYPE_ADDED_VERSION)) {
206+
out.writeOptionalString(targetNodeName);
207+
}
173208
}
174209

175210
@Override
@@ -184,6 +219,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
184219
if (allocationDelay != null) {
185220
builder.field(ALLOCATION_DELAY_FIELD.getPreferredName(), allocationDelay.getStringRep());
186221
}
222+
if (targetNodeName != null) {
223+
builder.field(TARGET_NODE_NAME_FIELD.getPreferredName(), targetNodeName);
224+
}
187225
}
188226
builder.endObject();
189227

@@ -200,7 +238,8 @@ && getNodeId().equals(that.getNodeId())
200238
&& getType() == that.getType()
201239
&& getReason().equals(that.getReason())
202240
&& getNodeSeen() == that.getNodeSeen()
203-
&& Objects.equals(allocationDelay, that.allocationDelay);
241+
&& Objects.equals(allocationDelay, that.allocationDelay)
242+
&& Objects.equals(targetNodeName, that.targetNodeName);
204243
}
205244

206245
@Override
@@ -211,7 +250,8 @@ public int hashCode() {
211250
getReason(),
212251
getStartedAtMillis(),
213252
getNodeSeen(),
214-
allocationDelay
253+
allocationDelay,
254+
targetNodeName
215255
);
216256
}
217257

@@ -228,7 +268,8 @@ public static Builder builder(SingleNodeShutdownMetadata original) {
228268
.setType(original.getType())
229269
.setReason(original.getReason())
230270
.setStartedAtMillis(original.getStartedAtMillis())
231-
.setNodeSeen(original.getNodeSeen());
271+
.setNodeSeen(original.getNodeSeen())
272+
.setTargetNodeName(original.getTargetNodeName());
232273
}
233274

234275
public static class Builder {
@@ -238,6 +279,7 @@ public static class Builder {
238279
private long startedAtMillis = -1;
239280
private boolean nodeSeen = false;
240281
private TimeValue allocationDelay;
282+
private String targetNodeName;
241283

242284
private Builder() {}
243285

@@ -295,6 +337,15 @@ public Builder setAllocationDelay(TimeValue allocationDelay) {
295337
return this;
296338
}
297339

340+
/**
341+
* @param targetNodeName The name of the node which should be used to replace this one. Only valid if the shutdown type is REPLACE.
342+
* @return This builder.
343+
*/
344+
public Builder setTargetNodeName(String targetNodeName) {
345+
this.targetNodeName = targetNodeName;
346+
return this;
347+
}
348+
298349
public SingleNodeShutdownMetadata build() {
299350
if (startedAtMillis == -1) {
300351
throw new IllegalArgumentException("start timestamp must be set");
@@ -306,7 +357,8 @@ public SingleNodeShutdownMetadata build() {
306357
reason,
307358
startedAtMillis,
308359
nodeSeen,
309-
allocationDelay
360+
allocationDelay,
361+
targetNodeName
310362
);
311363
}
312364
}

server/src/test/java/org/elasticsearch/cluster/metadata/NodesShutdownMetadataTests.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ private SingleNodeShutdownMetadata randomNodeShutdownInfo() {
8787
.setStartedAtMillis(randomNonNegativeLong());
8888
if (type.equals(SingleNodeShutdownMetadata.Type.RESTART) && randomBoolean()) {
8989
builder.setAllocationDelay(TimeValue.parseTimeValue(randomTimeValue(), this.getTestName()));
90+
} else if (type.equals(SingleNodeShutdownMetadata.Type.REPLACE)) {
91+
builder.setTargetNodeName(randomAlphaOfLengthBetween(5,10));
9092
}
9193
return builder.setNodeSeen(randomBoolean())
9294
.build();

server/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,11 +350,14 @@ public void testRemainingDelayCalculationsWithUnrelatedShutdowns() throws Except
350350
Map<String, SingleNodeShutdownMetadata> shutdowns = new HashMap<>();
351351
int numberOfShutdowns = randomIntBetween(1,15);
352352
for (int i = 0; i <= numberOfShutdowns; i++) {
353+
final SingleNodeShutdownMetadata.Type type = randomFrom(EnumSet.allOf(SingleNodeShutdownMetadata.Type.class));
354+
final String targetNodeName = type == SingleNodeShutdownMetadata.Type.REPLACE ? randomAlphaOfLengthBetween(10, 20) : null;
353355
SingleNodeShutdownMetadata shutdown = SingleNodeShutdownMetadata.builder()
354356
.setNodeId(randomValueOtherThan(lastNodeId, () -> randomAlphaOfLengthBetween(5,10)))
355357
.setReason(this.getTestName())
356358
.setStartedAtMillis(randomNonNegativeLong())
357-
.setType(randomFrom(EnumSet.allOf(SingleNodeShutdownMetadata.Type.class)))
359+
.setType(type)
360+
.setTargetNodeName(targetNodeName)
358361
.build();
359362
shutdowns.put(shutdown.getNodeId(), shutdown);
360363
}

server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/NodeShutdownAllocationDeciderTests.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
import java.util.Arrays;
3636
import java.util.Collections;
3737
import java.util.HashMap;
38-
import java.util.Map;
3938

4039
import static org.hamcrest.Matchers.equalTo;
4140

@@ -176,13 +175,13 @@ public void testCannotAutoExpandToRemovingNode() {
176175
}
177176

178177
private ClusterState prepareState(ClusterState initialState, SingleNodeShutdownMetadata.Type shutdownType) {
179-
Map<String, SingleNodeShutdownMetadata> nodesShutdownInfo = new HashMap<>();
180-
178+
final String targetNodeName = shutdownType == SingleNodeShutdownMetadata.Type.REPLACE ? randomAlphaOfLengthBetween(10, 20) : null;
181179
final SingleNodeShutdownMetadata nodeShutdownMetadata = SingleNodeShutdownMetadata.builder()
182180
.setNodeId(DATA_NODE.getId())
183181
.setType(shutdownType)
184182
.setReason(this.getTestName())
185183
.setStartedAtMillis(1L)
184+
.setTargetNodeName(targetNodeName)
186185
.build();
187186
NodesShutdownMetadata nodesShutdownMetadata = new NodesShutdownMetadata(new HashMap<>()).putSingleNodeMetadata(
188187
nodeShutdownMetadata

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/CheckShrinkReadyStepTests.java

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -356,15 +356,21 @@ public void testStepCompletableIfAllShardsActive() {
356356
ImmutableOpenMap.Builder<String, IndexMetadata> indices = ImmutableOpenMap.<String, IndexMetadata> builder().fPut(index.getName(),
357357
indexMetadata);
358358

359+
final SingleNodeShutdownMetadata.Type type = randomFrom(
360+
SingleNodeShutdownMetadata.Type.REMOVE,
361+
SingleNodeShutdownMetadata.Type.REPLACE
362+
);
363+
final String targetNodeName = type == SingleNodeShutdownMetadata.Type.REPLACE ? randomAlphaOfLengthBetween(10, 20) : null;
359364
ClusterState clusterState = ClusterState.builder(ClusterState.EMPTY_STATE)
360365
.metadata(Metadata.builder()
361366
.indices(indices.build())
362367
.putCustom(NodesShutdownMetadata.TYPE, new NodesShutdownMetadata(Collections.singletonMap("node1",
363368
SingleNodeShutdownMetadata.builder()
364-
.setType(randomFrom(SingleNodeShutdownMetadata.Type.REMOVE, SingleNodeShutdownMetadata.Type.REPLACE))
369+
.setType(type)
365370
.setStartedAtMillis(randomNonNegativeLong())
366371
.setReason("test")
367372
.setNodeId("node1")
373+
.setTargetNodeName(targetNodeName)
368374
.build()))))
369375
.nodes(DiscoveryNodes.builder()
370376
.add(DiscoveryNode.createLocal(Settings.builder().put(node1Settings.build())
@@ -407,15 +413,21 @@ public void testStepBecomesUncompletable() {
407413
ImmutableOpenMap.Builder<String, IndexMetadata> indices = ImmutableOpenMap.<String, IndexMetadata> builder().fPut(index.getName(),
408414
indexMetadata);
409415

416+
final SingleNodeShutdownMetadata.Type type = randomFrom(
417+
SingleNodeShutdownMetadata.Type.REMOVE,
418+
SingleNodeShutdownMetadata.Type.REPLACE
419+
);
420+
final String targetNodeName = type == SingleNodeShutdownMetadata.Type.REPLACE ? randomAlphaOfLengthBetween(10, 20) : null;
410421
ClusterState clusterState = ClusterState.builder(ClusterState.EMPTY_STATE)
411422
.metadata(Metadata.builder()
412423
.indices(indices.build())
413424
.putCustom(NodesShutdownMetadata.TYPE, new NodesShutdownMetadata(Collections.singletonMap("node1",
414425
SingleNodeShutdownMetadata.builder()
415-
.setType(randomFrom(SingleNodeShutdownMetadata.Type.REMOVE, SingleNodeShutdownMetadata.Type.REPLACE))
426+
.setType(type)
416427
.setStartedAtMillis(randomNonNegativeLong())
417428
.setReason("test")
418429
.setNodeId("node1")
430+
.setTargetNodeName(targetNodeName)
419431
.build()))))
420432
.nodes(DiscoveryNodes.builder()
421433
.add(DiscoveryNode.createLocal(Settings.builder().put(node1Settings.build())

x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleServiceTests.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -580,14 +580,20 @@ public void testIndicesOnShuttingDownNodesInDangerousStep() {
580580
IndexLifecycleService.indicesOnShuttingDownNodesInDangerousStep(state, "shutdown_node"),
581581
equalTo(Collections.emptySet()));
582582

583+
final SingleNodeShutdownMetadata.Type type = randomFrom(
584+
SingleNodeShutdownMetadata.Type.REMOVE,
585+
SingleNodeShutdownMetadata.Type.REPLACE
586+
);
587+
final String targetNodeName = type == SingleNodeShutdownMetadata.Type.REPLACE ? randomAlphaOfLengthBetween(10, 20) : null;
583588
state = ClusterState.builder(state)
584589
.metadata(Metadata.builder(state.metadata())
585590
.putCustom(NodesShutdownMetadata.TYPE, new NodesShutdownMetadata(Collections.singletonMap("shutdown_node",
586591
SingleNodeShutdownMetadata.builder()
587592
.setNodeId("shutdown_node")
588593
.setReason("shut down for test")
589594
.setStartedAtMillis(randomNonNegativeLong())
590-
.setType(randomFrom(SingleNodeShutdownMetadata.Type.REMOVE, SingleNodeShutdownMetadata.Type.REPLACE))
595+
.setType(type)
596+
.setTargetNodeName(targetNodeName)
591597
.build())))
592598
.build())
593599
.build();

x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/xpack/ml/integration/MlNodeShutdownIT.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,8 @@ public void testJobsVacateShuttingDownNode() throws Exception {
8080
nodeIdToShutdown.get(),
8181
randomFrom(SingleNodeShutdownMetadata.Type.values()),
8282
"just testing",
83-
null
84-
)
83+
null,
84+
null)
8585
).actionGet();
8686

8787
// Wait for the desired end state of all 6 jobs running on nodes that are not shutting down.
@@ -150,8 +150,8 @@ public void testCloseJobVacatingShuttingDownNode() throws Exception {
150150
nodeIdToShutdown.get(),
151151
randomFrom(SingleNodeShutdownMetadata.Type.values()),
152152
"just testing",
153-
null
154-
)
153+
null,
154+
null)
155155
)
156156
.actionGet();
157157

0 commit comments

Comments
 (0)