-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Make ILM aware of node shutdown #73690
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
980a771
b186c9e
38a8b6c
888eb44
2d2b80c
bf61186
d0d017f
35833e0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the Elastic License | ||
| * 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
| * in compliance with, at your election, the Elastic License 2.0 or the Server | ||
| * Side Public License, v 1. | ||
| */ | ||
|
|
||
| package org.elasticsearch.plugins; | ||
|
|
||
| import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; | ||
|
|
||
| import java.util.Collection; | ||
|
|
||
| /** | ||
| * A {@link ShutdownAwarePlugin} is a plugin that can be made aware of a shutdown. It comprises two | ||
| * parts, one part used for telling plugins that a set of nodes are going to be shut down | ||
| * ({@link #signalShutdown(Collection)}), the other for retrieving the status of those plugins | ||
| * as to whether it is safe to shut down ({@link #safeToShutdown(String, SingleNodeShutdownMetadata.Type)}). | ||
| */ | ||
| public interface ShutdownAwarePlugin { | ||
|
|
||
| /** | ||
| * Whether the plugin is considered safe to shut down. This method is called when the status of | ||
| * a shutdown is retrieved via the API, and it is only called on the master node. | ||
| * | ||
| * @param nodeId the id of the node whose shutdown is being checked | ||
| * @param shutdownType the type of shutdown being checked for that node | ||
| * @return {@code true} if this plugin considers it safe to shut down, {@code false} otherwise | ||
| */ | ||
| boolean safeToShutdown(String nodeId, SingleNodeShutdownMetadata.Type shutdownType); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please consider adding an extra |
||
|
|
||
| /** | ||
| * A trigger to notify the plugin that a shutdown for the nodes has been triggered. This method | ||
| * will be called on every node for each cluster state, so it should return quickly. | ||
| * | ||
| * @param shutdownNodeIds the ids of the nodes for which a shutdown has been triggered | ||
| */ | ||
| void signalShutdown(Collection<String> shutdownNodeIds); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please consider adding an extra |
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,108 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the Elastic License | ||
| * 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
| * in compliance with, at your election, the Elastic License 2.0 or the Server | ||
| * Side Public License, v 1. | ||
| */ | ||
|
|
||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the Elastic License | ||
| * 2.0; you may not use this file except in compliance with the Elastic License | ||
| * 2.0. | ||
| */ | ||
|
|
||
| package org.elasticsearch.shutdown; | ||
|
|
||
| import org.apache.logging.log4j.LogManager; | ||
| import org.apache.logging.log4j.Logger; | ||
| import org.apache.logging.log4j.message.ParameterizedMessage; | ||
| import org.elasticsearch.cluster.ClusterChangedEvent; | ||
| import org.elasticsearch.cluster.ClusterState; | ||
| import org.elasticsearch.cluster.ClusterStateListener; | ||
| import org.elasticsearch.cluster.metadata.NodesShutdownMetadata; | ||
| import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; | ||
| import org.elasticsearch.common.Nullable; | ||
| import org.elasticsearch.plugins.ShutdownAwarePlugin; | ||
|
|
||
| import java.util.Collections; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Set; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| /** | ||
| * The {@link PluginShutdownService} is used for the node shutdown infrastructure to signal to | ||
| * plugins that a shutdown is occurring, and to check whether it is safe to shut down. | ||
| */ | ||
| public class PluginShutdownService implements ClusterStateListener { | ||
|
|
||
| private static final Logger logger = LogManager.getLogger(PluginShutdownService.class); | ||
| public List<ShutdownAwarePlugin> plugins; | ||
|
|
||
| /** | ||
| * Creates the service for the given shutdown-aware plugins; a {@code null} list is treated as empty. | ||
| */ | ||
| public PluginShutdownService(@Nullable List<ShutdownAwarePlugin> plugins) { | ||
| if (plugins == null) { | ||
| this.plugins = Collections.emptyList(); | ||
| } else { | ||
| this.plugins = plugins; | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Collects the ids of every node that has shutdown metadata in the given cluster state. | ||
| * An empty set is returned when the cluster state yields no shutdown metadata. | ||
| */ | ||
| public static Set<String> shutdownNodes(final ClusterState clusterState) { | ||
| return NodesShutdownMetadata.getShutdowns(clusterState) | ||
| .map(shutdowns -> shutdowns.getAllNodeMetadataMap()) | ||
| .map(nodeIdToMetadata -> nodeIdToMetadata.keySet()) | ||
| .orElseGet(Collections::emptySet); | ||
| } | ||
|
|
||
| /** | ||
| * Return all nodes shutting down with the given shutdown type from the given cluster state | ||
| * | ||
| * @param clusterState the cluster state to read the shutdown metadata from | ||
| * @param shutdownType the shutdown type a node must have to be included in the result | ||
| * @return the ids of the matching nodes, or an empty set when there is no shutdown metadata | ||
| */ | ||
| public static Set<String> shutdownTypeNodes(final ClusterState clusterState, final SingleNodeShutdownMetadata.Type shutdownType) { | ||
| // Collect the matching node ids directly rather than building a filtered map only to take its key set | ||
| return NodesShutdownMetadata.getShutdowns(clusterState) | ||
| .map(NodesShutdownMetadata::getAllNodeMetadataMap) | ||
| .map(m -> m.entrySet().stream() | ||
| .filter(e -> e.getValue().getType() == shutdownType) | ||
| .map(Map.Entry::getKey) | ||
| .collect(Collectors.toSet())) | ||
| .orElse(Collections.emptySet()); | ||
| } | ||
|
|
||
| /** | ||
| * Check with registered plugins whether the shutdown is safe for the given node id and type | ||
| * | ||
| * <p>Plugins are asked one after another and the check ends at the first plugin that reports it is | ||
| * not safe. A plugin that throws an exception is logged at warn level and does not block the result. | ||
| * | ||
| * @param nodeId the id of the node to check | ||
| * @param shutdownType the type of shutdown to check | ||
| * @return {@code false} as soon as any plugin reports that it is not safe, {@code true} otherwise | ||
| */ | ||
| public boolean readyToShutdown(String nodeId, SingleNodeShutdownMetadata.Type shutdownType) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if it's worth having this method take an extra
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We discussed adding these today, but since there's no current user, we're going to keep the cluster state out of the interface for now, and revisit it when ML (or a different plugin) has an implementation where they need these There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK cool. I am going to work on the ML PR soon, so can add the arguments to that if you don't have a fundamental objection.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
If possible, I think I'd prefer to keep them out of the interface. Especially for the Would that work for you? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since both methods are implemented by the same class, it doesn't really help to just add the current cluster state to one of them. I can instead add a reference to the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That would be my preference then, as long as that isn't too distasteful of a solution for you. |
||
| // TODO: consider adding debugging information (a message about why not?) | ||
| // TODO: consider adding more fine-grained status rather than true/false | ||
| for (ShutdownAwarePlugin plugin : plugins) { | ||
| try { | ||
| if (plugin.safeToShutdown(nodeId, shutdownType) == false) { | ||
| logger.trace("shutdown aware plugin [{}] is not yet ready for shutdown", plugin); | ||
| return false; | ||
| } | ||
| } catch (Exception e) { | ||
| logger.warn("uncaught exception when retrieving whether plugin is ready for node shutdown", e); | ||
| } | ||
| } | ||
| return true; | ||
| } | ||
|
|
||
| /** | ||
| * Signal to plugins the nodes that are currently shutting down | ||
| * | ||
| * <p>Every registered plugin is notified; an exception thrown by one plugin is logged at warn level | ||
| * and does not stop the remaining plugins from being notified. | ||
| * | ||
| * @param state the cluster state from which the shutting down nodes are read | ||
| */ | ||
| public void signalShutdown(final ClusterState state) { | ||
| Set<String> shutdownNodes = shutdownNodes(state); | ||
| for (ShutdownAwarePlugin plugin : plugins) { | ||
| try { | ||
| plugin.signalShutdown(shutdownNodes); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similarly, it would be nice if |
||
| } catch (Exception e) { | ||
| logger.warn(new ParameterizedMessage("uncaught exception when notifying plugins of nodes {} shutdown", shutdownNodes), e); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Forwards the state carried by each cluster change event to {@link #signalShutdown(ClusterState)}. | ||
| */ | ||
| @Override | ||
| public void clusterChanged(ClusterChangedEvent event) { | ||
| final ClusterState latestState = event.state(); | ||
| signalShutdown(latestState); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the Elastic License | ||
| * 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
| * in compliance with, at your election, the Elastic License 2.0 or the Server | ||
| * Side Public License, v 1. | ||
| */ | ||
|
|
||
| package org.elasticsearch.cluster.metadata; | ||
|
|
||
| import org.elasticsearch.test.ESTestCase; | ||
|
|
||
| public class SingleNodeShutdownMetadataTests extends ESTestCase { | ||
| /** | ||
| * Checks the status that {@code Status.combine} reports for several mixes of input statuses, | ||
| * including the call with no arguments. Assertions pass the expected value first so that a | ||
| * failure message labels expected and actual correctly. | ||
| */ | ||
| public void testStatusCombination() { | ||
| SingleNodeShutdownMetadata.Status status; | ||
|
|
||
| status = SingleNodeShutdownMetadata.Status.combine(SingleNodeShutdownMetadata.Status.NOT_STARTED, | ||
| SingleNodeShutdownMetadata.Status.IN_PROGRESS, | ||
| SingleNodeShutdownMetadata.Status.STALLED); | ||
| assertEquals(SingleNodeShutdownMetadata.Status.STALLED, status); | ||
|
|
||
| status = SingleNodeShutdownMetadata.Status.combine(SingleNodeShutdownMetadata.Status.NOT_STARTED, | ||
| SingleNodeShutdownMetadata.Status.IN_PROGRESS, | ||
| SingleNodeShutdownMetadata.Status.NOT_STARTED); | ||
| assertEquals(SingleNodeShutdownMetadata.Status.IN_PROGRESS, status); | ||
|
|
||
| status = SingleNodeShutdownMetadata.Status.combine(SingleNodeShutdownMetadata.Status.NOT_STARTED, | ||
| SingleNodeShutdownMetadata.Status.NOT_STARTED, | ||
| SingleNodeShutdownMetadata.Status.NOT_STARTED); | ||
| assertEquals(SingleNodeShutdownMetadata.Status.NOT_STARTED, status); | ||
|
|
||
| status = SingleNodeShutdownMetadata.Status.combine(SingleNodeShutdownMetadata.Status.IN_PROGRESS, | ||
| SingleNodeShutdownMetadata.Status.IN_PROGRESS, | ||
| SingleNodeShutdownMetadata.Status.COMPLETE); | ||
| assertEquals(SingleNodeShutdownMetadata.Status.IN_PROGRESS, status); | ||
|
|
||
| status = SingleNodeShutdownMetadata.Status.combine(SingleNodeShutdownMetadata.Status.COMPLETE, | ||
| SingleNodeShutdownMetadata.Status.COMPLETE, | ||
| SingleNodeShutdownMetadata.Status.COMPLETE); | ||
| assertEquals(SingleNodeShutdownMetadata.Status.COMPLETE, status); | ||
|
|
||
| status = SingleNodeShutdownMetadata.Status.combine(); | ||
| assertEquals(SingleNodeShutdownMetadata.Status.COMPLETE, status); | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I really want to be a functional programming dork and tell you to use `reduce` here,
but I think this is actually clearer than what you'd have to do to make reduce work.