diff --git a/docs/reference/migration/migrate_7_0/cluster.asciidoc b/docs/reference/migration/migrate_7_0/cluster.asciidoc index 7343154175b22..732270706ff3d 100644 --- a/docs/reference/migration/migrate_7_0/cluster.asciidoc +++ b/docs/reference/migration/migrate_7_0/cluster.asciidoc @@ -24,4 +24,13 @@ These shard preferences are removed in favour of the `_prefer_nodes` and `_only_ Clusters now have soft limits on the total number of open shards in the cluster based on the number of nodes and the `cluster.max_shards_per_node` cluster setting, to prevent accidental operations that would destabilize the cluster. -More information can be found in the <>. \ No newline at end of file +More information can be found in the <>. + +[float] +==== Discovery configuration is required in production +Production deployments of Elasticsearch now require at least one of the following settings +to be specified in the `elasticsearch.yml` configuration file: + +- `discovery.zen.ping.unicast.hosts` +- `discovery.zen.hosts_provider` +- `cluster.initial_master_nodes` diff --git a/docs/reference/setup/bootstrap-checks.asciidoc b/docs/reference/setup/bootstrap-checks.asciidoc index 03f98fd38acf0..9cf3620636a41 100644 --- a/docs/reference/setup/bootstrap-checks.asciidoc +++ b/docs/reference/setup/bootstrap-checks.asciidoc @@ -236,3 +236,21 @@ versions of the HotSpot JVM. The all permission check ensures that the security policy used during bootstrap does not grant the `java.security.AllPermission` to Elasticsearch. Running with the all permission granted is equivalent to disabling the security manager. + +=== Discovery configuration check + +By default, when Elasticsearch first starts up it will try to discover other +nodes running on the same host. If no elected master can be discovered within a +few seconds then Elasticsearch will form a cluster that includes any other +nodes that were discovered. 
It is useful to be able to form this cluster +without any extra configuration in development mode, but this is unsuitable for +production because it's possible to form multiple clusters and lose data as a +result. + +This bootstrap check ensures that discovery is not running with the default +configuration. It can be satisfied by setting at least one of the following +properties: + +- `discovery.zen.ping.unicast.hosts` +- `discovery.zen.hosts_provider` +- `cluster.initial_master_nodes` diff --git a/qa/unconfigured-node-name/build.gradle b/qa/unconfigured-node-name/build.gradle index 3e41118964799..4df768b57a4dc 100644 --- a/qa/unconfigured-node-name/build.gradle +++ b/qa/unconfigured-node-name/build.gradle @@ -22,7 +22,7 @@ apply plugin: 'elasticsearch.rest-test' integTestCluster { setting 'node.name', null - // TODO: Run this using zen2 + // TODO: Run this using zen2, with no discovery configuration at all, demonstrating that the node forms a cluster on its own without help setting 'discovery.type', 'zen' } diff --git a/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java b/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java index 0c433192ad6c0..22ee36039dd28 100644 --- a/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java +++ b/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java @@ -23,8 +23,10 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.util.Constants; +import org.elasticsearch.cluster.coordination.ClusterBootstrapService; import org.elasticsearch.common.SuppressForbidden; import org.elasticsearch.common.io.PathUtils; +import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.transport.BoundTransportAddress; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.discovery.DiscoveryModule; @@ -46,6 +48,12 @@ import java.util.function.Predicate; import 
java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.elasticsearch.cluster.coordination.ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING; +import static org.elasticsearch.discovery.DiscoveryModule.DISCOVERY_HOSTS_PROVIDER_SETTING; +import static org.elasticsearch.discovery.zen.SettingsBasedHostsProvider.DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING; /** * We enforce bootstrap checks once a node has the transport protocol bound to a non-loopback interface or if the system property {@code @@ -207,6 +215,7 @@ static List checks() { checks.add(new EarlyAccessCheck()); checks.add(new G1GCCheck()); checks.add(new AllPermissionCheck()); + checks.add(new DiscoveryConfiguredCheck()); return Collections.unmodifiableList(checks); } @@ -713,4 +722,21 @@ boolean isAllPermissionGranted() { } + static class DiscoveryConfiguredCheck implements BootstrapCheck { + @Override + public BootstrapCheckResult check(BootstrapContext context) { + if (DiscoveryModule.ZEN2_DISCOVERY_TYPE.equals(DiscoveryModule.DISCOVERY_TYPE_SETTING.get(context.settings)) == false) { + return BootstrapCheckResult.success(); + } + if (ClusterBootstrapService.discoveryIsConfigured(context.settings)) { + return BootstrapCheckResult.success(); + } + + return BootstrapCheckResult.failure(String.format( + Locale.ROOT, + "the default discovery settings are unsuitable for production use; at least one of [%s] must be configured", + Stream.of(DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING, DISCOVERY_HOSTS_PROVIDER_SETTING, INITIAL_MASTER_NODES_SETTING) + .map(Setting::getKey).collect(Collectors.joining(", ")))); + } + } } diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java index e6e31d6d773ab..b39730c4eae50 100644 --- 
a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java @@ -29,6 +29,7 @@ import org.elasticsearch.action.admin.cluster.bootstrap.GetDiscoveredNodesRequest; import org.elasticsearch.action.admin.cluster.bootstrap.GetDiscoveredNodesResponse; import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.Nullable; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; @@ -44,6 +45,10 @@ import java.util.Collections; import java.util.List; import java.util.function.Function; +import java.util.stream.Stream; + +import static org.elasticsearch.discovery.DiscoveryModule.DISCOVERY_HOSTS_PROVIDER_SETTING; +import static org.elasticsearch.discovery.zen.SettingsBasedHostsProvider.DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING; public class ClusterBootstrapService { @@ -57,22 +62,82 @@ public class ClusterBootstrapService { public static final Setting> INITIAL_MASTER_NODES_SETTING = Setting.listSetting("cluster.initial_master_nodes", Collections.emptyList(), Function.identity(), Property.NodeScope); + public static final Setting UNCONFIGURED_BOOTSTRAP_TIMEOUT_SETTING = + Setting.timeSetting("discovery.unconfigured_bootstrap_timeout", + TimeValue.timeValueSeconds(3), TimeValue.timeValueMillis(1), Property.NodeScope); + private final int initialMasterNodeCount; private final List initialMasterNodes; + @Nullable + private final TimeValue unconfiguredBootstrapTimeout; private final TransportService transportService; private volatile boolean running; public ClusterBootstrapService(Settings settings, TransportService transportService) { initialMasterNodeCount = INITIAL_MASTER_NODE_COUNT_SETTING.get(settings); initialMasterNodes = INITIAL_MASTER_NODES_SETTING.get(settings); + unconfiguredBootstrapTimeout = 
discoveryIsConfigured(settings) ? null : UNCONFIGURED_BOOTSTRAP_TIMEOUT_SETTING.get(settings); this.transportService = transportService; } + public static boolean discoveryIsConfigured(Settings settings) { + return Stream.of(DISCOVERY_HOSTS_PROVIDER_SETTING, DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING, + INITIAL_MASTER_NODE_COUNT_SETTING, INITIAL_MASTER_NODES_SETTING).anyMatch(s -> s.exists(settings)); + } + public void start() { assert running == false; running = true; - if ((initialMasterNodeCount > 0 || initialMasterNodes.isEmpty() == false) && transportService.getLocalNode().isMasterNode()) { + if (transportService.getLocalNode().isMasterNode() == false) { + return; + } + + if (unconfiguredBootstrapTimeout != null) { + logger.info("no discovery configuration found, will perform best-effort cluster bootstrapping after [{}] " + + "unless existing master is discovered", unconfiguredBootstrapTimeout); + final ThreadContext threadContext = transportService.getThreadPool().getThreadContext(); + try (ThreadContext.StoredContext ignore = threadContext.stashContext()) { + threadContext.markAsSystemContext(); + + transportService.getThreadPool().scheduleUnlessShuttingDown(unconfiguredBootstrapTimeout, Names.SAME, new Runnable() { + @Override + public void run() { + final GetDiscoveredNodesRequest request = new GetDiscoveredNodesRequest(); + logger.trace("sending {}", request); + transportService.sendRequest(transportService.getLocalNode(), GetDiscoveredNodesAction.NAME, request, + new TransportResponseHandler() { + @Override + public void handleResponse(GetDiscoveredNodesResponse response) { + logger.debug("discovered {}, starting to bootstrap", response.getNodes()); + awaitBootstrap(response.getBootstrapConfiguration()); + } + + @Override + public void handleException(TransportException exp) { + logger.warn("discovery attempt failed", exp); + } + + @Override + public String executor() { + return Names.SAME; + } + + @Override + public GetDiscoveredNodesResponse 
read(StreamInput in) throws IOException { + return new GetDiscoveredNodesResponse(in); + } + }); + } + + @Override + public String toString() { + return "unconfigured-discovery delayed bootstrap"; + } + }); + + } + } else if (initialMasterNodeCount > 0 || initialMasterNodes.isEmpty() == false) { logger.debug("unsafely waiting for discovery of [{}] master-eligible nodes", initialMasterNodeCount); final ThreadContext threadContext = transportService.getThreadPool().getThreadContext(); @@ -116,7 +181,6 @@ public GetDiscoveredNodesResponse read(StreamInput in) throws IOException { } public void stop() { - assert running == true; running = false; } diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java index 0f2b44bdd92f7..ec73ee25088a7 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java @@ -501,7 +501,9 @@ public void startInitialJoin() { becomeCandidate("startInitialJoin"); } - clusterBootstrapService.start(); + if (isInitialConfigurationSet() == false) { + clusterBootstrapService.start(); + } } @Override diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index ed92da83fb7eb..401cdd9059be0 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -473,6 +473,7 @@ public void apply(Settings value, Settings current, Settings previous) { TransportAddVotingConfigExclusionsAction.MAXIMUM_VOTING_CONFIG_EXCLUSIONS_SETTING, ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING, ClusterBootstrapService.INITIAL_MASTER_NODE_COUNT_SETTING, + ClusterBootstrapService.UNCONFIGURED_BOOTSTRAP_TIMEOUT_SETTING, 
LagDetector.CLUSTER_FOLLOWER_LAG_TIMEOUT_SETTING ))); diff --git a/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java b/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java index 4f3a3a615daee..b3ac4bc6157b7 100644 --- a/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java +++ b/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java @@ -21,10 +21,14 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.util.Constants; +import org.elasticsearch.cluster.coordination.ClusterBootstrapService; import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.common.CheckedConsumer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.BoundTransportAddress; import org.elasticsearch.common.transport.TransportAddress; +import org.elasticsearch.discovery.DiscoveryModule; +import org.elasticsearch.discovery.zen.SettingsBasedHostsProvider; import org.elasticsearch.monitor.jvm.JvmInfo; import org.elasticsearch.node.NodeValidationException; import org.elasticsearch.test.ESTestCase; @@ -700,4 +704,34 @@ public boolean alwaysEnforce() { assertThat(alwaysEnforced, hasToString(containsString("error"))); } + public void testDiscoveryConfiguredCheck() throws NodeValidationException { + final List checks = Collections.singletonList(new BootstrapChecks.DiscoveryConfiguredCheck()); + + final BootstrapContext zen2Context = new BootstrapContext(Settings.builder() + .put(DiscoveryModule.DISCOVERY_TYPE_SETTING.getKey(), ZEN2_DISCOVERY_TYPE).build(), MetaData.EMPTY_META_DATA); + + // not always enforced + BootstrapChecks.check(zen2Context, false, checks); + + // not enforced for non-zen2 discovery + BootstrapChecks.check(new BootstrapContext(Settings.builder().put(DiscoveryModule.DISCOVERY_TYPE_SETTING.getKey(), + randomFrom(ZEN_DISCOVERY_TYPE, "single-node", randomAlphaOfLength(5))).build(), MetaData.EMPTY_META_DATA), true, checks); + + 
final NodeValidationException e = expectThrows(NodeValidationException.class, + () -> BootstrapChecks.check(zen2Context, true, checks)); + assertThat(e, hasToString(containsString("the default discovery settings are unsuitable for production use; at least one " + + "of [discovery.zen.ping.unicast.hosts, discovery.zen.hosts_provider, cluster.initial_master_nodes] must be configured"))); + + CheckedConsumer ensureChecksPass = b -> + { + final BootstrapContext context = new BootstrapContext(b + .put(DiscoveryModule.DISCOVERY_TYPE_SETTING.getKey(), ZEN2_DISCOVERY_TYPE).build(), MetaData.EMPTY_META_DATA); + BootstrapChecks.check(context, true, checks); + }; + + ensureChecksPass.accept(Settings.builder().putList(DiscoveryModule.DISCOVERY_HOSTS_PROVIDER_SETTING.getKey())); + ensureChecksPass.accept(Settings.builder().putList(SettingsBasedHostsProvider.DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING.getKey())); + ensureChecksPass.accept(Settings.builder().put(ClusterBootstrapService.INITIAL_MASTER_NODE_COUNT_SETTING.getKey(), 0)); + ensureChecksPass.accept(Settings.builder().putList(ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING.getKey())); + } } diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterBootstrapServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterBootstrapServiceTests.java index 23030b9500fa3..618f24c8e3c8c 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterBootstrapServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterBootstrapServiceTests.java @@ -23,12 +23,14 @@ import org.elasticsearch.action.admin.cluster.bootstrap.BootstrapClusterAction; import org.elasticsearch.action.admin.cluster.bootstrap.BootstrapClusterRequest; import org.elasticsearch.action.admin.cluster.bootstrap.BootstrapClusterResponse; +import org.elasticsearch.action.admin.cluster.bootstrap.BootstrapConfiguration.NodeDescription; import 
org.elasticsearch.action.admin.cluster.bootstrap.GetDiscoveredNodesAction; import org.elasticsearch.action.admin.cluster.bootstrap.GetDiscoveredNodesRequest; import org.elasticsearch.action.admin.cluster.bootstrap.GetDiscoveredNodesResponse; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNode.Role; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.settings.Settings.Builder; import org.elasticsearch.tasks.Task; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.transport.MockTransport; @@ -50,7 +52,11 @@ import static java.util.Collections.singleton; import static org.elasticsearch.cluster.coordination.ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING; import static org.elasticsearch.cluster.coordination.ClusterBootstrapService.INITIAL_MASTER_NODE_COUNT_SETTING; +import static org.elasticsearch.common.settings.Settings.builder; +import static org.elasticsearch.discovery.DiscoveryModule.DISCOVERY_HOSTS_PROVIDER_SETTING; +import static org.elasticsearch.discovery.zen.SettingsBasedHostsProvider.DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING; import static org.elasticsearch.node.Node.NODE_NAME_SETTING; +import static org.hamcrest.Matchers.equalTo; public class ClusterBootstrapServiceTests extends ESTestCase { @@ -65,7 +71,7 @@ public void createServices() { otherNode1 = newDiscoveryNode("other1"); otherNode2 = newDiscoveryNode("other2"); - deterministicTaskQueue = new DeterministicTaskQueue(Settings.builder().put(NODE_NAME_SETTING.getKey(), "node").build(), random()); + deterministicTaskQueue = new DeterministicTaskQueue(builder().put(NODE_NAME_SETTING.getKey(), "node").build(), random()); final MockTransport transport = new MockTransport() { @Override @@ -77,6 +83,9 @@ protected void onSendRequest(long requestId, String action, TransportRequest req transportService = transport.createTransportService(Settings.EMPTY, deterministicTaskQueue.getThreadPool(), 
TransportService.NOOP_TRANSPORT_INTERCEPTOR, boundTransportAddress -> localNode, null, emptySet()); + clusterBootstrapService = new ClusterBootstrapService(builder().put(INITIAL_MASTER_NODE_COUNT_SETTING.getKey(), 3).build(), + transportService); + final Settings settings; if (randomBoolean()) { settings = Settings.builder().put(INITIAL_MASTER_NODE_COUNT_SETTING.getKey(), 3).build(); @@ -109,8 +118,24 @@ public void testDoesNothingOnNonMasterNodes() { deterministicTaskQueue.runAllTasks(); } - public void testDoesNothingIfSettingIsUnset() { - clusterBootstrapService = new ClusterBootstrapService(Settings.EMPTY, transportService); + public void testDoesNothingByDefaultIfHostsProviderConfigured() { + testConfiguredIfSettingSet(builder().putList(DISCOVERY_HOSTS_PROVIDER_SETTING.getKey())); + } + + public void testDoesNothingByDefaultIfUnicastHostsConfigured() { + testConfiguredIfSettingSet(builder().putList(DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING.getKey())); + } + + public void testDoesNothingByDefaultIfMasterNodeCountConfigured() { + testConfiguredIfSettingSet(builder().put(INITIAL_MASTER_NODE_COUNT_SETTING.getKey(), 0)); + } + + public void testDoesNothingByDefaultIfMasterNodesConfigured() { + testConfiguredIfSettingSet(builder().putList(INITIAL_MASTER_NODES_SETTING.getKey())); + } + + private void testConfiguredIfSettingSet(Builder builder) { + clusterBootstrapService = new ClusterBootstrapService(builder.build(), transportService); transportService.registerRequestHandler(GetDiscoveredNodesAction.NAME, Names.SAME, GetDiscoveredNodesRequest::new, (request, channel, task) -> { throw new AssertionError("should not make a discovery request"); @@ -119,6 +144,30 @@ public void testDoesNothingIfSettingIsUnset() { deterministicTaskQueue.runAllTasks(); } + public void testBootstrapsAutomaticallyWithDefaultConfiguration() { + clusterBootstrapService = new ClusterBootstrapService(Settings.EMPTY, transportService); + + final Set discoveredNodes = Stream.of(localNode, 
otherNode1, otherNode2).collect(Collectors.toSet()); + transportService.registerRequestHandler(GetDiscoveredNodesAction.NAME, Names.SAME, GetDiscoveredNodesRequest::new, + (request, channel, task) -> channel.sendResponse(new GetDiscoveredNodesResponse(discoveredNodes))); + + final AtomicBoolean bootstrapped = new AtomicBoolean(); + transportService.registerRequestHandler(BootstrapClusterAction.NAME, Names.SAME, BootstrapClusterRequest::new, + (request, channel, task) -> { + assertThat(request.getBootstrapConfiguration().getNodeDescriptions().stream() + .map(NodeDescription::getId).collect(Collectors.toSet()), + equalTo(discoveredNodes.stream().map(DiscoveryNode::getId).collect(Collectors.toSet()))); + + channel.sendResponse(new BootstrapClusterResponse(randomBoolean())); + assertTrue(bootstrapped.compareAndSet(false, true)); + }); + + startServices(); + deterministicTaskQueue.runAllTasks(); + + assertTrue(bootstrapped.get()); + } + public void testDoesNotRetryOnDiscoveryFailure() { transportService.registerRequestHandler(GetDiscoveredNodesAction.NAME, Names.SAME, GetDiscoveredNodesRequest::new, new TransportRequestHandler() {