From ce85de30999377ef1af40d401ec515e25f438c8b Mon Sep 17 00:00:00 2001 From: Artem Prigoda Date: Mon, 18 Oct 2021 22:43:59 +0200 Subject: [PATCH 1/2] [7.x] Use query param instead of a system property for opting in for new cluster health response code Backport #79351 to 7.x: The original change was implemented in #78940, but we have decided to move from a system property to a request parameter, so Cloud users/clients have an easier way to opt-in for the new status code. Relates #70849 --- .../elasticsearch/client/ClusterClientIT.java | 6 +- docs/reference/cluster/health.asciidoc | 5 ++ .../rest-api-spec/api/cluster.health.json | 4 ++ .../cluster.health/20_request_timeout.yml | 19 ++++++ .../cluster/health/ClusterHealthRequest.java | 22 +++++++ .../cluster/health/ClusterHealthResponse.java | 60 ++++++++----------- .../health/TransportClusterHealthAction.java | 16 ++--- .../cluster/RestClusterHealthAction.java | 3 + .../health/ClusterHealthRequestTests.java | 1 + .../health/ClusterHealthResponsesTests.java | 10 ++-- .../TransportClusterHealthActionTests.java | 10 ++-- .../cluster/RestClusterHealthActionTests.java | 4 +- .../core/ml/utils/MlIndexAndAliasTests.java | 5 +- .../TransformInternalIndexTests.java | 4 +- 14 files changed, 110 insertions(+), 59 deletions(-) diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ClusterClientIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ClusterClientIT.java index a8922f7d90cc0..6a25193cf69c2 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ClusterClientIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ClusterClientIT.java @@ -37,13 +37,13 @@ import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeUnit; -import org.elasticsearch.core.TimeValue; -import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.core.TimeValue; import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.transport.RemoteClusterService; import org.elasticsearch.transport.SniffConnectionStrategy; +import org.elasticsearch.xcontent.XContentType; import java.io.IOException; import java.util.Collections; @@ -317,7 +317,7 @@ public void testClusterHealthNotFoundIndex() throws IOException { assertThat(response.getStatus(), equalTo(ClusterHealthStatus.RED)); assertNoIndices(response); assertWarnings("The HTTP status code for a cluster health timeout will be changed from 408 to 200 in a " + - "future version. Set the [es.cluster_health.request_timeout_200] system property to [true] to suppress this message and " + + "future version. Set the [return_200_for_cluster_health_timeout] query parameter to [true] to suppress this message and " + "opt in to the future behaviour now."); } diff --git a/docs/reference/cluster/health.asciidoc b/docs/reference/cluster/health.asciidoc index e958a29d75bbc..92273b15e2e5e 100644 --- a/docs/reference/cluster/health.asciidoc +++ b/docs/reference/cluster/health.asciidoc @@ -97,6 +97,11 @@ include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] provided or better, i.e. `green` > `yellow` > `red`. By default, will not wait for any status. +`return_200_for_cluster_health_timeout`:: + (Optional, Boolean) A boolean value which controls whether to return HTTP 200 + status code instead of HTTP 408 in case of a cluster health timeout from + the server side. Defaults to false. + [[cluster-health-api-response-body]] ==== {api-response-body-title} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.health.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.health.json index 91712bbbded29..7d33fdd52ab81 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.health.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.health.json @@ -102,6 +102,10 @@ "red" ], "description":"Wait until cluster is in a specific state" + }, + "return_200_for_cluster_health_timeout":{ + "type":"boolean", + "description":"Whether to return HTTP 200 instead of 408 in case of a cluster health timeout from the server side" } } } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml index 66a7cb2b48dbd..770d584552e81 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml @@ -35,3 +35,22 @@ - match: { initializing_shards: 0 } - match: { unassigned_shards: 0 } - gte: { number_of_pending_tasks: 0 } + +--- +"cluster health request timeout with 200 response code": + - do: + cluster.health: + timeout: 1ms + wait_for_active_shards: 5 + return_200_for_cluster_health_timeout: true + + - is_true: cluster_name + - is_true: timed_out + - gte: { number_of_nodes: 1 } + - gte: { number_of_data_nodes: 1 } + - match: { active_primary_shards: 0 } + - match: { active_shards: 0 } + - match: { relocating_shards: 0 } + - match: { initializing_shards: 0 } + - match: { unassigned_shards: 0 } + - gte: { number_of_pending_tasks: 0 } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequest.java index a3d6fc69325da..6448ac47321a0 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequest.java @@ -35,6 +35,8 @@ public class ClusterHealthRequest extends MasterNodeReadRequest INDEX_PARSER = (XContentParser parser, Void context, String index) -> ClusterIndexHealth.innerFromXContent(parser, index); - private static final String ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY = "es.cluster_health.request_timeout_200"; + static final String ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY = "return_200_for_cluster_health_timeout"; static final String CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG = "The HTTP status code for a cluster health timeout " + "will be changed from 408 to 200 in a future version. Set the [" + ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY + "] " + - "system property to [true] to suppress this message and opt in to the future behaviour now."; + "query parameter to [true] to suppress this message and opt in to the future behaviour now."; static { // ClusterStateHealth fields @@ -138,15 +139,7 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo private boolean timedOut = false; private ClusterStateHealth clusterStateHealth; private ClusterHealthStatus clusterHealthStatus; - private boolean esClusterHealthRequestTimeout200 = readEsClusterHealthRequestTimeout200FromProperty(); - - public ClusterHealthResponse() { - } - - /** For the testing of opting in for the 200 status code without setting a system property */ - ClusterHealthResponse(boolean esClusterHealthRequestTimeout200) { - this.esClusterHealthRequestTimeout200 = esClusterHealthRequestTimeout200; - } + private boolean return200ForClusterHealthTimeout; public ClusterHealthResponse(StreamInput in) throws IOException { super(in); @@ -158,15 +151,21 @@ public ClusterHealthResponse(StreamInput in) throws IOException { numberOfInFlightFetch = in.readInt(); delayedUnassignedShards= in.readInt(); taskMaxWaitingTime = in.readTimeValue(); + if (in.getVersion().onOrAfter(Version.V_7_16_0)) { + return200ForClusterHealthTimeout = in.readBoolean(); + } } /** needed for plugins BWC */ - public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState) { - this(clusterName, concreteIndices, clusterState, -1, -1, -1, TimeValue.timeValueHours(0)); + public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState, + boolean return200ForServerTimeout) { + this(clusterName, concreteIndices, clusterState, -1, -1, -1, TimeValue.timeValueHours(0), + return200ForServerTimeout); } public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState, int numberOfPendingTasks, - int numberOfInFlightFetch, int delayedUnassignedShards, TimeValue taskMaxWaitingTime) { + int numberOfInFlightFetch, int delayedUnassignedShards, TimeValue taskMaxWaitingTime, + boolean return200ForServerTimeout) { this.clusterName = clusterName; this.numberOfPendingTasks = numberOfPendingTasks; this.numberOfInFlightFetch = numberOfInFlightFetch; @@ -174,6 +173,7 @@ public ClusterHealthResponse(String clusterName, String[] concreteIndices, Clust this.taskMaxWaitingTime = taskMaxWaitingTime; this.clusterStateHealth = new ClusterStateHealth(clusterState, concreteIndices); this.clusterHealthStatus = clusterStateHealth.getStatus(); + this.return200ForClusterHealthTimeout = return200ForServerTimeout; } /** @@ -305,6 +305,11 @@ public void writeTo(StreamOutput out) throws IOException { out.writeInt(numberOfInFlightFetch); out.writeInt(delayedUnassignedShards); out.writeTimeValue(taskMaxWaitingTime); + if (out.getVersion().onOrAfter(Version.V_7_16_0)) { + out.writeBoolean(return200ForClusterHealthTimeout); + } else if (return200ForClusterHealthTimeout) { + throw new IllegalArgumentException("Can't fix response code in a cluster involving nodes with version " + out.getVersion()); + } } @Override @@ -317,7 +322,7 @@ public RestStatus status() { if (isTimedOut() == false) { return RestStatus.OK; } - if (esClusterHealthRequestTimeout200) { + if (return200ForClusterHealthTimeout) { return RestStatus.OK; } else { deprecationLogger.critical(DeprecationCategory.API,"cluster_health_request_timeout", @@ -383,17 +388,4 @@ public int hashCode() { return Objects.hash(clusterName, numberOfPendingTasks, numberOfInFlightFetch, delayedUnassignedShards, taskMaxWaitingTime, timedOut, clusterStateHealth, clusterHealthStatus); } - - private static boolean readEsClusterHealthRequestTimeout200FromProperty() { - String property = System.getProperty(ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY); - if (property == null) { - return false; - } - if (Boolean.parseBoolean(property)) { - return true; - } else { - throw new IllegalArgumentException(ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY + " can only be unset or [true] but was [" - + property + "]"); - } - } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthAction.java index 6eede2a2b208c..bad415b9d1383 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthAction.java @@ -30,8 +30,8 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.core.TimeValue; import org.elasticsearch.common.util.CollectionUtils; +import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.IndexNotFoundException; import org.elasticsearch.node.NodeClosedException; import org.elasticsearch.tasks.Task; @@ -232,7 +232,8 @@ private enum TimeoutState { private ClusterHealthResponse getResponse(final ClusterHealthRequest request, ClusterState clusterState, final int waitFor, TimeoutState timeoutState) { - ClusterHealthResponse response = clusterHealth(request, clusterState, clusterService.getMasterService().numberOfPendingTasks(), + ClusterHealthResponse response = clusterHealth(request, clusterState, + clusterService.getMasterService().numberOfPendingTasks(), allocationService.getNumberOfInFlightFetches(), clusterService.getMasterService().getMaxTaskWaitTime()); int readyCounter = prepareResponse(request, response, clusterState, indexNameExpressionResolver); boolean valid = (readyCounter == waitFor); @@ -331,8 +332,8 @@ static int prepareResponse(final ClusterHealthRequest request, final ClusterHeal } - private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, ClusterState clusterState, int numberOfPendingTasks, - int numberOfInFlightFetch, TimeValue pendingTaskTimeInQueue) { + private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, ClusterState clusterState, + int numberOfPendingTasks, int numberOfInFlightFetch, TimeValue pendingTaskTimeInQueue) { if (logger.isTraceEnabled()) { logger.trace("Calculating health based on state version [{}]", clusterState.version()); } @@ -344,12 +345,13 @@ private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, Cluste // one of the specified indices is not there - treat it as RED. ClusterHealthResponse response = new ClusterHealthResponse(clusterState.getClusterName().value(), Strings.EMPTY_ARRAY, clusterState, numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), - pendingTaskTimeInQueue); + pendingTaskTimeInQueue, request.doesReturn200ForClusterHealthTimeout()); response.setStatus(ClusterHealthStatus.RED); return response; } - return new ClusterHealthResponse(clusterState.getClusterName().value(), concreteIndices, clusterState, numberOfPendingTasks, - numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue); + return new ClusterHealthResponse(clusterState.getClusterName().value(), concreteIndices, clusterState, + numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue, + request.doesReturn200ForClusterHealthTimeout()); } } diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterHealthAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterHealthAction.java index 9dad653b24de0..3168e8d11bc52 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterHealthAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterHealthAction.java @@ -83,6 +83,9 @@ public static ClusterHealthRequest fromRequest(final RestRequest request) { if (request.param("wait_for_events") != null) { clusterHealthRequest.waitForEvents(Priority.valueOf(request.param("wait_for_events").toUpperCase(Locale.ROOT))); } + clusterHealthRequest.return200ForClusterHealthTimeout(request.paramAsBoolean( + "return_200_for_cluster_health_timeout", + clusterHealthRequest.doesReturn200ForClusterHealthTimeout())); return clusterHealthRequest; } diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequestTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequestTests.java index c4e67a339bc60..8fc687d65841f 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequestTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequestTests.java @@ -42,6 +42,7 @@ public void testSerialize() throws Exception { assertThat(cloneRequest.waitForEvents(), equalTo(originalRequest.waitForEvents())); assertIndicesEquals(cloneRequest.indices(), originalRequest.indices()); assertThat(cloneRequest.indicesOptions(), equalTo(originalRequest.indicesOptions())); + assertThat(cloneRequest.doesReturn200ForClusterHealthTimeout(), equalTo(originalRequest.doesReturn200ForClusterHealthTimeout())); } public void testRequestReturnsHiddenIndicesByDefault() { diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java index 01ad7731d0f76..fd3a63dd290a1 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java @@ -20,10 +20,10 @@ import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.TimeValue; -import org.elasticsearch.xcontent.ToXContent; -import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.test.AbstractSerializingTestCase; +import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xcontent.XContentParser; import org.hamcrest.Matchers; import java.io.IOException; @@ -43,7 +43,7 @@ public class ClusterHealthResponsesTests extends AbstractSerializingTestCase params) { diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAliasTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAliasTests.java index 4ff9651559c13..e4a838062d8b5 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAliasTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAliasTests.java @@ -35,11 +35,12 @@ import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.collect.ImmutableOpenMap; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.TransportAddress; -import org.elasticsearch.core.TimeValue; import org.elasticsearch.common.util.concurrent.ThreadContext; +import org.elasticsearch.core.TimeValue; import org.elasticsearch.indices.TestIndexNameExpressionResolver; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; @@ -109,7 +110,7 @@ public void setUpMocks() { clusterAdminClient = mock(ClusterAdminClient.class); doAnswer(invocationOnMock -> { ActionListener listener = (ActionListener) invocationOnMock.getArguments()[1]; - listener.onResponse(new ClusterHealthResponse()); + listener.onResponse(new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, false)); return null; }).when(clusterAdminClient).health(any(ClusterHealthRequest.class), any(ActionListener.class)); diff --git a/x-pack/plugin/transform/src/test/java/org/elasticsearch/xpack/transform/persistence/TransformInternalIndexTests.java b/x-pack/plugin/transform/src/test/java/org/elasticsearch/xpack/transform/persistence/TransformInternalIndexTests.java index 0a79e22172907..6e1be09d1631d 100644 --- a/x-pack/plugin/transform/src/test/java/org/elasticsearch/xpack/transform/persistence/TransformInternalIndexTests.java +++ b/x-pack/plugin/transform/src/test/java/org/elasticsearch/xpack/transform/persistence/TransformInternalIndexTests.java @@ -190,7 +190,7 @@ public void testCreateLatestVersionedIndexIfRequired_GivenShardInitializationPen doAnswer(invocationOnMock -> { @SuppressWarnings("unchecked") ActionListener listener = (ActionListener) invocationOnMock.getArguments()[1]; - listener.onResponse(new ClusterHealthResponse()); + listener.onResponse(new ClusterHealthResponse("", new String[]{}, ClusterState.EMPTY_STATE, false)); return null; }).when(clusterClient).health(any(), any()); @@ -274,7 +274,7 @@ public void testCreateLatestVersionedIndexIfRequired_GivenConcurrentCreationShar doAnswer(invocationOnMock -> { @SuppressWarnings("unchecked") ActionListener listener = (ActionListener) invocationOnMock.getArguments()[1]; - listener.onResponse(new ClusterHealthResponse()); + listener.onResponse(new ClusterHealthResponse("", new String[]{}, ClusterState.EMPTY_STATE, false)); return null; }).when(clusterClient).health(any(), any()); From 020c66b9e91c1b95e967c50ce89bc086f237ee02 Mon Sep 17 00:00:00 2001 From: Artem Prigoda Date: Mon, 18 Oct 2021 22:58:05 +0200 Subject: [PATCH 2/2] Don't skip the test on 7.16 --- .../rest-api-spec/test/cluster.health/20_request_timeout.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml index 770d584552e81..74261d799ba7d 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml @@ -38,6 +38,9 @@ --- "cluster health request timeout with 200 response code": + - skip: + version: " - 7.15.99" + reason: "return_200_for_cluster_health_timeout was added in 7.16" - do: cluster.health: timeout: 1ms