From 9ad843969ec0db9c1fbc1e72fec3d396b4009561 Mon Sep 17 00:00:00 2001 From: Artem Prigoda Date: Sat, 6 Nov 2021 19:46:27 +0100 Subject: [PATCH] Return 200 OK response code for a cluster health timeout (#78968) Returning 408 for a cluster health timeout was deprecated in #78180 and backported to 7.x in #78940 Now we can do a breaking change in 8.0 respecting the user choice to run ES in 7.x compatible mode via the REST Compatibility layer. Fixes #70849 --- .../elasticsearch/client/ClusterClientIT.java | 7 +--- docs/changelog/78968.yaml | 20 ++++++++++++ .../migration/migrate_8_0/cluster.asciidoc | 17 ++++++++++ .../cluster.health/20_request_timeout.yml | 8 +++-- .../cluster/health/ClusterHealthResponse.java | 25 ++++++--------- .../xcontent/StatusToXContentObject.java | 5 +++ .../action/RestStatusToXContentListener.java | 2 +- .../cluster/RestClusterHealthAction.java | 21 ++++++++++-- .../health/ClusterHealthResponsesTests.java | 32 ++++++++++++++----- .../cluster/RestClusterHealthActionTests.java | 9 ++++++ .../test/rollup/security_tests.yml | 2 -- 11 files changed, 110 insertions(+), 38 deletions(-) create mode 100644 docs/changelog/78968.yaml diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ClusterClientIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ClusterClientIT.java index 747b18656aef5..2c1258358b962 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ClusterClientIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ClusterClientIT.java @@ -334,14 +334,9 @@ public void testClusterHealthNotFoundIndex() throws IOException { assertThat(response, notNullValue()); assertThat(response.isTimedOut(), equalTo(true)); - assertThat(response.status(), equalTo(RestStatus.REQUEST_TIMEOUT)); + assertThat(response.status(), equalTo(RestStatus.OK)); assertThat(response.getStatus(), equalTo(ClusterHealthStatus.RED)); assertNoIndices(response); - assertCriticalWarnings( - "The HTTP status code for a cluster health timeout will be changed from 408 to 200 in a " - + "future version. Set the [return_200_for_cluster_health_timeout] query parameter to [true] to suppress this message and " - + "opt in to the future behaviour now." - ); } public void testRemoteInfo() throws Exception { diff --git a/docs/changelog/78968.yaml b/docs/changelog/78968.yaml new file mode 100644 index 0000000000000..a52c9ac7315ed --- /dev/null +++ b/docs/changelog/78968.yaml @@ -0,0 +1,20 @@ +pr: 78968 +summary: HTTP Status code has changed for the Cluster Health API in case of a server timeout +area: CRUD +type: breaking +issues: [] +breaking: + title: HTTP Status code has changed for the Cluster Health API in case of a server timeout + area: API + details: |- + The cluster health API includes options for waiting + for certain health conditions to be satisfied. If the requested conditions are + not satisfied within a timeout then ES will send back a normal response + including the field `"timed_out": true`. In earlier versions it would also use + the HTTP response code `408 Request timeout` if the request timed out, and `200 + OK` otherwise. The `408 Request timeout` response code is not appropriate for + this situation, so from version 8.0.0 ES will use the response code `200 OK` + for both cases. + impact: |- + To detect a server timeout, check the `timed_out` field of the JSON response. + notable: true diff --git a/docs/reference/migration/migrate_8_0/cluster.asciidoc b/docs/reference/migration/migrate_8_0/cluster.asciidoc index 6e25193c6d82e..950057a74c185 100644 --- a/docs/reference/migration/migrate_8_0/cluster.asciidoc +++ b/docs/reference/migration/migrate_8_0/cluster.asciidoc @@ -32,4 +32,21 @@ time out. *Impact* + Do not set `cluster.join.timeout` in your `elasticsearch.yml` file. ==== + +.HTTP Status code has changed for the Cluster Health API in case of a server timeout. +[%collapsible] +==== +*Details* + +The {ref}/cluster-health.html[cluster health API] includes options for waiting +for certain health conditions to be satisfied. If the requested conditions are +not satisfied within a timeout then {es} will send back a normal response +including the field `"timed_out": true`. In earlier versions it would also use +the HTTP response code `408 Request timeout` if the request timed out, and `200 +OK` otherwise. The `408 Request timeout` response code is not appropriate for +this situation, so from version 8.0.0 {es} will use the response code `200 OK` +for both cases. + +*Impact* + +To detect a server timeout, check the `timed_out` field of the JSON response. +==== // end::notable-breaking-changes[] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml index 16d943332dc26..935ca652ddb8e 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml @@ -1,7 +1,9 @@ --- "cluster health request timeout on waiting for nodes": + - skip: + version: " - 8.0.99" + reason: "Set for 7.99.99 when back-ported to 8.0" - do: - catch: request_timeout cluster.health: wait_for_nodes: 10 timeout: 1ms @@ -19,8 +21,10 @@ --- "cluster health request timeout waiting for active shards": + - skip: + version: " - 8.0.99" + reason: "Set for 7.99.99 when back-ported to 8.0" - do: - catch: request_timeout cluster.health: timeout: 1ms wait_for_active_shards: 5 diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponse.java index ac703a6adc3e4..b1cee725e18af 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponse.java @@ -16,11 +16,10 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.xcontent.StatusToXContentObject; +import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.core.TimeValue; import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.rest.action.search.RestSearchAction; import org.elasticsearch.xcontent.ConstructingObjectParser; import org.elasticsearch.xcontent.ObjectParser; import org.elasticsearch.xcontent.ParseField; @@ -57,7 +56,6 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo private static final String INITIALIZING_SHARDS = "initializing_shards"; private static final String UNASSIGNED_SHARDS = "unassigned_shards"; private static final String INDICES = "indices"; - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(RestSearchAction.class); private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( "cluster_health_response", @@ -122,12 +120,6 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo XContentParser parser, Void context, String index) -> ClusterIndexHealth.innerFromXContent(parser, index); - static final String ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY = "return_200_for_cluster_health_timeout"; - static final String CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG = "The HTTP status code for a cluster health timeout " - + "will be changed from 408 to 200 in a future version. Set the [" - + ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY - + "] " - + "query parameter to [true] to suppress this message and opt in to the future behaviour now."; static { // ClusterStateHealth fields @@ -351,15 +343,16 @@ public String toString() { @Override public RestStatus status() { - if (isTimedOut() == false) { - return RestStatus.OK; - } - if (return200ForClusterHealthTimeout) { - return RestStatus.OK; - } else { - deprecationLogger.compatibleCritical("cluster_health_request_timeout", CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG); + return status(RestApiVersion.current()); + } + + @Override + public RestStatus status(RestApiVersion restApiVersion) { + // Legacy behaviour + if (isTimedOut() && restApiVersion == RestApiVersion.V_7 && return200ForClusterHealthTimeout == false) { return RestStatus.REQUEST_TIMEOUT; } + return RestStatus.OK; } @Override diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/StatusToXContentObject.java b/server/src/main/java/org/elasticsearch/common/xcontent/StatusToXContentObject.java index e0af9777fddfc..eca47c5ebf11d 100644 --- a/server/src/main/java/org/elasticsearch/common/xcontent/StatusToXContentObject.java +++ b/server/src/main/java/org/elasticsearch/common/xcontent/StatusToXContentObject.java @@ -7,6 +7,7 @@ */ package org.elasticsearch.common.xcontent; +import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.xcontent.ToXContentObject; @@ -20,4 +21,8 @@ public interface StatusToXContentObject extends ToXContentObject { * Returns the REST status to make sure it is returned correctly */ RestStatus status(); + + default RestStatus status(RestApiVersion restApiVersion) { + return status(); + } } diff --git a/server/src/main/java/org/elasticsearch/rest/action/RestStatusToXContentListener.java b/server/src/main/java/org/elasticsearch/rest/action/RestStatusToXContentListener.java index 43253b0b0aa7c..3e60d36145148 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/RestStatusToXContentListener.java +++ b/server/src/main/java/org/elasticsearch/rest/action/RestStatusToXContentListener.java @@ -44,7 +44,7 @@ public RestStatusToXContentListener(RestChannel channel, Function routes() { return List.of(new Route(GET, "/_cluster/health"), new Route(GET, "/_cluster/health/{index}")); @@ -81,9 +90,15 @@ public static ClusterHealthRequest fromRequest(final RestRequest request) { if (request.param("wait_for_events") != null) { clusterHealthRequest.waitForEvents(Priority.valueOf(request.param("wait_for_events").toUpperCase(Locale.ROOT))); } - clusterHealthRequest.return200ForClusterHealthTimeout( - request.paramAsBoolean("return_200_for_cluster_health_timeout", clusterHealthRequest.doesReturn200ForClusterHealthTimeout()) - ); + String return200ForClusterHealthTimeout = request.param(RETURN_200_FOR_CLUSTER_HEALTH_TIMEOUT); + if (return200ForClusterHealthTimeout != null) { + deprecationLogger.warn( + DeprecationCategory.API, + "cluster_health_request_timeout", + CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG + ); + } + clusterHealthRequest.return200ForClusterHealthTimeout(Boolean.parseBoolean(return200ForClusterHealthTimeout)); return clusterHealthRequest; } diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java index fc05b68130d7b..db49f4f75ba7d 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java @@ -20,6 +20,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.core.TimeValue; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.test.AbstractSerializingTestCase; @@ -43,16 +44,11 @@ public class ClusterHealthResponsesTests extends AbstractSerializingTestCase { private final ClusterHealthRequest.Level level = randomFrom(ClusterHealthRequest.Level.values()); - public void testIsTimeout() { + public void testIsTimeoutReturns200ByDefault() { ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, false); for (int i = 0; i < 5; i++) { res.setTimedOut(randomBoolean()); - if (res.isTimedOut()) { - assertEquals(RestStatus.REQUEST_TIMEOUT, res.status()); - assertCriticalWarnings(ClusterHealthResponse.CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG); - } else { - assertEquals(RestStatus.OK, res.status()); - } + assertEquals(RestStatus.OK, res.status(RestApiVersion.V_8)); } } @@ -60,7 +56,27 @@ public void testTimeoutReturns200IfOptedIn() { ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, true); for (int i = 0; i < 5; i++) { res.setTimedOut(randomBoolean()); - assertEquals(RestStatus.OK, res.status()); + assertEquals(RestStatus.OK, res.status(RestApiVersion.V_8)); + } + } + + public void testTimeoutReturns200InIfOptedInV7CompatibilityMode() { + ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, true); + for (int i = 0; i < 5; i++) { + res.setTimedOut(randomBoolean()); + assertEquals(RestStatus.OK, res.status(RestApiVersion.V_7)); + } + } + + public void testTimeoutReturns408InV7CompatibilityMode() { + ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, false); + for (int i = 0; i < 5; i++) { + res.setTimedOut(randomBoolean()); + if (res.isTimedOut()) { + assertEquals(RestStatus.REQUEST_TIMEOUT, res.status(RestApiVersion.V_7)); + } else { + assertEquals(RestStatus.OK, res.status(RestApiVersion.V_7)); + } } } diff --git a/server/src/test/java/org/elasticsearch/rest/action/admin/cluster/RestClusterHealthActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/admin/cluster/RestClusterHealthActionTests.java index 2abdc42edbc65..0a8c0c82ede82 100644 --- a/server/src/test/java/org/elasticsearch/rest/action/admin/cluster/RestClusterHealthActionTests.java +++ b/server/src/test/java/org/elasticsearch/rest/action/admin/cluster/RestClusterHealthActionTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.rest.action.admin.cluster; +import org.apache.logging.log4j.Level; import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest; import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.cluster.health.ClusterHealthStatus; @@ -68,6 +69,14 @@ public void testFromRequest() { assertThat(clusterHealthRequest.waitForNodes(), equalTo(waitForNodes)); assertThat(clusterHealthRequest.waitForEvents(), equalTo(waitForEvents)); assertThat(clusterHealthRequest.doesReturn200ForClusterHealthTimeout(), equalTo(requestTimeout200)); + + assertWarnings( + true, + new DeprecationWarning( + Level.WARN, + "the [return_200_for_cluster_health_timeout] parameter is deprecated and will be removed in a future release." + ) + ); } private FakeRestRequest buildRestRequest(Map params) { diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rollup/security_tests.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rollup/security_tests.yml index 07f4e2b62a6f9..51a7d65176658 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rollup/security_tests.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rollup/security_tests.yml @@ -126,7 +126,6 @@ teardown: # this is a hacky way to sleep for 5s, since we will never have 10 nodes - do: - catch: request_timeout cluster.health: wait_for_nodes: 10 timeout: "5s" @@ -286,7 +285,6 @@ teardown: # this is a hacky way to sleep for 5s, since we will never have 10 nodes - do: - catch: request_timeout cluster.health: wait_for_nodes: 10 timeout: "5s"