From 130d2c4bc85358d02bfc868970e2d63164fc7ae0 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Mon, 20 Mar 2023 10:57:22 -0300 Subject: [PATCH 01/28] minimal infra to add new shard limits health indicator --- .../ShardLimitsHealthIndicatorService.java | 141 ++++++++++++++++++ .../indices/ShardLimitValidator.java | 32 +++- 2 files changed, 171 insertions(+), 2 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java new file mode 100644 index 0000000000000..07c4bdcdaaf0e --- /dev/null +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -0,0 +1,141 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.health.node; + +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.health.HealthIndicatorDetails; +import org.elasticsearch.health.HealthIndicatorResult; +import org.elasticsearch.health.HealthIndicatorService; +import org.elasticsearch.health.HealthStatus; +import org.elasticsearch.health.metadata.HealthMetadata; +import org.elasticsearch.indices.ShardLimitValidator; + +import java.util.List; + +/** + * This indicator reports health data about the shard limit across the cluster. + * + *

+ * The indicator will report: + * * RED when there's room for less than 5 shards (either normal or frozen nodes) + * * YELLOW when there's room for less than 10 shards (either normal or frozen nodes) + * * GREEN otherwise + *

+ * + * Although the `max_shard_per_node(.frozen)?` information is scoped by Node, we use the information from master because there is where + * the available room for new shards is checked before creating new indices. + */ +public class ShardLimitsHealthIndicatorService implements HealthIndicatorService { + + private static final String NAME = "shard_limits"; + + private final ClusterService clusterService; + + public ShardLimitsHealthIndicatorService(ClusterService clusterService) { + this.clusterService = clusterService; + } + + @Override + public String name() { + return NAME; + } + + @Override + public HealthIndicatorResult calculate(boolean verbose, int maxAffectedResourcesCount, HealthInfo healthInfo) { + var healthMetadata = HealthMetadata.getFromClusterState(clusterService.state()); + + if (healthMetadata == null || healthMetadata.getShardLimitsMetadata() == null) { + return unknownIndicator(); + } + + var normalNodesIndicator = calculateForNormalNodes(healthMetadata.getShardLimitsMetadata()); + var frozenNodesIndicator = calculateForFrozenNodes(healthMetadata.getShardLimitsMetadata()); + + return mergeIndicators(normalNodesIndicator, frozenNodesIndicator); + } + + private HealthIndicatorResult mergeIndicators(HealthIndicatorResult normalNodesIndicator, HealthIndicatorResult frozenNodesIndicator) { + // TODO: implement + return unknownIndicator(); + } + + private HealthIndicatorResult calculateForNormalNodes(HealthMetadata.ShardLimits shardLimits) { + int maxShardsPerNodeSetting = shardLimits.maxShardsPerNode(); + var result = ShardLimitValidator.checkShardLimitForNormalNodes(maxShardsPerNodeSetting, 5, 1, clusterService.state()); + + if (result.canAddShards() == false) { + return createIndicator( + HealthStatus.RED, + "Cluster is close to reach the maximum number of shards (room available is lower than " + result.totalShardsToAdd() + ")", + HealthIndicatorDetails.EMPTY, + List.of(), + List.of() + ); + } + + result = ShardLimitValidator.checkShardLimitForNormalNodes(maxShardsPerNodeSetting, 10, 1, clusterService.state()); + + if (result.canAddShards() == false) { + return createIndicator( + HealthStatus.YELLOW, + "Cluster is close to reach the maximum number of shards (room available is lower than " + result.totalShardsToAdd() + ")", + HealthIndicatorDetails.EMPTY, + List.of(), + List.of() + ); + } + + return createIndicator( + HealthStatus.GREEN, + "The cluster has enough room to add new shards to normal nodes.", + HealthIndicatorDetails.EMPTY, + List.of(), + List.of() + ); + } + + private HealthIndicatorResult calculateForFrozenNodes(HealthMetadata.ShardLimits shardLimits) { + int maxShardsPerNodeSetting = shardLimits.maxShardsPerNodeFrozen(); + var result = ShardLimitValidator.checkShardLimitForFrozenNodes(maxShardsPerNodeSetting, 5, 1, clusterService.state()); + + if (result.canAddShards() == false) { + return createIndicator( + HealthStatus.RED, + "Cluster is close to reach the maximum number of shards (room available is lower than " + result.totalShardsToAdd() + ")", + HealthIndicatorDetails.EMPTY, + List.of(), + List.of() + ); + } + + result = ShardLimitValidator.checkShardLimitForFrozenNodes(maxShardsPerNodeSetting, 10, 1, clusterService.state()); + + if (result.canAddShards() == false) { + return createIndicator( + HealthStatus.YELLOW, + "Cluster is close to reach the maximum number of shards (room available is lower than " + result.totalShardsToAdd() + ")", + HealthIndicatorDetails.EMPTY, + List.of(), + List.of() + ); + } + + return createIndicator( + HealthStatus.GREEN, + "The cluster has enough room to add new shards to frozen nodes.", + HealthIndicatorDetails.EMPTY, + List.of(), + List.of() + ); + } + + private HealthIndicatorResult unknownIndicator() { + return createIndicator(HealthStatus.UNKNOWN, "No shard limits data.", HealthIndicatorDetails.EMPTY, List.of(), List.of()); + } +} diff --git a/server/src/main/java/org/elasticsearch/indices/ShardLimitValidator.java b/server/src/main/java/org/elasticsearch/indices/ShardLimitValidator.java index 9ed799912e8fd..b82a5737ff47b 100644 --- a/server/src/main/java/org/elasticsearch/indices/ShardLimitValidator.java +++ b/server/src/main/java/org/elasticsearch/indices/ShardLimitValidator.java @@ -206,16 +206,44 @@ private Result checkShardLimitOnBothGroups(int newShards, int newFrozenShards, C * @param state The cluster state, used to get cluster settings and to get the number of open shards already in the cluster */ public static Result checkShardLimitForNormalNodes(int numberOfNewShards, int replicas, ClusterState state) { - final int maxShardsPerNode = SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(state.getMetadata().settings()); + return checkShardLimitForNormalNodes( + SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(state.getMetadata().settings()), + numberOfNewShards, + replicas, + state + ); + } + + public static Result checkShardLimitForNormalNodes( + int maxShardsPerNodeSetting, + int numberOfNewShards, + int replicas, + ClusterState state + ) { return checkShardLimit( numberOfNewShards * (1 + replicas), state, - maxShardsPerNode, + maxShardsPerNodeSetting, nodeCount(state, ShardLimitValidator::hasNonFrozen), NORMAL_GROUP ); } + public static Result checkShardLimitForFrozenNodes( + int maxShardsPerNodeSetting, + int numberOfNewShards, + int replicas, + ClusterState state + ) { + return checkShardLimit( + numberOfNewShards * (1 + replicas), + state, + maxShardsPerNodeSetting, + nodeCount(state, ShardLimitValidator::hasFrozen), + FROZEN_GROUP + ); + } + // package-private for testing static Result checkShardLimit(int newShards, ClusterState state, int maxShardsPerNode, int nodeCount, String group) { int maxShardsInCluster = maxShardsPerNode * nodeCount; From ea660a86eda23a791f8915054473a9d9b087e112 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Mon, 20 Mar 2023 11:09:04 -0300 Subject: [PATCH 02/28] calculate indicators for red and green --- .../ShardLimitsHealthIndicatorService.java | 124 ++++++++++-------- 1 file changed, 69 insertions(+), 55 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index 07c4bdcdaaf0e..35503a791464d 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -17,6 +17,10 @@ import org.elasticsearch.indices.ShardLimitValidator; import java.util.List; +import java.util.stream.Stream; + +import static org.elasticsearch.indices.ShardLimitValidator.FROZEN_GROUP; +import static org.elasticsearch.indices.ShardLimitValidator.NORMAL_GROUP; /** * This indicator reports health data about the shard limit across the cluster. @@ -49,93 +53,103 @@ public String name() { @Override public HealthIndicatorResult calculate(boolean verbose, int maxAffectedResourcesCount, HealthInfo healthInfo) { var healthMetadata = HealthMetadata.getFromClusterState(clusterService.state()); - if (healthMetadata == null || healthMetadata.getShardLimitsMetadata() == null) { return unknownIndicator(); } - var normalNodesIndicator = calculateForNormalNodes(healthMetadata.getShardLimitsMetadata()); - var frozenNodesIndicator = calculateForFrozenNodes(healthMetadata.getShardLimitsMetadata()); - - return mergeIndicators(normalNodesIndicator, frozenNodesIndicator); + return mergeIndicators( + calculateForNormalNodes(healthMetadata.getShardLimitsMetadata()), + calculateForFrozenNodes(healthMetadata.getShardLimitsMetadata()) + ); } - private HealthIndicatorResult mergeIndicators(HealthIndicatorResult normalNodesIndicator, HealthIndicatorResult frozenNodesIndicator) { - // TODO: implement - return unknownIndicator(); + private HealthIndicatorResult mergeIndicators(StatusResult normalNodes, StatusResult frozenNodes) { + HealthStatus finalStatus = HealthStatus.merge(Stream.of(normalNodes.status, frozenNodes.status)); + var symptomBuilder = new StringBuilder(); + + if (finalStatus == HealthStatus.GREEN) { + symptomBuilder.append("The cluster has enough room to add new shards."); + } + + if (finalStatus == HealthStatus.RED) { + symptomBuilder.append("Cluster is close to reaching the maximum of shards for "); + if (normalNodes.status == frozenNodes.status) { + symptomBuilder.append("normal and frozen"); + } else if (normalNodes.status == HealthStatus.RED) { + symptomBuilder.append(NORMAL_GROUP); + } else { + symptomBuilder.append(FROZEN_GROUP); + } + symptomBuilder.append(" nodes."); + } + + return createIndicator( + finalStatus, + symptomBuilder.toString(), + buildDetails(normalNodes.result, frozenNodes.result), + List.of(), + List.of() + ); } - private HealthIndicatorResult calculateForNormalNodes(HealthMetadata.ShardLimits shardLimits) { + private StatusResult calculateForNormalNodes(HealthMetadata.ShardLimits shardLimits) { int maxShardsPerNodeSetting = shardLimits.maxShardsPerNode(); var result = ShardLimitValidator.checkShardLimitForNormalNodes(maxShardsPerNodeSetting, 5, 1, clusterService.state()); if (result.canAddShards() == false) { - return createIndicator( - HealthStatus.RED, - "Cluster is close to reach the maximum number of shards (room available is lower than " + result.totalShardsToAdd() + ")", - HealthIndicatorDetails.EMPTY, - List.of(), - List.of() - ); + return new StatusResult(HealthStatus.RED, result); } result = ShardLimitValidator.checkShardLimitForNormalNodes(maxShardsPerNodeSetting, 10, 1, clusterService.state()); - if (result.canAddShards() == false) { - return createIndicator( - HealthStatus.YELLOW, - "Cluster is close to reach the maximum number of shards (room available is lower than " + result.totalShardsToAdd() + ")", - HealthIndicatorDetails.EMPTY, - List.of(), - List.of() - ); + return new StatusResult(HealthStatus.YELLOW, result); } - return createIndicator( - HealthStatus.GREEN, - "The cluster has enough room to add new shards to normal nodes.", - HealthIndicatorDetails.EMPTY, - List.of(), - List.of() - ); + return new StatusResult(HealthStatus.GREEN, result); } - private HealthIndicatorResult calculateForFrozenNodes(HealthMetadata.ShardLimits shardLimits) { + private StatusResult calculateForFrozenNodes(HealthMetadata.ShardLimits shardLimits) { int maxShardsPerNodeSetting = shardLimits.maxShardsPerNodeFrozen(); var result = ShardLimitValidator.checkShardLimitForFrozenNodes(maxShardsPerNodeSetting, 5, 1, clusterService.state()); - if (result.canAddShards() == false) { - return createIndicator( - HealthStatus.RED, - "Cluster is close to reach the maximum number of shards (room available is lower than " + result.totalShardsToAdd() + ")", - HealthIndicatorDetails.EMPTY, - List.of(), - List.of() - ); + return new StatusResult(HealthStatus.RED, result); } result = ShardLimitValidator.checkShardLimitForFrozenNodes(maxShardsPerNodeSetting, 10, 1, clusterService.state()); - if (result.canAddShards() == false) { - return createIndicator( - HealthStatus.YELLOW, - "Cluster is close to reach the maximum number of shards (room available is lower than " + result.totalShardsToAdd() + ")", - HealthIndicatorDetails.EMPTY, - List.of(), - List.of() - ); + return new StatusResult(HealthStatus.YELLOW, result); } - return createIndicator( - HealthStatus.GREEN, - "The cluster has enough room to add new shards to frozen nodes.", - HealthIndicatorDetails.EMPTY, - List.of(), - List.of() - ); + return new StatusResult(HealthStatus.GREEN, result); + } + + static HealthIndicatorDetails buildDetails(ShardLimitValidator.Result normalNodes, ShardLimitValidator.Result frozenNodes) { + return (builder, params) -> { + builder.startObject(); + { + builder.startObject("normal_nodes"); + builder.field("max_shards_in_cluster", normalNodes.maxShardsInCluster()); + if (normalNodes.currentUsedShards().isPresent()) { + builder.field("current_used_shards_in_group", normalNodes.currentUsedShards().get()); + } + builder.endObject(); + } + { + builder.startObject("frozen_nodes"); + builder.field("max_shards_in_cluster", frozenNodes.maxShardsInCluster()); + if (frozenNodes.currentUsedShards().isPresent()) { + builder.field("current_used_shards_in_group", frozenNodes.currentUsedShards().get()); + } + builder.endObject(); + } + builder.endObject(); + return builder; + }; } private HealthIndicatorResult unknownIndicator() { return createIndicator(HealthStatus.UNKNOWN, "No shard limits data.", HealthIndicatorDetails.EMPTY, List.of(), List.of()); } + + private record StatusResult(HealthStatus status, ShardLimitValidator.Result result) {} } From 66da4641575368da0ac26943ff69fa14156dcb85 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Mon, 20 Mar 2023 10:56:48 -0300 Subject: [PATCH 03/28] add impacts and diagnoses --- .../ShardLimitsHealthIndicatorService.java | 64 ++++++++++++++++--- 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index 35503a791464d..6e35cc61c942a 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -9,10 +9,13 @@ package org.elasticsearch.health.node; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.health.Diagnosis; import org.elasticsearch.health.HealthIndicatorDetails; +import org.elasticsearch.health.HealthIndicatorImpact; import org.elasticsearch.health.HealthIndicatorResult; import org.elasticsearch.health.HealthIndicatorService; import org.elasticsearch.health.HealthStatus; +import org.elasticsearch.health.ImpactArea; import org.elasticsearch.health.metadata.HealthMetadata; import org.elasticsearch.indices.ShardLimitValidator; @@ -38,6 +41,36 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService { private static final String NAME = "shard_limits"; + private final String UPGRADE_BLOCKED = "The cluster has too many used shards to be able to upgrade"; + private final List INDICATOR_IMPACTS = List.of( + new HealthIndicatorImpact(NAME, "upgrade_blocked", 1, UPGRADE_BLOCKED, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)) + ); + + private final String HELP_GUIDE = "https://ela.st/max-shard-limit-reached"; + private final Diagnosis SHARD_LIMITS_REACHED_NORMAL_NODES = new Diagnosis( + new Diagnosis.Definition( + NAME, + "increase_max_shards_per_node", + "The current value of `" + + ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey() + + "` does not allow to add more shards to the cluster.", + "Consider increasing the currently set value or remove indices to clear up resources " + HELP_GUIDE, + HELP_GUIDE + ), + null + ); + private final Diagnosis SHARD_LIMITS_REACHED_FROZEN_NODES = new Diagnosis( + new Diagnosis.Definition( + NAME, + "increase_max_shards_per_node_frozen", + "The current value of `" + + ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN.getKey() + + "` does not allow to add more shards to the cluster.", + "Consider increasing the currently set value or remove indices to clear up resources " + HELP_GUIDE, + HELP_GUIDE + ), + null + ); private final ClusterService clusterService; @@ -65,30 +98,41 @@ public HealthIndicatorResult calculate(boolean verbose, int maxAffectedResources private HealthIndicatorResult mergeIndicators(StatusResult normalNodes, StatusResult frozenNodes) { HealthStatus finalStatus = HealthStatus.merge(Stream.of(normalNodes.status, frozenNodes.status)); + List indicatorImpacts = List.of(); + List diagnoses = List.of(); var symptomBuilder = new StringBuilder(); if (finalStatus == HealthStatus.GREEN) { symptomBuilder.append("The cluster has enough room to add new shards."); } - if (finalStatus == HealthStatus.RED) { + // RED and YELLOW status indicates that the cluster might have issues. finalStatus has the worst between *normal and frozen* nodes, + // so we have to check each of the groups in order of provide the right message. + if (finalStatus.indicatesHealthProblem()) { symptomBuilder.append("Cluster is close to reaching the maximum of shards for "); if (normalNodes.status == frozenNodes.status) { - symptomBuilder.append("normal and frozen"); - } else if (normalNodes.status == HealthStatus.RED) { + symptomBuilder.append(NORMAL_GROUP).append(" and ").append(FROZEN_GROUP); + diagnoses = List.of(SHARD_LIMITS_REACHED_NORMAL_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES); + + } else if (normalNodes.status.indicatesHealthProblem()) { symptomBuilder.append(NORMAL_GROUP); - } else { + diagnoses = List.of(SHARD_LIMITS_REACHED_NORMAL_NODES); + + } else if (frozenNodes.status.indicatesHealthProblem()) { symptomBuilder.append(FROZEN_GROUP); + diagnoses = List.of(SHARD_LIMITS_REACHED_FROZEN_NODES); } + symptomBuilder.append(" nodes."); + indicatorImpacts = INDICATOR_IMPACTS; } return createIndicator( finalStatus, symptomBuilder.toString(), buildDetails(normalNodes.result, frozenNodes.result), - List.of(), - List.of() + indicatorImpacts, + diagnoses ); } @@ -127,18 +171,18 @@ static HealthIndicatorDetails buildDetails(ShardLimitValidator.Result normalNode return (builder, params) -> { builder.startObject(); { - builder.startObject("normal_nodes"); + builder.startObject(NORMAL_GROUP + "_nodes"); builder.field("max_shards_in_cluster", normalNodes.maxShardsInCluster()); if (normalNodes.currentUsedShards().isPresent()) { - builder.field("current_used_shards_in_group", normalNodes.currentUsedShards().get()); + builder.field("current_used_shards", normalNodes.currentUsedShards().get()); } builder.endObject(); } { - builder.startObject("frozen_nodes"); + builder.startObject(FROZEN_GROUP + "_nodes"); builder.field("max_shards_in_cluster", frozenNodes.maxShardsInCluster()); if (frozenNodes.currentUsedShards().isPresent()) { - builder.field("current_used_shards_in_group", frozenNodes.currentUsedShards().get()); + builder.field("current_used_shards", frozenNodes.currentUsedShards().get()); } builder.endObject(); } From c287f171cc97045125973aef66f4e7981a033499 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Mon, 20 Mar 2023 10:56:55 -0300 Subject: [PATCH 04/28] wire up the new indicator --- server/src/main/java/org/elasticsearch/node/Node.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/node/Node.java b/server/src/main/java/org/elasticsearch/node/Node.java index 877001d49bca3..903097c2b493a 100644 --- a/server/src/main/java/org/elasticsearch/node/Node.java +++ b/server/src/main/java/org/elasticsearch/node/Node.java @@ -112,6 +112,7 @@ import org.elasticsearch.health.node.DiskHealthIndicatorService; import org.elasticsearch.health.node.HealthInfoCache; import org.elasticsearch.health.node.LocalHealthMonitor; +import org.elasticsearch.health.node.ShardLimitsHealthIndicatorService; import org.elasticsearch.health.node.selection.HealthNodeTaskExecutor; import org.elasticsearch.health.stats.HealthApiStats; import org.elasticsearch.http.HttpServerTransport; @@ -212,6 +213,7 @@ import org.elasticsearch.watcher.ResourceWatcherService; import org.elasticsearch.xcontent.NamedXContentRegistry; +import javax.net.ssl.SNIHostName; import java.io.BufferedWriter; import java.io.Closeable; import java.io.File; @@ -241,8 +243,6 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import javax.net.ssl.SNIHostName; - import static java.util.stream.Collectors.toList; import static org.elasticsearch.common.util.CollectionUtils.concatLists; import static org.elasticsearch.core.Types.forciblyCast; @@ -1226,6 +1226,7 @@ private HealthService createHealthService( ) ); serverHealthIndicatorServices.add(new DiskHealthIndicatorService(clusterService)); + serverHealthIndicatorServices.add(new ShardLimitsHealthIndicatorService(clusterService)); var pluginHealthIndicatorServices = pluginsService.filterPlugins(HealthPlugin.class) .stream() .flatMap(plugin -> plugin.getHealthIndicatorServices().stream()) From b40a94e35222d64c9cf35671812bdde148d06a1d Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Mon, 20 Mar 2023 11:12:24 -0300 Subject: [PATCH 05/28] Update docs/changelog/94552.yaml --- docs/changelog/94552.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/94552.yaml diff --git a/docs/changelog/94552.yaml b/docs/changelog/94552.yaml new file mode 100644 index 0000000000000..e87fd7e038ec6 --- /dev/null +++ b/docs/changelog/94552.yaml @@ -0,0 +1,5 @@ +pr: 94552 +summary: Add new `ShardLimits` Health Indicator Service +area: Health +type: feature +issues: [] From 80754aa10442748f594f34dd6308024ba180806e Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Mon, 20 Mar 2023 11:45:01 -0300 Subject: [PATCH 06/28] spotless --- server/src/main/java/org/elasticsearch/node/Node.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/node/Node.java b/server/src/main/java/org/elasticsearch/node/Node.java index 903097c2b493a..0983a95230f61 100644 --- a/server/src/main/java/org/elasticsearch/node/Node.java +++ b/server/src/main/java/org/elasticsearch/node/Node.java @@ -213,7 +213,6 @@ import org.elasticsearch.watcher.ResourceWatcherService; import org.elasticsearch.xcontent.NamedXContentRegistry; -import javax.net.ssl.SNIHostName; import java.io.BufferedWriter; import java.io.Closeable; import java.io.File; @@ -243,6 +242,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import javax.net.ssl.SNIHostName; + import static java.util.stream.Collectors.toList; import static org.elasticsearch.common.util.CollectionUtils.concatLists; import static org.elasticsearch.core.Types.forciblyCast; From b8c188c53fc928995fffb335a4dfa1d1e4b59d5b Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Mon, 20 Mar 2023 16:26:23 -0300 Subject: [PATCH 07/28] add tests for new health indicator --- .../ShardLimitsHealthIndicatorService.java | 10 +- ...hardLimitsHealthIndicatorServiceTests.java | 341 ++++++++++++++++++ 2 files changed, 346 insertions(+), 5 deletions(-) create mode 100644 server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index 6e35cc61c942a..5747459792ed5 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -41,13 +41,13 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService { private static final String NAME = "shard_limits"; - private final String UPGRADE_BLOCKED = "The cluster has too many used shards to be able to upgrade"; - private final List INDICATOR_IMPACTS = List.of( + private static final String UPGRADE_BLOCKED = "The cluster has too many used shards to be able to upgrade"; + private static final String HELP_GUIDE = "https://ela.st/max-shard-limit-reached"; + static final List INDICATOR_IMPACTS = List.of( new HealthIndicatorImpact(NAME, "upgrade_blocked", 1, UPGRADE_BLOCKED, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)) ); - private final String HELP_GUIDE = "https://ela.st/max-shard-limit-reached"; - private final Diagnosis SHARD_LIMITS_REACHED_NORMAL_NODES = new Diagnosis( + static final Diagnosis SHARD_LIMITS_REACHED_NORMAL_NODES = new Diagnosis( new Diagnosis.Definition( NAME, "increase_max_shards_per_node", @@ -59,7 +59,7 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService ), null ); - private final Diagnosis SHARD_LIMITS_REACHED_FROZEN_NODES = new Diagnosis( + static final Diagnosis SHARD_LIMITS_REACHED_FROZEN_NODES = new Diagnosis( new Diagnosis.Definition( NAME, "increase_max_shards_per_node_frozen", diff --git a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java new file mode 100644 index 0000000000000..4631043706e05 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java @@ -0,0 +1,341 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.health.node; + +import org.elasticsearch.Version; +import org.elasticsearch.action.support.replication.ClusterStateCreationUtils; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodeRole; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.health.HealthStatus; +import org.elasticsearch.health.metadata.HealthMetadata; +import org.elasticsearch.indices.ShardLimitValidator; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentType; +import org.junit.Before; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_CREATION_DATE; +import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; +import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; +import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_VERSION_CREATED; +import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.INDICATOR_IMPACTS; +import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.SHARD_LIMITS_REACHED_FROZEN_NODES; +import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.SHARD_LIMITS_REACHED_NORMAL_NODES; +import static org.elasticsearch.indices.ShardLimitValidator.FROZEN_GROUP; +import static org.elasticsearch.indices.ShardLimitValidator.INDEX_SETTING_SHARD_LIMIT_GROUP; +import static org.elasticsearch.indices.ShardLimitValidator.NORMAL_GROUP; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class ShardLimitsHealthIndicatorServiceTests extends ESTestCase { + + public static final HealthMetadata.Disk DISK_METADATA = HealthMetadata.Disk.newBuilder().build(); + private DiscoveryNode normalNode; + private DiscoveryNode frozenNode; + + @Before + public void setUp() throws Exception { + super.setUp(); + + normalNode = new DiscoveryNode( + "normal_node", + "normal_node", + ESTestCase.buildNewFakeTransportAddress(), + Map.of(), + Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE), + Version.CURRENT + ); + + frozenNode = new DiscoveryNode( + "frozen_node", + "frozen_node", + ESTestCase.buildNewFakeTransportAddress(), + Map.of(), + Set.of(DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE), + Version.CURRENT + ); + } + + public void testNoShardLimitsMetadata() throws IOException { + var clusterService = createClusterService( + createClusterState( + randomValidMaxShards(), + randomValidMaxShards(), + new HealthMetadata(DISK_METADATA, null), + createIndexInNormalNode(100) + ) + ); + var target = new ShardLimitsHealthIndicatorService(clusterService); + var indicatorResult = target.calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + + assertEquals(indicatorResult.status(), HealthStatus.UNKNOWN); + assertTrue(indicatorResult.impacts().isEmpty()); + assertTrue(indicatorResult.diagnosisList().isEmpty()); + assertEquals(indicatorResult.symptom(), "No shard limits data."); + assertEquals(xContentToMap(indicatorResult.details()), Map.of()); + } + + public void testIndicatorYieldsGreenInCaseThereIsRoom() throws IOException { + int maxShardsPerNode = randomValidMaxShards(); + int maxShardsPerNodeFrozen = randomValidMaxShards(); + var clusterService = createClusterService(maxShardsPerNode, maxShardsPerNodeFrozen, createIndexInNormalNode(maxShardsPerNode / 4)); + var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + + assertEquals(indicatorResult.status(), HealthStatus.GREEN); + assertTrue(indicatorResult.impacts().isEmpty()); + assertTrue(indicatorResult.diagnosisList().isEmpty()); + assertEquals(indicatorResult.symptom(), "The cluster has enough room to add new shards."); + assertThat( + xContentToMap(indicatorResult.details()), + is( + Map.of( + "normal_nodes", + Map.of("max_shards_in_cluster", maxShardsPerNode), + "frozen_nodes", + Map.of("max_shards_in_cluster", maxShardsPerNodeFrozen) + ) + ) + ); + } + + public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOException { + { + // Only normal_nodes does not have enough space + int maxShardsPerNodeFrozen = randomValidMaxShards(); + var clusterService = createClusterService(25, maxShardsPerNodeFrozen, createIndexInNormalNode(4)); + var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + + assertEquals(indicatorResult.status(), HealthStatus.YELLOW); + assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum of shards for normal nodes."); + assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); + assertThat(indicatorResult.diagnosisList(), hasSize(1)); + assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_NORMAL_NODES)); + assertThat( + xContentToMap(indicatorResult.details()), + is( + Map.of( + "normal_nodes", + Map.of("max_shards_in_cluster", 25, "current_used_shards", 8), + "frozen_nodes", + Map.of("max_shards_in_cluster", maxShardsPerNodeFrozen) + ) + ) + ); + } + { + // Only frozen_nodes does not have enough space + int maxShardsPerNode = randomValidMaxShards(); + var clusterService = createClusterService(maxShardsPerNode, 25, createIndexInFrozenNode(4)); + var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + + assertEquals(indicatorResult.status(), HealthStatus.YELLOW); + assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum of shards for frozen nodes."); + assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); + assertThat(indicatorResult.diagnosisList(), hasSize(1)); + assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_FROZEN_NODES)); + assertThat( + xContentToMap(indicatorResult.details()), + is( + Map.of( + "normal_nodes", + Map.of("max_shards_in_cluster", maxShardsPerNode), + "frozen_nodes", + Map.of("max_shards_in_cluster", 25, "current_used_shards", 8) + ) + ) + ); + } + { + // Both normal and frozen nodes does not have enough space + var clusterService = createClusterService(25, 25, createIndexInNormalNode(4), createIndexInFrozenNode(4)); + var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + + assertEquals(indicatorResult.status(), HealthStatus.YELLOW); + assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum of shards for normal and frozen nodes."); + assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); + assertThat(indicatorResult.diagnosisList(), is(List.of(SHARD_LIMITS_REACHED_NORMAL_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES))); + assertThat( + xContentToMap(indicatorResult.details()), + is( + Map.of( + "normal_nodes", + Map.of("max_shards_in_cluster", 25, "current_used_shards", 8), + "frozen_nodes", + Map.of("max_shards_in_cluster", 25, "current_used_shards", 8) + ) + ) + ); + } + } + + public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOException { + { + // Only normal_nodes does not have enough space + int maxShardsPerNodeFrozen = randomValidMaxShards(); + var clusterService = createClusterService(25, maxShardsPerNodeFrozen, createIndexInNormalNode(11)); + var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + + assertEquals(indicatorResult.status(), HealthStatus.RED); + assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum of shards for normal nodes."); + assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); + assertThat(indicatorResult.diagnosisList(), hasSize(1)); + assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_NORMAL_NODES)); + assertThat( + xContentToMap(indicatorResult.details()), + is( + Map.of( + "normal_nodes", + Map.of("max_shards_in_cluster", 25, "current_used_shards", 22), + "frozen_nodes", + Map.of("max_shards_in_cluster", maxShardsPerNodeFrozen) + ) + ) + ); + } + { + // Only frozen_nodes does not have enough space + int maxShardsPerNode = randomValidMaxShards(); + var clusterService = createClusterService(maxShardsPerNode, 25, createIndexInFrozenNode(11)); + var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + + assertEquals(indicatorResult.status(), HealthStatus.RED); + assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum of shards for frozen nodes."); + assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); + assertThat(indicatorResult.diagnosisList(), hasSize(1)); + assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_FROZEN_NODES)); + assertThat( + xContentToMap(indicatorResult.details()), + is( + Map.of( + "normal_nodes", + Map.of("max_shards_in_cluster", maxShardsPerNode), + "frozen_nodes", + Map.of("max_shards_in_cluster", 25, "current_used_shards", 22) + ) + ) + ); + } + { + // Both normal and frozen nodes does not have enough space + var clusterService = createClusterService(25, 25, createIndexInNormalNode(11), createIndexInFrozenNode(11)); + var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + + assertEquals(indicatorResult.status(), HealthStatus.RED); + assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum of shards for normal and frozen nodes."); + assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); + assertThat(indicatorResult.diagnosisList(), is(List.of(SHARD_LIMITS_REACHED_NORMAL_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES))); + assertThat( + xContentToMap(indicatorResult.details()), + is( + Map.of( + "normal_nodes", + Map.of("max_shards_in_cluster", 25, "current_used_shards", 22), + "frozen_nodes", + Map.of("max_shards_in_cluster", 25, "current_used_shards", 22) + ) + ) + ); + } + } + + private static int randomValidMaxShards() { + return randomIntBetween(50, 1000); + } + + private Map xContentToMap(ToXContent xcontent) throws IOException { + XContentBuilder builder = XContentFactory.jsonBuilder(); + xcontent.toXContent(builder, ToXContent.EMPTY_PARAMS); + XContentParser parser = XContentType.JSON.xContent() + .createParser(xContentRegistry(), LoggingDeprecationHandler.INSTANCE, BytesReference.bytes(builder).streamInput()); + return parser.map(); + } + + private static ClusterService createClusterService(ClusterState clusterState) { + var clusterService = mock(ClusterService.class); + when(clusterService.state()).thenReturn(clusterState); + return clusterService; + } + + private ClusterService createClusterService(int maxShardsPerNode, int maxShardsPerNodeFrozen, IndexMetadata.Builder... indexMetadata) { + return createClusterService( + createClusterState( + maxShardsPerNode, + maxShardsPerNodeFrozen, + new HealthMetadata(DISK_METADATA, new HealthMetadata.ShardLimits(maxShardsPerNode, maxShardsPerNodeFrozen)), + indexMetadata + ) + ); + } + + private ClusterState createClusterState( + int maxShardsPerNode, + int maxShardsPerNodeFrozen, + HealthMetadata healthMetadata, + IndexMetadata.Builder... indexMetadata + ) { + var clusterState = ClusterStateCreationUtils.state( + normalNode, + normalNode, + normalNode, + new DiscoveryNode[] { normalNode, frozenNode } + ).copyAndUpdate(b -> b.putCustom(HealthMetadata.TYPE, healthMetadata)); + + var metadata = Metadata.builder() + .persistentSettings( + Settings.builder() + .put(ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), maxShardsPerNode) + .put(ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN.getKey(), maxShardsPerNodeFrozen) + .build() + ); + + for (var idxMetadata : indexMetadata) { + metadata.put(idxMetadata); + } + + return ClusterState.builder(clusterState).metadata(metadata).build(); + } + + private static IndexMetadata.Builder createIndexInNormalNode(int shards) { + return createIndex(shards, NORMAL_GROUP); + } + + private static IndexMetadata.Builder createIndexInFrozenNode(int shards) { + return createIndex(shards, FROZEN_GROUP); + } + + private static IndexMetadata.Builder createIndex(int shards, String group) { + return IndexMetadata.builder("index-" + randomAlphaOfLength(20)) + .settings( + Settings.builder() + .put(SETTING_VERSION_CREATED, Version.CURRENT) + .put(SETTING_NUMBER_OF_SHARDS, shards) + .put(SETTING_NUMBER_OF_REPLICAS, 1) + .put(SETTING_CREATION_DATE, System.currentTimeMillis()) + .put(INDEX_SETTING_SHARD_LIMIT_GROUP.getKey(), group) + ); + } +} From 47caafc4cc77f27ce0d8b37a4ccf1caaf0449908 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Mon, 20 Mar 2023 16:34:43 -0300 Subject: [PATCH 08/28] fix wording Co-authored-by: Andrei Dan --- .../health/node/ShardLimitsHealthIndicatorService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index 5747459792ed5..eb7c6f1ed9feb 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -109,7 +109,7 @@ private HealthIndicatorResult mergeIndicators(StatusResult normalNodes, StatusRe // RED and YELLOW status indicates that the cluster might have issues. finalStatus has the worst between *normal and frozen* nodes, // so we have to check each of the groups in order of provide the right message. if (finalStatus.indicatesHealthProblem()) { - symptomBuilder.append("Cluster is close to reaching the maximum of shards for "); + symptomBuilder.append("Cluster is close to reaching the maximum number of shards for "); if (normalNodes.status == frozenNodes.status) { symptomBuilder.append(NORMAL_GROUP).append(" and ").append(FROZEN_GROUP); diagnoses = List.of(SHARD_LIMITS_REACHED_NORMAL_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES); From 8dad903b14e966cf906d71298ca50644f1bf1229 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Mon, 20 Mar 2023 16:37:10 -0300 Subject: [PATCH 09/28] fix tests after rewording message --- ...ShardLimitsHealthIndicatorServiceTests.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java index 4631043706e05..096bc08435bd0 100644 --- a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java +++ b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java @@ -130,7 +130,7 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), HealthStatus.YELLOW); - assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum of shards for normal nodes."); + assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum number of shards for normal nodes."); assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_NORMAL_NODES)); @@ -153,7 +153,7 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), HealthStatus.YELLOW); - assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum of shards for frozen nodes."); + assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum number of shards for frozen nodes."); assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_FROZEN_NODES)); @@ -175,7 +175,10 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), HealthStatus.YELLOW); - assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum of shards for normal and frozen nodes."); + assertEquals( + indicatorResult.symptom(), + "Cluster is close to reaching the maximum number of shards for normal and frozen nodes." + ); assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), is(List.of(SHARD_LIMITS_REACHED_NORMAL_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES))); assertThat( @@ -200,7 +203,7 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), HealthStatus.RED); - assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum of shards for normal nodes."); + assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum number of shards for normal nodes."); assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_NORMAL_NODES)); @@ -223,7 +226,7 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), HealthStatus.RED); - assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum of shards for frozen nodes."); + assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum number of shards for frozen nodes."); assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_FROZEN_NODES)); @@ -245,7 +248,10 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), HealthStatus.RED); - assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum of shards for normal and frozen nodes."); + assertEquals( + indicatorResult.symptom(), + "Cluster is close to reaching the maximum number of shards for normal and frozen nodes." + ); assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), is(List.of(SHARD_LIMITS_REACHED_NORMAL_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES))); assertThat( From 80b85b48d6222b293b33c340a58042f319558d73 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Mon, 20 Mar 2023 16:58:35 -0300 Subject: [PATCH 10/28] separate impact for red and yellow status --- .../node/ShardLimitsHealthIndicatorService.java | 14 +++++++++++--- .../ShardLimitsHealthIndicatorServiceTests.java | 15 ++++++++------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index eb7c6f1ed9feb..a8737a1c39287 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -42,10 +42,14 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService private static final String NAME = "shard_limits"; private static final String UPGRADE_BLOCKED = "The cluster has too many used shards to be able to upgrade"; + private static final String UPGRADE_AT_RISK = "The cluster is running low on room to add new shards hence upgrade is at risk"; private static final String HELP_GUIDE = "https://ela.st/max-shard-limit-reached"; - static final List INDICATOR_IMPACTS = List.of( + static final List RED_INDICATOR_IMPACTS = List.of( new HealthIndicatorImpact(NAME, "upgrade_blocked", 1, UPGRADE_BLOCKED, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)) ); + static final List YELLOW_INDICATOR_IMPACTS = List.of( + new HealthIndicatorImpact(NAME, "upgrade_at_risk", 2, UPGRADE_AT_RISK, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)) + ); static final Diagnosis SHARD_LIMITS_REACHED_NORMAL_NODES = new Diagnosis( new Diagnosis.Definition( @@ -98,7 +102,6 @@ public HealthIndicatorResult calculate(boolean verbose, int maxAffectedResources private HealthIndicatorResult mergeIndicators(StatusResult normalNodes, StatusResult frozenNodes) { HealthStatus finalStatus = HealthStatus.merge(Stream.of(normalNodes.status, frozenNodes.status)); - List indicatorImpacts = List.of(); List diagnoses = List.of(); var symptomBuilder = new StringBuilder(); @@ -124,9 +127,14 @@ private HealthIndicatorResult mergeIndicators(StatusResult normalNodes, StatusRe } symptomBuilder.append(" nodes."); - indicatorImpacts = INDICATOR_IMPACTS; } + var indicatorImpacts = switch (finalStatus) { + case RED -> RED_INDICATOR_IMPACTS; + case YELLOW -> YELLOW_INDICATOR_IMPACTS; + default -> List.of(); + }; + return createIndicator( finalStatus, symptomBuilder.toString(), diff --git a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java index 096bc08435bd0..e6d567d0aeb6e 100644 --- a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java +++ b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java @@ -39,9 +39,10 @@ import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_VERSION_CREATED; -import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.INDICATOR_IMPACTS; +import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.RED_INDICATOR_IMPACTS; import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.SHARD_LIMITS_REACHED_FROZEN_NODES; import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.SHARD_LIMITS_REACHED_NORMAL_NODES; +import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.YELLOW_INDICATOR_IMPACTS; import static org.elasticsearch.indices.ShardLimitValidator.FROZEN_GROUP; import static org.elasticsearch.indices.ShardLimitValidator.INDEX_SETTING_SHARD_LIMIT_GROUP; import static org.elasticsearch.indices.ShardLimitValidator.NORMAL_GROUP; @@ -131,7 +132,7 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep assertEquals(indicatorResult.status(), HealthStatus.YELLOW); assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum number of shards for normal nodes."); - assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); + assertThat(indicatorResult.impacts(), equalTo(YELLOW_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_NORMAL_NODES)); assertThat( @@ -154,7 +155,7 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep assertEquals(indicatorResult.status(), HealthStatus.YELLOW); assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum number of shards for frozen nodes."); - assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); + assertThat(indicatorResult.impacts(), equalTo(YELLOW_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_FROZEN_NODES)); assertThat( @@ -179,7 +180,7 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep indicatorResult.symptom(), "Cluster is close to reaching the maximum number of shards for normal and frozen nodes." ); - assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); + assertThat(indicatorResult.impacts(), equalTo(YELLOW_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), is(List.of(SHARD_LIMITS_REACHED_NORMAL_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES))); assertThat( xContentToMap(indicatorResult.details()), @@ -204,7 +205,7 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio assertEquals(indicatorResult.status(), HealthStatus.RED); assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum number of shards for normal nodes."); - assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); + assertThat(indicatorResult.impacts(), equalTo(RED_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_NORMAL_NODES)); assertThat( @@ -227,7 +228,7 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio assertEquals(indicatorResult.status(), HealthStatus.RED); assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum number of shards for frozen nodes."); - assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); + assertThat(indicatorResult.impacts(), equalTo(RED_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_FROZEN_NODES)); assertThat( @@ -252,7 +253,7 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio indicatorResult.symptom(), "Cluster is close to reaching the maximum number of shards for normal and frozen nodes." ); - assertThat(indicatorResult.impacts(), equalTo(INDICATOR_IMPACTS)); + assertThat(indicatorResult.impacts(), equalTo(RED_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), is(List.of(SHARD_LIMITS_REACHED_NORMAL_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES))); assertThat( xContentToMap(indicatorResult.details()), From f4f5ab12b7ba3404187decded47ae8a5e2cb40a3 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Tue, 21 Mar 2023 10:07:58 -0300 Subject: [PATCH 11/28] reword some messages --- .../ShardLimitsHealthIndicatorService.java | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index a8737a1c39287..ff5b615b472bc 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -20,6 +20,7 @@ import org.elasticsearch.indices.ShardLimitValidator; import java.util.List; +import java.util.function.Function; import java.util.stream.Stream; import static org.elasticsearch.indices.ShardLimitValidator.FROZEN_GROUP; @@ -51,29 +52,24 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService new HealthIndicatorImpact(NAME, "upgrade_at_risk", 2, UPGRADE_AT_RISK, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)) ); - static final Diagnosis SHARD_LIMITS_REACHED_NORMAL_NODES = new Diagnosis( + private static final Function SHARD_LIMITS_REACHED_FN = settingName -> new Diagnosis( new Diagnosis.Definition( NAME, "increase_max_shards_per_node", - "The current value of `" - + ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey() - + "` does not allow to add more shards to the cluster.", - "Consider increasing the currently set value or remove indices to clear up resources " + HELP_GUIDE, + "The current value of the setting `" + + settingName + + "` is either about to be or already reached which will not allow adding more shards to the cluster.", + "Consider increasing the currently configured value or removing indices to clear up resources " + HELP_GUIDE, HELP_GUIDE ), null ); - static final Diagnosis SHARD_LIMITS_REACHED_FROZEN_NODES = new Diagnosis( - new Diagnosis.Definition( - NAME, - "increase_max_shards_per_node_frozen", - "The current value of `" - + ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN.getKey() - + "` does not allow to add more shards to the cluster.", - "Consider increasing the currently set value or remove indices to clear up resources " + HELP_GUIDE, - HELP_GUIDE - ), - null + + static final Diagnosis SHARD_LIMITS_REACHED_NORMAL_NODES = SHARD_LIMITS_REACHED_FN.apply( + ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey() + ); + static final Diagnosis SHARD_LIMITS_REACHED_FROZEN_NODES = SHARD_LIMITS_REACHED_FN.apply( + ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN.getKey() ); private final ClusterService clusterService; From e7a2148e050c0ef3c0a278bf2918ec0b21ce4d92 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Tue, 21 Mar 2023 14:37:46 -0300 Subject: [PATCH 12/28] add integration test --- .../ShardLimitsHealthIndicatorServiceIT.java | 141 ++++++++++++++++++ .../ShardLimitsHealthIndicatorService.java | 6 +- 2 files changed, 144 insertions(+), 3 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java diff --git a/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java new file mode 100644 index 0000000000000..88ec78a0a17b7 --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java @@ -0,0 +1,141 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.health.node; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.health.HealthIndicatorResult; +import org.elasticsearch.health.HealthService; +import org.elasticsearch.health.HealthStatus; +import org.elasticsearch.health.metadata.HealthMetadata; +import org.elasticsearch.health.node.selection.HealthNode; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.InternalTestCluster; +import org.elasticsearch.test.NodeRoles; +import org.junit.Before; + +import java.util.List; +import java.util.concurrent.atomic.AtomicReference; + +import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; +import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; +import static org.elasticsearch.cluster.node.DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE; +import static org.elasticsearch.indices.ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.hasSize; + +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) +public class ShardLimitsHealthIndicatorServiceIT extends ESIntegTestCase { + + private static final String INDEX_NAME = "index-name"; + private InternalTestCluster internalCluster; + + @Before + public void setUp() throws Exception { + super.setUp(); + internalCluster = internalCluster(); + updateClusterSettings(Settings.builder().put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), 30)); + } + + public void testGreen() throws Exception { + // index: 4 shards + 1 replica = 8 shards used (30 - 8 = 22 > 10 available shards) + createIndex(4, 1); + + var result = fetchShardLimitsIndicatorResult(internalCluster); + assertEquals(result.status(), HealthStatus.GREEN); + assertEquals(result.symptom(), "The cluster has enough room to add new shards."); + assertThat(result.diagnosisList(), empty()); + assertThat(result.impacts(), empty()); + } + + public void testYellow() throws Exception { + // index: 11 shards + 1 replica = 22 shards used (30 - 22 < 10 available shards) + createIndex(10, 1); + + var result = fetchShardLimitsIndicatorResult(internalCluster); + assertEquals(result.status(), HealthStatus.YELLOW); + assertEquals(result.symptom(), "Cluster is close to reaching the maximum number of shards for normal nodes."); + assertThat(result.diagnosisList(), hasSize(1)); + assertThat(result.impacts(), hasSize(1)); + } + + public void testRed() throws Exception { + // index: 13 shards + 1 replica = 26 shards used (30 - 26 < 5 available shards) + createIndex(13, 1); + + var result = fetchShardLimitsIndicatorResult(internalCluster); + assertEquals(result.status(), HealthStatus.RED); + assertEquals(result.symptom(), "Cluster is close to reaching the maximum number of shards for normal nodes."); + assertThat(result.diagnosisList(), hasSize(1)); + assertThat(result.impacts(), hasSize(1)); + } + + private void createIndex(int shards, int replicas) { + createIndex(INDEX_NAME, Settings.builder().put(SETTING_NUMBER_OF_SHARDS, shards).put(SETTING_NUMBER_OF_REPLICAS, replicas).build()); + } + + private HealthIndicatorResult fetchShardLimitsIndicatorResult(InternalTestCluster internalCluster) throws Exception { + var healthNode = findHealthNode().getName(); + var healthService = internalCluster.getInstance(HealthService.class, healthNode); + var healthIndicatorResults = getHealthServiceResults(healthService, healthNode); + assertThat(healthIndicatorResults, hasSize(1)); + return healthIndicatorResults.get(0); + } + + private void setUpCluster(InternalTestCluster internalCluster) throws Exception { + internalCluster.startMasterOnlyNode(); + internalCluster.startDataOnlyNode(); + internalCluster.startNode(NodeRoles.onlyRole(DATA_FROZEN_NODE_ROLE)); + ensureStableCluster(internalCluster.getNodeNames().length); + waitForHealthMetadata(); + } + + private List getHealthServiceResults(HealthService healthService, String node) throws Exception { + AtomicReference> resultListReference = new AtomicReference<>(); + ActionListener> listener = new ActionListener<>() { + @Override + public void onResponse(List healthIndicatorResults) { + resultListReference.set(healthIndicatorResults); + } + + @Override + public void onFailure(Exception e) { + throw new RuntimeException(e); + } + }; + healthService.getHealth(internalCluster().client(node), ShardLimitsHealthIndicatorService.NAME, true, 1000, listener); + assertBusy(() -> assertNotNull(resultListReference.get())); + return resultListReference.get(); + } + + private void waitForHealthMetadata() throws Exception { + assertBusy(() -> { + var healthMetadata = HealthMetadata.getFromClusterState(internalCluster().clusterService().state()); + + assertNotNull(healthMetadata); + assertNotNull(healthMetadata.getShardLimitsMetadata()); + assertTrue( + "max_shards_per_node settings must be greater than 0", + healthMetadata.getShardLimitsMetadata().maxShardsPerNode() > 0 + ); + assertTrue( + "max_shards_per_node.frozen settings must be greater than 0", + healthMetadata.getShardLimitsMetadata().maxShardsPerNodeFrozen() > 0 + ); + }); + } + + private static DiscoveryNode findHealthNode() { + var state = internalCluster().clusterService().state(); + DiscoveryNode healthNode = HealthNode.findHealthNode(state); + assertNotNull(healthNode); + return healthNode; + } +} diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index ff5b615b472bc..e5800615eeeb2 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -41,10 +41,10 @@ */ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService { - private static final String NAME = "shard_limits"; private static final String UPGRADE_BLOCKED = "The cluster has too many used shards to be able to upgrade"; private static final String UPGRADE_AT_RISK = "The cluster is running low on room to add new shards hence upgrade is at risk"; private static final String HELP_GUIDE = "https://ela.st/max-shard-limit-reached"; + static final String NAME = "shard_limits"; static final List RED_INDICATOR_IMPACTS = List.of( new HealthIndicatorImpact(NAME, "upgrade_blocked", 1, UPGRADE_BLOCKED, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)) ); @@ -97,8 +97,8 @@ public HealthIndicatorResult calculate(boolean verbose, int maxAffectedResources } private HealthIndicatorResult mergeIndicators(StatusResult normalNodes, StatusResult frozenNodes) { - HealthStatus finalStatus = HealthStatus.merge(Stream.of(normalNodes.status, frozenNodes.status)); - List diagnoses = List.of(); + var finalStatus = HealthStatus.merge(Stream.of(normalNodes.status, frozenNodes.status)); + var diagnoses = List.of(); var symptomBuilder = new StringBuilder(); if (finalStatus == HealthStatus.GREEN) { From 077cc42285006ac7dda43f70ea5d57f84f322760 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Tue, 21 Mar 2023 14:55:24 -0300 Subject: [PATCH 13/28] add structural tests for diagnosis definitions fix bad definition name --- .../node/ShardLimitsHealthIndicatorService.java | 8 +++++--- .../ShardLimitsHealthIndicatorServiceTests.java | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index e5800615eeeb2..3552a4a78b1ae 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -20,7 +20,7 @@ import org.elasticsearch.indices.ShardLimitValidator; import java.util.List; -import java.util.function.Function; +import java.util.function.BiFunction; import java.util.stream.Stream; import static org.elasticsearch.indices.ShardLimitValidator.FROZEN_GROUP; @@ -52,10 +52,10 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService new HealthIndicatorImpact(NAME, "upgrade_at_risk", 2, UPGRADE_AT_RISK, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)) ); - private static final Function SHARD_LIMITS_REACHED_FN = settingName -> new Diagnosis( + private static final BiFunction SHARD_LIMITS_REACHED_FN = (id, settingName) -> new Diagnosis( new Diagnosis.Definition( NAME, - "increase_max_shards_per_node", + id, "The current value of the setting `" + settingName + "` is either about to be or already reached which will not allow adding more shards to the cluster.", @@ -66,9 +66,11 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService ); static final Diagnosis SHARD_LIMITS_REACHED_NORMAL_NODES = SHARD_LIMITS_REACHED_FN.apply( + "increase_max_shards_per_node", ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey() ); static final Diagnosis SHARD_LIMITS_REACHED_FROZEN_NODES = SHARD_LIMITS_REACHED_FN.apply( + "increase_max_shards_per_node_frozen", ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN.getKey() ); diff --git a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java index e6d567d0aeb6e..22f4d6787d416 100644 --- a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java +++ b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java @@ -269,6 +269,20 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio } } + // We expose the indicator name and the diagnoses in the x-pack usage API. In order to index them properly in a telemetry index + // they need to be declared in the health-api-indexer.edn in the telemetry repository. + public void testMappedFieldsForTelemetry() { + assertEquals(ShardLimitsHealthIndicatorService.NAME, "shard_limits"); + assertEquals( + "elasticsearch:health:shard_limits:diagnosis:increase_max_shards_per_node", + SHARD_LIMITS_REACHED_NORMAL_NODES.definition().getUniqueId() + ); + assertEquals( + "elasticsearch:health:shard_limits:diagnosis:increase_max_shards_per_node_frozen", + SHARD_LIMITS_REACHED_FROZEN_NODES.definition().getUniqueId() + ); + } + private static int randomValidMaxShards() { return randomIntBetween(50, 1000); } From 00882078ca9b75dd851ffdcfa1e8feef514803c3 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Wed, 22 Mar 2023 11:09:04 -0300 Subject: [PATCH 14/28] add documentation to public methods add tests to public methods remove method which could lead to confusions --- .../indices/ShardLimitValidator.java | 40 ++++++------ .../indices/ShardLimitValidatorTests.java | 62 +++++++++++-------- .../deprecation/ClusterDeprecationChecks.java | 4 ++ 3 files changed, 60 insertions(+), 46 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/indices/ShardLimitValidator.java b/server/src/main/java/org/elasticsearch/indices/ShardLimitValidator.java index b82a5737ff47b..06bb1439be4e6 100644 --- a/server/src/main/java/org/elasticsearch/indices/ShardLimitValidator.java +++ b/server/src/main/java/org/elasticsearch/indices/ShardLimitValidator.java @@ -201,21 +201,14 @@ private Result checkShardLimitOnBothGroups(int newShards, int newFrozenShards, C * This method checks whether there is enough room in the cluster to add the given number of shards with the given number of replicas * without exceeding the "cluster.max_shards_per_node" setting for _normal_ nodes. This check does not guarantee that the number of * shards can be added, just that there is theoretically room to add them without exceeding the shards per node configuration. - * @param numberOfNewShards The number of primary shards that we want to be able to add to the cluster - * @param replicas The number of replicas of the primary shards that we want to be able to add to the cluster - * @param state The cluster state, used to get cluster settings and to get the number of open shards already in the cluster + * @param maxConfiguredShardsPerNode The maximum available number of shards to be allocated within a node + * @param numberOfNewShards The number of primary shards that we want to be able to add to the cluster + * @param replicas The number of replicas of the primary shards that we want to be able to add to the cluster + * @param state The cluster state, used to get cluster settings and to get the number of open shards already in + * the cluster */ - public static Result checkShardLimitForNormalNodes(int numberOfNewShards, int replicas, ClusterState state) { - return checkShardLimitForNormalNodes( - SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(state.getMetadata().settings()), - numberOfNewShards, - replicas, - state - ); - } - public static Result checkShardLimitForNormalNodes( - int maxShardsPerNodeSetting, + int maxConfiguredShardsPerNode, int numberOfNewShards, int replicas, ClusterState state @@ -223,14 +216,24 @@ public static Result checkShardLimitForNormalNodes( return checkShardLimit( numberOfNewShards * (1 + replicas), state, - maxShardsPerNodeSetting, + maxConfiguredShardsPerNode, nodeCount(state, ShardLimitValidator::hasNonFrozen), NORMAL_GROUP ); } + /** + * This method checks whether there is enough room in the cluster to add the given number of shards with the given number of replicas + * without exceeding the "cluster.max_shards_per_node_frozen" setting for _frozen_ nodes. This check does not guarantee that the number + * of shards can be added, just that there is theoretically room to add them without exceeding the shards per node configuration. + * @param maxConfiguredShardsPerNode The maximum available number of shards to be allocated within a node + * @param numberOfNewShards The number of primary shards that we want to be able to add to the cluster + * @param replicas The number of replicas of the primary shards that we want to be able to add to the cluster + * @param state The cluster state, used to get cluster settings and to get the number of open shards already in + * the cluster + */ public static Result checkShardLimitForFrozenNodes( - int maxShardsPerNodeSetting, + int maxConfiguredShardsPerNode, int numberOfNewShards, int replicas, ClusterState state @@ -238,15 +241,14 @@ public static Result checkShardLimitForFrozenNodes( return checkShardLimit( numberOfNewShards * (1 + replicas), state, - maxShardsPerNodeSetting, + maxConfiguredShardsPerNode, nodeCount(state, ShardLimitValidator::hasFrozen), FROZEN_GROUP ); } - // package-private for testing - static Result checkShardLimit(int newShards, ClusterState state, int maxShardsPerNode, int nodeCount, String group) { - int maxShardsInCluster = maxShardsPerNode * nodeCount; + private static Result checkShardLimit(int newShards, ClusterState state, int maxConfiguredShardsPerNode, int nodeCount, String group) { + int maxShardsInCluster = maxConfiguredShardsPerNode * nodeCount; int currentOpenShards = state.getMetadata().getTotalOpenIndexShards(); // Only enforce the shard limit if we have at least one data node, so that we don't block diff --git a/server/src/test/java/org/elasticsearch/indices/ShardLimitValidatorTests.java b/server/src/test/java/org/elasticsearch/indices/ShardLimitValidatorTests.java index 331f40e214c11..3351d77ff4466 100644 --- a/server/src/test/java/org/elasticsearch/indices/ShardLimitValidatorTests.java +++ b/server/src/test/java/org/elasticsearch/indices/ShardLimitValidatorTests.java @@ -32,6 +32,7 @@ import static org.elasticsearch.cluster.metadata.MetadataIndexStateServiceTests.addClosedIndex; import static org.elasticsearch.cluster.metadata.MetadataIndexStateServiceTests.addOpenedIndex; import static org.elasticsearch.cluster.shards.ShardCounts.forDataNodeCount; +import static org.elasticsearch.indices.ShardLimitValidator.FROZEN_GROUP; import static org.elasticsearch.indices.ShardLimitValidator.NORMAL_GROUP; import static org.elasticsearch.indices.ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE; import static org.mockito.Mockito.mock; @@ -39,7 +40,17 @@ public class ShardLimitValidatorTests extends ESTestCase { + @FunctionalInterface + interface CheckShardLimitMethod { + ShardLimitValidator.Result call(int maxConfiguredShardsPerNode, int numberOfNewShards, int replicas, ClusterState state); + } + public void testOverShardLimit() { + testOverShardLimit(ShardLimitValidator::checkShardLimitForNormalNodes, NORMAL_GROUP); + testOverShardLimit(ShardLimitValidator::checkShardLimitForFrozenNodes, FROZEN_GROUP); + } + + private void testOverShardLimit(CheckShardLimitMethod targetMethod, String group) { int nodesInCluster = randomIntBetween(1, 90); ShardCounts counts = forDataNodeCount(nodesInCluster); ClusterState state = createClusterForShardLimitTest( @@ -47,16 +58,13 @@ public void testOverShardLimit() { counts.getFirstIndexShards(), counts.getFirstIndexReplicas(), counts.getShardsPerNode(), - NORMAL_GROUP + group ); - - int shardsToAdd = counts.getFailingIndexShards() * (1 + counts.getFailingIndexReplicas()); - var shardLimitsResult = ShardLimitValidator.checkShardLimit( - shardsToAdd, - state, + ShardLimitValidator.Result shardLimitsResult = targetMethod.call( counts.getShardsPerNode(), - nodesInCluster, - NORMAL_GROUP + counts.getFailingIndexShards(), + counts.getFailingIndexReplicas(), + state ); int totalShards = counts.getFailingIndexShards() * (1 + counts.getFailingIndexReplicas()); @@ -72,7 +80,7 @@ public void testOverShardLimit() { + "]/[" + maxShards + "] maximum " - + NORMAL_GROUP + + group + " shards open", ShardLimitValidator.errorMessageFrom(shardLimitsResult) ); @@ -80,37 +88,37 @@ public void testOverShardLimit() { assertEquals(shardLimitsResult.totalShardsToAdd(), totalShards); shardLimitsResult.currentUsedShards() .ifPresentOrElse(v -> assertEquals(currentOpenShards, v.intValue()), () -> fail("currentUsedShard should be defined")); - assertEquals(shardLimitsResult.group(), "normal"); + assertEquals(shardLimitsResult.group(), group); } public void testUnderShardLimit() { - int nodesInCluster = randomIntBetween(2, 90); - // Calculate the counts for a cluster 1 node smaller than we have to ensure we have headroom - ShardCounts counts = forDataNodeCount(nodesInCluster - 1); + testUnderShardLimit(ShardLimitValidator::checkShardLimitForNormalNodes, NORMAL_GROUP); + testUnderShardLimit(ShardLimitValidator::checkShardLimitForFrozenNodes, FROZEN_GROUP); + } + + private void testUnderShardLimit(CheckShardLimitMethod targetMethod, String group) { + int nodesInCluster = randomIntBetween(10, 90); + // Calculate the counts for a cluster with maximum of 60% of occupancy + ShardCounts counts = forDataNodeCount((int) (nodesInCluster * 0.6)); ClusterState state = createClusterForShardLimitTest( nodesInCluster, counts.getFirstIndexShards(), counts.getFirstIndexReplicas(), counts.getShardsPerNode(), - NORMAL_GROUP + group ); - int existingShards = counts.getFirstIndexShards() * (1 + counts.getFirstIndexReplicas()); + int replicas = randomIntBetween(0, 3); int maxShardsInCluster = counts.getShardsPerNode() * nodesInCluster; - int shardsToAdd = randomIntBetween(1, maxShardsInCluster - existingShards); - var shardLimitsResult = ShardLimitValidator.checkShardLimit( - shardsToAdd, - state, - counts.getShardsPerNode(), - nodesInCluster, - NORMAL_GROUP - ); - + int existingShards = counts.getFirstIndexShards() * (1 + counts.getFirstIndexReplicas()); + int availableRoom = maxShardsInCluster - existingShards; + int shardsToAdd = randomIntBetween(1, Math.max(availableRoom / (replicas + 1), 1)); + ShardLimitValidator.Result shardLimitsResult = targetMethod.call(counts.getShardsPerNode(), shardsToAdd, replicas, state); assertTrue(shardLimitsResult.canAddShards()); - assertEquals(shardLimitsResult.maxShardsInCluster(), maxShardsInCluster); - assertEquals(shardLimitsResult.totalShardsToAdd(), shardsToAdd); + assertEquals(shardLimitsResult.maxShardsInCluster(), counts.getShardsPerNode() * nodesInCluster); + assertEquals(shardLimitsResult.totalShardsToAdd(), shardsToAdd * (replicas + 1)); assertFalse(shardLimitsResult.currentUsedShards().isPresent()); - assertEquals(shardLimitsResult.group(), "normal"); + assertEquals(shardLimitsResult.group(), group); } public void testValidateShardLimitOpenIndices() { diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecks.java b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecks.java index 058527132b24a..76cf7dc6f2aac 100644 --- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecks.java +++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/ClusterDeprecationChecks.java @@ -13,6 +13,8 @@ import java.util.Locale; +import static org.elasticsearch.indices.ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE; + public class ClusterDeprecationChecks { /** * Upgrading can require the addition of one or more small indices. This method checks that based on configuration we have the room @@ -24,7 +26,9 @@ static DeprecationIssue checkShards(ClusterState clusterState) { // Make sure we have room to add a small non-frozen index if needed final int shardsInFutureNewSmallIndex = 5; final int replicasForFutureIndex = 1; + final int maxConfiguredShardsPerNode = SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(clusterState.getMetadata().settings()); ShardLimitValidator.Result shardLimitsResult = ShardLimitValidator.checkShardLimitForNormalNodes( + maxConfiguredShardsPerNode, shardsInFutureNewSmallIndex, replicasForFutureIndex, clusterState From 7d38c6f79f9b64b0ef7dca09fe30c249d1c5b594 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Wed, 22 Mar 2023 11:12:35 -0300 Subject: [PATCH 15/28] add teardown method ensuring settings is back to its default value --- .../node/ShardLimitsHealthIndicatorServiceIT.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java index 88ec78a0a17b7..940f3d644fac7 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java @@ -19,6 +19,7 @@ import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.InternalTestCluster; import org.elasticsearch.test.NodeRoles; +import org.junit.After; import org.junit.Before; import java.util.List; @@ -44,6 +45,15 @@ public void setUp() throws Exception { updateClusterSettings(Settings.builder().put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), 30)); } + @After + public void tearDown() throws Exception { + super.tearDown(); + updateClusterSettings( + Settings.builder() + .put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getDefault(Settings.EMPTY)) + ); + } + public void testGreen() throws Exception { // index: 4 shards + 1 replica = 8 shards used (30 - 8 = 22 > 10 available shards) createIndex(4, 1); From a93a5700ab1369d36cabc00712f849e47c6c50c7 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Wed, 22 Mar 2023 11:19:23 -0300 Subject: [PATCH 16/28] improve diagnoses message --- .../ShardLimitsHealthIndicatorService.java | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index 3552a4a78b1ae..f133743d02843 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -9,6 +9,7 @@ package org.elasticsearch.health.node; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.settings.Setting; import org.elasticsearch.health.Diagnosis; import org.elasticsearch.health.HealthIndicatorDetails; import org.elasticsearch.health.HealthIndicatorImpact; @@ -41,37 +42,35 @@ */ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService { + static final String NAME = "shard_limits"; + private static final String UPGRADE_BLOCKED = "The cluster has too many used shards to be able to upgrade"; private static final String UPGRADE_AT_RISK = "The cluster is running low on room to add new shards hence upgrade is at risk"; private static final String HELP_GUIDE = "https://ela.st/max-shard-limit-reached"; - static final String NAME = "shard_limits"; - static final List RED_INDICATOR_IMPACTS = List.of( - new HealthIndicatorImpact(NAME, "upgrade_blocked", 1, UPGRADE_BLOCKED, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)) - ); - static final List YELLOW_INDICATOR_IMPACTS = List.of( - new HealthIndicatorImpact(NAME, "upgrade_at_risk", 2, UPGRADE_AT_RISK, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)) - ); - - private static final BiFunction SHARD_LIMITS_REACHED_FN = (id, settingName) -> new Diagnosis( + private static final BiFunction, Diagnosis> SHARD_LIMITS_REACHED_FN = (id, setting) -> new Diagnosis( new Diagnosis.Definition( NAME, id, - "The current value of the setting `" - + settingName - + "` is either about to be or already reached which will not allow adding more shards to the cluster.", - "Consider increasing the currently configured value or removing indices to clear up resources " + HELP_GUIDE, + "Elasticsearch is about to reach the maximum number of shards it can host.", + "Increase the value of [" + setting.getKey() + "] cluster setting or remove indices to clear up resources.", HELP_GUIDE ), null ); + static final List RED_INDICATOR_IMPACTS = List.of( + new HealthIndicatorImpact(NAME, "upgrade_blocked", 1, UPGRADE_BLOCKED, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)) + ); + static final List YELLOW_INDICATOR_IMPACTS = List.of( + new HealthIndicatorImpact(NAME, "upgrade_at_risk", 2, UPGRADE_AT_RISK, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)) + ); static final Diagnosis SHARD_LIMITS_REACHED_NORMAL_NODES = SHARD_LIMITS_REACHED_FN.apply( "increase_max_shards_per_node", - ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey() + ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE ); static final Diagnosis SHARD_LIMITS_REACHED_FROZEN_NODES = SHARD_LIMITS_REACHED_FN.apply( "increase_max_shards_per_node_frozen", - ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN.getKey() + ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN ); private final ClusterService clusterService; From 63cdf25d766c6e7516afd4ed3a37b0771d3e8151 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Wed, 22 Mar 2023 11:52:30 -0300 Subject: [PATCH 17/28] reword unknown indicator message --- .../health/node/ShardLimitsHealthIndicatorService.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index f133743d02843..5f395f3d03a86 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -197,7 +197,13 @@ static HealthIndicatorDetails buildDetails(ShardLimitValidator.Result normalNode } private HealthIndicatorResult unknownIndicator() { - return createIndicator(HealthStatus.UNKNOWN, "No shard limits data.", HealthIndicatorDetails.EMPTY, List.of(), List.of()); + return createIndicator( + HealthStatus.UNKNOWN, + "No number of shards limits data.", + HealthIndicatorDetails.EMPTY, + List.of(), + List.of() + ); } private record StatusResult(HealthStatus status, ShardLimitValidator.Result result) {} From 31b8dee32fe1f8c43e3fd0abe5cf22a3919c9802 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Wed, 22 Mar 2023 12:36:41 -0300 Subject: [PATCH 18/28] reword some messages based on @tylerperk comments --- .../ShardLimitsHealthIndicatorServiceIT.java | 8 +++--- .../ShardLimitsHealthIndicatorService.java | 6 ++--- ...hardLimitsHealthIndicatorServiceTests.java | 26 ++++++++++++++----- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java index 940f3d644fac7..549a403b5c440 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java @@ -71,7 +71,7 @@ public void testYellow() throws Exception { var result = fetchShardLimitsIndicatorResult(internalCluster); assertEquals(result.status(), HealthStatus.YELLOW); - assertEquals(result.symptom(), "Cluster is close to reaching the maximum number of shards for normal nodes."); + assertEquals(result.symptom(), "Cluster is close to reaching the configured maximum number of shards for normal nodes."); assertThat(result.diagnosisList(), hasSize(1)); assertThat(result.impacts(), hasSize(1)); } @@ -82,7 +82,7 @@ public void testRed() throws Exception { var result = fetchShardLimitsIndicatorResult(internalCluster); assertEquals(result.status(), HealthStatus.RED); - assertEquals(result.symptom(), "Cluster is close to reaching the maximum number of shards for normal nodes."); + assertEquals(result.symptom(), "Cluster is close to reaching the configured maximum number of shards for normal nodes."); assertThat(result.diagnosisList(), hasSize(1)); assertThat(result.impacts(), hasSize(1)); } @@ -132,11 +132,11 @@ private void waitForHealthMetadata() throws Exception { assertNotNull(healthMetadata); assertNotNull(healthMetadata.getShardLimitsMetadata()); assertTrue( - "max_shards_per_node settings must be greater than 0", + "max_shards_per_node setting must be greater than 0", healthMetadata.getShardLimitsMetadata().maxShardsPerNode() > 0 ); assertTrue( - "max_shards_per_node.frozen settings must be greater than 0", + "max_shards_per_node.frozen setting must be greater than 0", healthMetadata.getShardLimitsMetadata().maxShardsPerNodeFrozen() > 0 ); }); diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index 5f395f3d03a86..6fa82a218a64d 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -51,7 +51,7 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService new Diagnosis.Definition( NAME, id, - "Elasticsearch is about to reach the maximum number of shards it can host.", + "Elasticsearch is about to reach the maximum number of shards it can host, based on your current settings.", "Increase the value of [" + setting.getKey() + "] cluster setting or remove indices to clear up resources.", HELP_GUIDE ), @@ -109,7 +109,7 @@ private HealthIndicatorResult mergeIndicators(StatusResult normalNodes, StatusRe // RED and YELLOW status indicates that the cluster might have issues. finalStatus has the worst between *normal and frozen* nodes, // so we have to check each of the groups in order of provide the right message. if (finalStatus.indicatesHealthProblem()) { - symptomBuilder.append("Cluster is close to reaching the maximum number of shards for "); + symptomBuilder.append("Cluster is close to reaching the configured maximum number of shards for "); if (normalNodes.status == frozenNodes.status) { symptomBuilder.append(NORMAL_GROUP).append(" and ").append(FROZEN_GROUP); diagnoses = List.of(SHARD_LIMITS_REACHED_NORMAL_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES); @@ -199,7 +199,7 @@ static HealthIndicatorDetails buildDetails(ShardLimitValidator.Result normalNode private HealthIndicatorResult unknownIndicator() { return createIndicator( HealthStatus.UNKNOWN, - "No number of shards limits data.", + "Unable to determine shard limit status.", HealthIndicatorDetails.EMPTY, List.of(), List.of() diff --git a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java index 22f4d6787d416..869079f9112c0 100644 --- a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java +++ b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java @@ -96,7 +96,7 @@ public void testNoShardLimitsMetadata() throws IOException { assertEquals(indicatorResult.status(), HealthStatus.UNKNOWN); assertTrue(indicatorResult.impacts().isEmpty()); assertTrue(indicatorResult.diagnosisList().isEmpty()); - assertEquals(indicatorResult.symptom(), "No shard limits data."); + assertEquals(indicatorResult.symptom(), "Unable to determine shard limit status."); assertEquals(xContentToMap(indicatorResult.details()), Map.of()); } @@ -131,7 +131,10 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), HealthStatus.YELLOW); - assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum number of shards for normal nodes."); + assertEquals( + indicatorResult.symptom(), + "Cluster is close to reaching the configured maximum number of shards for normal nodes." + ); assertThat(indicatorResult.impacts(), equalTo(YELLOW_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_NORMAL_NODES)); @@ -154,7 +157,10 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), HealthStatus.YELLOW); - assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum number of shards for frozen nodes."); + assertEquals( + indicatorResult.symptom(), + "Cluster is close to reaching the configured maximum number of shards for frozen nodes." + ); assertThat(indicatorResult.impacts(), equalTo(YELLOW_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_FROZEN_NODES)); @@ -178,7 +184,7 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep assertEquals(indicatorResult.status(), HealthStatus.YELLOW); assertEquals( indicatorResult.symptom(), - "Cluster is close to reaching the maximum number of shards for normal and frozen nodes." + "Cluster is close to reaching the configured maximum number of shards for normal and frozen nodes." ); assertThat(indicatorResult.impacts(), equalTo(YELLOW_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), is(List.of(SHARD_LIMITS_REACHED_NORMAL_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES))); @@ -204,7 +210,10 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), HealthStatus.RED); - assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum number of shards for normal nodes."); + assertEquals( + indicatorResult.symptom(), + "Cluster is close to reaching the configured maximum number of shards for normal nodes." + ); assertThat(indicatorResult.impacts(), equalTo(RED_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_NORMAL_NODES)); @@ -227,7 +236,10 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), HealthStatus.RED); - assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the maximum number of shards for frozen nodes."); + assertEquals( + indicatorResult.symptom(), + "Cluster is close to reaching the configured maximum number of shards for frozen nodes." + ); assertThat(indicatorResult.impacts(), equalTo(RED_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_FROZEN_NODES)); @@ -251,7 +263,7 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio assertEquals(indicatorResult.status(), HealthStatus.RED); assertEquals( indicatorResult.symptom(), - "Cluster is close to reaching the maximum number of shards for normal and frozen nodes." + "Cluster is close to reaching the configured maximum number of shards for normal and frozen nodes." ); assertThat(indicatorResult.impacts(), equalTo(RED_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), is(List.of(SHARD_LIMITS_REACHED_NORMAL_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES))); From c768eb01fc8f4edfd772602117538ec7a10712da Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Wed, 22 Mar 2023 14:14:52 -0300 Subject: [PATCH 19/28] rename indicator details --- .../ShardLimitsHealthIndicatorService.java | 4 +-- ...hardLimitsHealthIndicatorServiceTests.java | 28 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index 6fa82a218a64d..7e2c1c674f8b9 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -176,7 +176,7 @@ static HealthIndicatorDetails buildDetails(ShardLimitValidator.Result normalNode return (builder, params) -> { builder.startObject(); { - builder.startObject(NORMAL_GROUP + "_nodes"); + builder.startObject("data"); builder.field("max_shards_in_cluster", normalNodes.maxShardsInCluster()); if (normalNodes.currentUsedShards().isPresent()) { builder.field("current_used_shards", normalNodes.currentUsedShards().get()); @@ -184,7 +184,7 @@ static HealthIndicatorDetails buildDetails(ShardLimitValidator.Result normalNode builder.endObject(); } { - builder.startObject(FROZEN_GROUP + "_nodes"); + builder.startObject("frozen"); builder.field("max_shards_in_cluster", frozenNodes.maxShardsInCluster()); if (frozenNodes.currentUsedShards().isPresent()) { builder.field("current_used_shards", frozenNodes.currentUsedShards().get()); diff --git a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java index 869079f9112c0..cc4a8a76542c2 100644 --- a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java +++ b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java @@ -114,9 +114,9 @@ public void testIndicatorYieldsGreenInCaseThereIsRoom() throws IOException { xContentToMap(indicatorResult.details()), is( Map.of( - "normal_nodes", + "data", Map.of("max_shards_in_cluster", maxShardsPerNode), - "frozen_nodes", + "frozen", Map.of("max_shards_in_cluster", maxShardsPerNodeFrozen) ) ) @@ -142,9 +142,9 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep xContentToMap(indicatorResult.details()), is( Map.of( - "normal_nodes", + "data", Map.of("max_shards_in_cluster", 25, "current_used_shards", 8), - "frozen_nodes", + "frozen", Map.of("max_shards_in_cluster", maxShardsPerNodeFrozen) ) ) @@ -168,9 +168,9 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep xContentToMap(indicatorResult.details()), is( Map.of( - "normal_nodes", + "data", Map.of("max_shards_in_cluster", maxShardsPerNode), - "frozen_nodes", + "frozen", Map.of("max_shards_in_cluster", 25, "current_used_shards", 8) ) ) @@ -192,9 +192,9 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep xContentToMap(indicatorResult.details()), is( Map.of( - "normal_nodes", + "data", Map.of("max_shards_in_cluster", 25, "current_used_shards", 8), - "frozen_nodes", + "frozen", Map.of("max_shards_in_cluster", 25, "current_used_shards", 8) ) ) @@ -221,9 +221,9 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio xContentToMap(indicatorResult.details()), is( Map.of( - "normal_nodes", + "data", Map.of("max_shards_in_cluster", 25, "current_used_shards", 22), - "frozen_nodes", + "frozen", Map.of("max_shards_in_cluster", maxShardsPerNodeFrozen) ) ) @@ -247,9 +247,9 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio xContentToMap(indicatorResult.details()), is( Map.of( - "normal_nodes", + "data", Map.of("max_shards_in_cluster", maxShardsPerNode), - "frozen_nodes", + "frozen", Map.of("max_shards_in_cluster", 25, "current_used_shards", 22) ) ) @@ -271,9 +271,9 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio xContentToMap(indicatorResult.details()), is( Map.of( - "normal_nodes", + "data", Map.of("max_shards_in_cluster", 25, "current_used_shards", 22), - "frozen_nodes", + "frozen", Map.of("max_shards_in_cluster", 25, "current_used_shards", 22) ) ) From 2a27665441486167163f01208fe6fb42a59d1a9c Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Wed, 22 Mar 2023 14:21:59 -0300 Subject: [PATCH 20/28] clearly indicate what type of indices to remove --- .../ShardLimitsHealthIndicatorService.java | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index 7e2c1c674f8b9..e916bab6b788c 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -9,6 +9,7 @@ package org.elasticsearch.health.node; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.TriFunction; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.health.Diagnosis; import org.elasticsearch.health.HealthIndicatorDetails; @@ -21,7 +22,6 @@ import org.elasticsearch.indices.ShardLimitValidator; import java.util.List; -import java.util.function.BiFunction; import java.util.stream.Stream; import static org.elasticsearch.indices.ShardLimitValidator.FROZEN_GROUP; @@ -47,16 +47,23 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService private static final String UPGRADE_BLOCKED = "The cluster has too many used shards to be able to upgrade"; private static final String UPGRADE_AT_RISK = "The cluster is running low on room to add new shards hence upgrade is at risk"; private static final String HELP_GUIDE = "https://ela.st/max-shard-limit-reached"; - private static final BiFunction, Diagnosis> SHARD_LIMITS_REACHED_FN = (id, setting) -> new Diagnosis( - new Diagnosis.Definition( - NAME, - id, - "Elasticsearch is about to reach the maximum number of shards it can host, based on your current settings.", - "Increase the value of [" + setting.getKey() + "] cluster setting or remove indices to clear up resources.", - HELP_GUIDE - ), - null - ); + private static final TriFunction, String, Diagnosis> SHARD_LIMITS_REACHED_FN = ( + id, + setting, + indexType) -> new Diagnosis( + new Diagnosis.Definition( + NAME, + id, + "Elasticsearch is about to reach the maximum number of shards it can host, based on your current settings.", + "Increase the value of [" + + setting.getKey() + + "] cluster setting or remove " + + indexType + + " indices to clear up resources.", + HELP_GUIDE + ), + null + ); static final List RED_INDICATOR_IMPACTS = List.of( new HealthIndicatorImpact(NAME, "upgrade_blocked", 1, UPGRADE_BLOCKED, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)) @@ -66,11 +73,13 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService ); static final Diagnosis SHARD_LIMITS_REACHED_NORMAL_NODES = SHARD_LIMITS_REACHED_FN.apply( "increase_max_shards_per_node", - ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE + ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE, + "non-frozen" ); static final Diagnosis SHARD_LIMITS_REACHED_FROZEN_NODES = SHARD_LIMITS_REACHED_FN.apply( "increase_max_shards_per_node_frozen", - ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN + ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN, + "frozen" ); private final ClusterService clusterService; From f2bc7fb37e73f63239a54ea8a55f32c4e9b1e6f9 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Wed, 22 Mar 2023 14:26:30 -0300 Subject: [PATCH 21/28] add impact for index creation at risk or blocked --- .../node/ShardLimitsHealthIndicatorServiceIT.java | 4 ++-- .../node/ShardLimitsHealthIndicatorService.java | 14 ++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java index 549a403b5c440..6521c9c52be00 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java @@ -73,7 +73,7 @@ public void testYellow() throws Exception { assertEquals(result.status(), HealthStatus.YELLOW); assertEquals(result.symptom(), "Cluster is close to reaching the configured maximum number of shards for normal nodes."); assertThat(result.diagnosisList(), hasSize(1)); - assertThat(result.impacts(), hasSize(1)); + assertThat(result.impacts(), hasSize(2)); } public void testRed() throws Exception { @@ -84,7 +84,7 @@ public void testRed() throws Exception { assertEquals(result.status(), HealthStatus.RED); assertEquals(result.symptom(), "Cluster is close to reaching the configured maximum number of shards for normal nodes."); assertThat(result.diagnosisList(), hasSize(1)); - assertThat(result.impacts(), hasSize(1)); + assertThat(result.impacts(), hasSize(2)); } private void createIndex(int shards, int replicas) { diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index e916bab6b788c..cf0f9b50a0919 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -44,8 +44,12 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService static final String NAME = "shard_limits"; - private static final String UPGRADE_BLOCKED = "The cluster has too many used shards to be able to upgrade"; - private static final String UPGRADE_AT_RISK = "The cluster is running low on room to add new shards hence upgrade is at risk"; + private static final String UPGRADE_BLOCKED = "The cluster has too many used shards to be able to upgrade."; + private static final String UPGRADE_AT_RISK = "The cluster is running low on room to add new shards hence upgrade is at risk."; + private static final String INDEX_CREATION_BLOCKED = + "The cluster is running low on room to add new shards hence the creation of new indices might fail."; + private static final String INDEX_CREATION_RISK = + "The cluster is running low on room to add new shards hence the creation of new indices might fail."; private static final String HELP_GUIDE = "https://ela.st/max-shard-limit-reached"; private static final TriFunction, String, Diagnosis> SHARD_LIMITS_REACHED_FN = ( id, @@ -66,10 +70,12 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService ); static final List RED_INDICATOR_IMPACTS = List.of( - new HealthIndicatorImpact(NAME, "upgrade_blocked", 1, UPGRADE_BLOCKED, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)) + new HealthIndicatorImpact(NAME, "upgrade_blocked", 1, UPGRADE_BLOCKED, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)), + new HealthIndicatorImpact(NAME, "creation_of_new_indices_blocked", 1, INDEX_CREATION_BLOCKED, List.of(ImpactArea.INGEST)) ); static final List YELLOW_INDICATOR_IMPACTS = List.of( - new HealthIndicatorImpact(NAME, "upgrade_at_risk", 2, UPGRADE_AT_RISK, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)) + new HealthIndicatorImpact(NAME, "upgrade_at_risk", 2, UPGRADE_AT_RISK, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)), + new HealthIndicatorImpact(NAME, "creation_of_new_indices_at_risk", 2, INDEX_CREATION_RISK, List.of(ImpactArea.INGEST)) ); static final Diagnosis SHARD_LIMITS_REACHED_NORMAL_NODES = SHARD_LIMITS_REACHED_FN.apply( "increase_max_shards_per_node", From b22c1598411e0befff05139cd19c96edad1cdf23 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Wed, 22 Mar 2023 15:21:39 -0300 Subject: [PATCH 22/28] refactor calculate method this makes the method generic enough, so can easily test the internal logic --- .../ShardLimitsHealthIndicatorService.java | 39 ++++++--------- ...hardLimitsHealthIndicatorServiceTests.java | 49 ++++++++++++++++--- 2 files changed, 58 insertions(+), 30 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index cf0f9b50a0919..257ab34c230f2 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -8,6 +8,7 @@ package org.elasticsearch.health.node; +import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.TriFunction; import org.elasticsearch.common.settings.Setting; @@ -101,14 +102,16 @@ public String name() { @Override public HealthIndicatorResult calculate(boolean verbose, int maxAffectedResourcesCount, HealthInfo healthInfo) { - var healthMetadata = HealthMetadata.getFromClusterState(clusterService.state()); + var state = clusterService.state(); + var healthMetadata = HealthMetadata.getFromClusterState(state); if (healthMetadata == null || healthMetadata.getShardLimitsMetadata() == null) { return unknownIndicator(); } + var shardLimitsMetadata = healthMetadata.getShardLimitsMetadata(); return mergeIndicators( - calculateForNormalNodes(healthMetadata.getShardLimitsMetadata()), - calculateForFrozenNodes(healthMetadata.getShardLimitsMetadata()) + calculateFrom(shardLimitsMetadata.maxShardsPerNode(), state, ShardLimitValidator::checkShardLimitForNormalNodes), + calculateFrom(shardLimitsMetadata.maxShardsPerNodeFrozen(), state, ShardLimitValidator::checkShardLimitForFrozenNodes) ); } @@ -156,30 +159,13 @@ private HealthIndicatorResult mergeIndicators(StatusResult normalNodes, StatusRe ); } - private StatusResult calculateForNormalNodes(HealthMetadata.ShardLimits shardLimits) { - int maxShardsPerNodeSetting = shardLimits.maxShardsPerNode(); - var result = ShardLimitValidator.checkShardLimitForNormalNodes(maxShardsPerNodeSetting, 5, 1, clusterService.state()); - - if (result.canAddShards() == false) { - return new StatusResult(HealthStatus.RED, result); - } - - result = ShardLimitValidator.checkShardLimitForNormalNodes(maxShardsPerNodeSetting, 10, 1, clusterService.state()); - if (result.canAddShards() == false) { - return new StatusResult(HealthStatus.YELLOW, result); - } - - return new StatusResult(HealthStatus.GREEN, result); - } - - private StatusResult calculateForFrozenNodes(HealthMetadata.ShardLimits shardLimits) { - int maxShardsPerNodeSetting = shardLimits.maxShardsPerNodeFrozen(); - var result = ShardLimitValidator.checkShardLimitForFrozenNodes(maxShardsPerNodeSetting, 5, 1, clusterService.state()); + static StatusResult calculateFrom(int maxShardsPerNodeSetting, ClusterState state, ShardLimitsChecker checker) { + var result = checker.check(maxShardsPerNodeSetting, 5, 1, state); if (result.canAddShards() == false) { return new StatusResult(HealthStatus.RED, result); } - result = ShardLimitValidator.checkShardLimitForFrozenNodes(maxShardsPerNodeSetting, 10, 1, clusterService.state()); + result = checker.check(maxShardsPerNodeSetting, 10, 1, state); if (result.canAddShards() == false) { return new StatusResult(HealthStatus.YELLOW, result); } @@ -221,5 +207,10 @@ private HealthIndicatorResult unknownIndicator() { ); } - private record StatusResult(HealthStatus status, ShardLimitValidator.Result result) {} + record StatusResult(HealthStatus status, ShardLimitValidator.Result result) {} + + @FunctionalInterface + interface ShardLimitsChecker { + ShardLimitValidator.Result check(int maxConfiguredShardsPerNode, int numberOfNewShards, int replicas, ClusterState state); + } } diff --git a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java index cc4a8a76542c2..fad3e9784c682 100644 --- a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java +++ b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java @@ -33,16 +33,23 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; +import java.util.function.Function; +import java.util.stream.Stream; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_CREATION_DATE; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_VERSION_CREATED; +import static org.elasticsearch.health.HealthStatus.GREEN; +import static org.elasticsearch.health.HealthStatus.RED; +import static org.elasticsearch.health.HealthStatus.YELLOW; import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.RED_INDICATOR_IMPACTS; import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.SHARD_LIMITS_REACHED_FROZEN_NODES; import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.SHARD_LIMITS_REACHED_NORMAL_NODES; import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.YELLOW_INDICATOR_IMPACTS; +import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.calculateFrom; import static org.elasticsearch.indices.ShardLimitValidator.FROZEN_GROUP; import static org.elasticsearch.indices.ShardLimitValidator.INDEX_SETTING_SHARD_LIMIT_GROUP; import static org.elasticsearch.indices.ShardLimitValidator.NORMAL_GROUP; @@ -130,7 +137,7 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep var clusterService = createClusterService(25, maxShardsPerNodeFrozen, createIndexInNormalNode(4)); var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); - assertEquals(indicatorResult.status(), HealthStatus.YELLOW); + assertEquals(indicatorResult.status(), YELLOW); assertEquals( indicatorResult.symptom(), "Cluster is close to reaching the configured maximum number of shards for normal nodes." @@ -156,7 +163,7 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep var clusterService = createClusterService(maxShardsPerNode, 25, createIndexInFrozenNode(4)); var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); - assertEquals(indicatorResult.status(), HealthStatus.YELLOW); + assertEquals(indicatorResult.status(), YELLOW); assertEquals( indicatorResult.symptom(), "Cluster is close to reaching the configured maximum number of shards for frozen nodes." @@ -181,7 +188,7 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep var clusterService = createClusterService(25, 25, createIndexInNormalNode(4), createIndexInFrozenNode(4)); var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); - assertEquals(indicatorResult.status(), HealthStatus.YELLOW); + assertEquals(indicatorResult.status(), YELLOW); assertEquals( indicatorResult.symptom(), "Cluster is close to reaching the configured maximum number of shards for normal and frozen nodes." @@ -209,7 +216,7 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio var clusterService = createClusterService(25, maxShardsPerNodeFrozen, createIndexInNormalNode(11)); var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); - assertEquals(indicatorResult.status(), HealthStatus.RED); + assertEquals(indicatorResult.status(), RED); assertEquals( indicatorResult.symptom(), "Cluster is close to reaching the configured maximum number of shards for normal nodes." @@ -235,7 +242,7 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio var clusterService = createClusterService(maxShardsPerNode, 25, createIndexInFrozenNode(11)); var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); - assertEquals(indicatorResult.status(), HealthStatus.RED); + assertEquals(indicatorResult.status(), RED); assertEquals( indicatorResult.symptom(), "Cluster is close to reaching the configured maximum number of shards for frozen nodes." @@ -260,7 +267,7 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio var clusterService = createClusterService(25, 25, createIndexInNormalNode(11), createIndexInFrozenNode(11)); var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); - assertEquals(indicatorResult.status(), HealthStatus.RED); + assertEquals(indicatorResult.status(), RED); assertEquals( indicatorResult.symptom(), "Cluster is close to reaching the configured maximum number of shards for normal and frozen nodes." @@ -281,6 +288,36 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio } } + public void testCalculateMethods() { + var mockedState = mock(ClusterState.class); + var randomMaxShardsPerNodeSetting = randomInt(); + Function checkerWrapper = shardsToAdd -> ( + maxConfiguredShardsPerNode, + numberOfNewShards, + replicas, + state) -> { + assertEquals(mockedState, state); + assertEquals(randomMaxShardsPerNodeSetting, maxConfiguredShardsPerNode); + return new ShardLimitValidator.Result( + numberOfNewShards != shardsToAdd && replicas == 1, + Optional.empty(), + randomInt(), + randomInt(), + randomAlphaOfLength(5) + ); + }; + + assertEquals(calculateFrom(randomMaxShardsPerNodeSetting, mockedState, checkerWrapper.apply(5)).status(), RED); + assertEquals(calculateFrom(randomMaxShardsPerNodeSetting, mockedState, checkerWrapper.apply(10)).status(), YELLOW); + + // Let's cover the holes :) + Stream.of(randomIntBetween(1, 4), randomIntBetween(6, 9), randomIntBetween(11, Integer.MAX_VALUE)) + .map(checkerWrapper) + .map(checker -> calculateFrom(randomMaxShardsPerNodeSetting, mockedState, checker)) + .map(ShardLimitsHealthIndicatorService.StatusResult::status) + .forEach(status -> assertEquals(status, GREEN)); + } + // We expose the indicator name and the diagnoses in the x-pack usage API. In order to index them properly in a telemetry index // they need to be declared in the health-api-indexer.edn in the telemetry repository. public void testMappedFieldsForTelemetry() { From abaac96c51c4c6a3ac7a5a2b80272856c9804b2e Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Thu, 23 Mar 2023 14:16:53 -0300 Subject: [PATCH 23/28] rename remaining references to normal group --- .../ShardLimitsHealthIndicatorServiceIT.java | 4 +- .../ShardLimitsHealthIndicatorService.java | 47 +++++++------- ...hardLimitsHealthIndicatorServiceTests.java | 64 ++++++++----------- 3 files changed, 52 insertions(+), 63 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java index 6521c9c52be00..c4486935a4aeb 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java @@ -71,7 +71,7 @@ public void testYellow() throws Exception { var result = fetchShardLimitsIndicatorResult(internalCluster); assertEquals(result.status(), HealthStatus.YELLOW); - assertEquals(result.symptom(), "Cluster is close to reaching the configured maximum number of shards for normal nodes."); + assertEquals(result.symptom(), "Cluster is close to reaching the configured maximum number of shards for data nodes."); assertThat(result.diagnosisList(), hasSize(1)); assertThat(result.impacts(), hasSize(2)); } @@ -82,7 +82,7 @@ public void testRed() throws Exception { var result = fetchShardLimitsIndicatorResult(internalCluster); assertEquals(result.status(), HealthStatus.RED); - assertEquals(result.symptom(), "Cluster is close to reaching the configured maximum number of shards for normal nodes."); + assertEquals(result.symptom(), "Cluster is close to reaching the configured maximum number of shards for data nodes."); assertThat(result.diagnosisList(), hasSize(1)); assertThat(result.impacts(), hasSize(2)); } diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java index 257ab34c230f2..ddfdef2c75252 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java @@ -25,16 +25,13 @@ import java.util.List; import java.util.stream.Stream; -import static org.elasticsearch.indices.ShardLimitValidator.FROZEN_GROUP; -import static org.elasticsearch.indices.ShardLimitValidator.NORMAL_GROUP; - /** * This indicator reports health data about the shard limit across the cluster. * *

* The indicator will report: - * * RED when there's room for less than 5 shards (either normal or frozen nodes) - * * YELLOW when there's room for less than 10 shards (either normal or frozen nodes) + * * RED when there's room for less than 5 shards (either data or frozen nodes) + * * YELLOW when there's room for less than 10 shards (either data or frozen nodes) * * GREEN otherwise *

* @@ -45,6 +42,8 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService static final String NAME = "shard_limits"; + static final String DATA_NODE_NAME = "data"; + static final String FROZEN_NODE_NAME = "frozen"; private static final String UPGRADE_BLOCKED = "The cluster has too many used shards to be able to upgrade."; private static final String UPGRADE_AT_RISK = "The cluster is running low on room to add new shards hence upgrade is at risk."; private static final String INDEX_CREATION_BLOCKED = @@ -78,10 +77,10 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService new HealthIndicatorImpact(NAME, "upgrade_at_risk", 2, UPGRADE_AT_RISK, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)), new HealthIndicatorImpact(NAME, "creation_of_new_indices_at_risk", 2, INDEX_CREATION_RISK, List.of(ImpactArea.INGEST)) ); - static final Diagnosis SHARD_LIMITS_REACHED_NORMAL_NODES = SHARD_LIMITS_REACHED_FN.apply( + static final Diagnosis SHARD_LIMITS_REACHED_DATA_NODES = SHARD_LIMITS_REACHED_FN.apply( "increase_max_shards_per_node", ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE, - "non-frozen" + "data" ); static final Diagnosis SHARD_LIMITS_REACHED_FROZEN_NODES = SHARD_LIMITS_REACHED_FN.apply( "increase_max_shards_per_node_frozen", @@ -115,8 +114,8 @@ public HealthIndicatorResult calculate(boolean verbose, int maxAffectedResources ); } - private HealthIndicatorResult mergeIndicators(StatusResult normalNodes, StatusResult frozenNodes) { - var finalStatus = HealthStatus.merge(Stream.of(normalNodes.status, frozenNodes.status)); + private HealthIndicatorResult mergeIndicators(StatusResult dataNodes, StatusResult frozenNodes) { + var finalStatus = HealthStatus.merge(Stream.of(dataNodes.status, frozenNodes.status)); var diagnoses = List.of(); var symptomBuilder = new StringBuilder(); @@ -124,20 +123,20 @@ private HealthIndicatorResult mergeIndicators(StatusResult normalNodes, StatusRe symptomBuilder.append("The cluster has enough room to add new shards."); } - // RED and YELLOW status indicates that the cluster might have issues. finalStatus has the worst between *normal and frozen* nodes, - // so we have to check each of the groups in order of provide the right message. + // RED and YELLOW status indicates that the cluster might have issues. finalStatus has the worst between *data (non-frozen) and + // frozen* nodes, so we have to check each of the groups in order of provide the right message. if (finalStatus.indicatesHealthProblem()) { symptomBuilder.append("Cluster is close to reaching the configured maximum number of shards for "); - if (normalNodes.status == frozenNodes.status) { - symptomBuilder.append(NORMAL_GROUP).append(" and ").append(FROZEN_GROUP); - diagnoses = List.of(SHARD_LIMITS_REACHED_NORMAL_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES); + if (dataNodes.status == frozenNodes.status) { + symptomBuilder.append(DATA_NODE_NAME).append(" and ").append(FROZEN_NODE_NAME); + diagnoses = List.of(SHARD_LIMITS_REACHED_DATA_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES); - } else if (normalNodes.status.indicatesHealthProblem()) { - symptomBuilder.append(NORMAL_GROUP); - diagnoses = List.of(SHARD_LIMITS_REACHED_NORMAL_NODES); + } else if (dataNodes.status.indicatesHealthProblem()) { + symptomBuilder.append(DATA_NODE_NAME); + diagnoses = List.of(SHARD_LIMITS_REACHED_DATA_NODES); } else if (frozenNodes.status.indicatesHealthProblem()) { - symptomBuilder.append(FROZEN_GROUP); + symptomBuilder.append(FROZEN_NODE_NAME); diagnoses = List.of(SHARD_LIMITS_REACHED_FROZEN_NODES); } @@ -153,7 +152,7 @@ private HealthIndicatorResult mergeIndicators(StatusResult normalNodes, StatusRe return createIndicator( finalStatus, symptomBuilder.toString(), - buildDetails(normalNodes.result, frozenNodes.result), + buildDetails(dataNodes.result, frozenNodes.result), indicatorImpacts, diagnoses ); @@ -173,14 +172,14 @@ static StatusResult calculateFrom(int maxShardsPerNodeSetting, ClusterState stat return new StatusResult(HealthStatus.GREEN, result); } - static HealthIndicatorDetails buildDetails(ShardLimitValidator.Result normalNodes, ShardLimitValidator.Result frozenNodes) { + static HealthIndicatorDetails buildDetails(ShardLimitValidator.Result dataNodes, ShardLimitValidator.Result frozenNodes) { return (builder, params) -> { builder.startObject(); { - builder.startObject("data"); - builder.field("max_shards_in_cluster", normalNodes.maxShardsInCluster()); - if (normalNodes.currentUsedShards().isPresent()) { - builder.field("current_used_shards", normalNodes.currentUsedShards().get()); + builder.startObject(DATA_NODE_NAME); + builder.field("max_shards_in_cluster", dataNodes.maxShardsInCluster()); + if (dataNodes.currentUsedShards().isPresent()) { + builder.field("current_used_shards", dataNodes.currentUsedShards().get()); } builder.endObject(); } diff --git a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java index fad3e9784c682..36f2035650d37 100644 --- a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java +++ b/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java @@ -46,8 +46,8 @@ import static org.elasticsearch.health.HealthStatus.RED; import static org.elasticsearch.health.HealthStatus.YELLOW; import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.RED_INDICATOR_IMPACTS; +import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.SHARD_LIMITS_REACHED_DATA_NODES; import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.SHARD_LIMITS_REACHED_FROZEN_NODES; -import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.SHARD_LIMITS_REACHED_NORMAL_NODES; import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.YELLOW_INDICATOR_IMPACTS; import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.calculateFrom; import static org.elasticsearch.indices.ShardLimitValidator.FROZEN_GROUP; @@ -62,16 +62,16 @@ public class ShardLimitsHealthIndicatorServiceTests extends ESTestCase { public static final HealthMetadata.Disk DISK_METADATA = HealthMetadata.Disk.newBuilder().build(); - private DiscoveryNode normalNode; + private DiscoveryNode dataNode; private DiscoveryNode frozenNode; @Before public void setUp() throws Exception { super.setUp(); - normalNode = new DiscoveryNode( - "normal_node", - "normal_node", + dataNode = new DiscoveryNode( + "data_node", + "data_node", ESTestCase.buildNewFakeTransportAddress(), Map.of(), Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE), @@ -94,7 +94,7 @@ public void testNoShardLimitsMetadata() throws IOException { randomValidMaxShards(), randomValidMaxShards(), new HealthMetadata(DISK_METADATA, null), - createIndexInNormalNode(100) + createIndexInDataNode(100) ) ); var target = new ShardLimitsHealthIndicatorService(clusterService); @@ -110,7 +110,7 @@ public void testNoShardLimitsMetadata() throws IOException { public void testIndicatorYieldsGreenInCaseThereIsRoom() throws IOException { int maxShardsPerNode = randomValidMaxShards(); int maxShardsPerNodeFrozen = randomValidMaxShards(); - var clusterService = createClusterService(maxShardsPerNode, maxShardsPerNodeFrozen, createIndexInNormalNode(maxShardsPerNode / 4)); + var clusterService = createClusterService(maxShardsPerNode, maxShardsPerNodeFrozen, createIndexInDataNode(maxShardsPerNode / 4)); var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), HealthStatus.GREEN); @@ -132,19 +132,16 @@ public void testIndicatorYieldsGreenInCaseThereIsRoom() throws IOException { public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOException { { - // Only normal_nodes does not have enough space + // Only data_nodes does not have enough space int maxShardsPerNodeFrozen = randomValidMaxShards(); - var clusterService = createClusterService(25, maxShardsPerNodeFrozen, createIndexInNormalNode(4)); + var clusterService = createClusterService(25, maxShardsPerNodeFrozen, createIndexInDataNode(4)); var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), YELLOW); - assertEquals( - indicatorResult.symptom(), - "Cluster is close to reaching the configured maximum number of shards for normal nodes." - ); + assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the configured maximum number of shards for data nodes."); assertThat(indicatorResult.impacts(), equalTo(YELLOW_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); - assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_NORMAL_NODES)); + assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_DATA_NODES)); assertThat( xContentToMap(indicatorResult.details()), is( @@ -184,17 +181,17 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep ); } { - // Both normal and frozen nodes does not have enough space - var clusterService = createClusterService(25, 25, createIndexInNormalNode(4), createIndexInFrozenNode(4)); + // Both data and frozen nodes does not have enough space + var clusterService = createClusterService(25, 25, createIndexInDataNode(4), createIndexInFrozenNode(4)); var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), YELLOW); assertEquals( indicatorResult.symptom(), - "Cluster is close to reaching the configured maximum number of shards for normal and frozen nodes." + "Cluster is close to reaching the configured maximum number of shards for data and frozen nodes." ); assertThat(indicatorResult.impacts(), equalTo(YELLOW_INDICATOR_IMPACTS)); - assertThat(indicatorResult.diagnosisList(), is(List.of(SHARD_LIMITS_REACHED_NORMAL_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES))); + assertThat(indicatorResult.diagnosisList(), is(List.of(SHARD_LIMITS_REACHED_DATA_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES))); assertThat( xContentToMap(indicatorResult.details()), is( @@ -211,19 +208,16 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOException { { - // Only normal_nodes does not have enough space + // Only data_nodes does not have enough space int maxShardsPerNodeFrozen = randomValidMaxShards(); - var clusterService = createClusterService(25, maxShardsPerNodeFrozen, createIndexInNormalNode(11)); + var clusterService = createClusterService(25, maxShardsPerNodeFrozen, createIndexInDataNode(11)); var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), RED); - assertEquals( - indicatorResult.symptom(), - "Cluster is close to reaching the configured maximum number of shards for normal nodes." - ); + assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the configured maximum number of shards for data nodes."); assertThat(indicatorResult.impacts(), equalTo(RED_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); - assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_NORMAL_NODES)); + assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_DATA_NODES)); assertThat( xContentToMap(indicatorResult.details()), is( @@ -263,17 +257,17 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio ); } { - // Both normal and frozen nodes does not have enough space - var clusterService = createClusterService(25, 25, createIndexInNormalNode(11), createIndexInFrozenNode(11)); + // Both data and frozen nodes does not have enough space + var clusterService = createClusterService(25, 25, createIndexInDataNode(11), createIndexInFrozenNode(11)); var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), RED); assertEquals( indicatorResult.symptom(), - "Cluster is close to reaching the configured maximum number of shards for normal and frozen nodes." + "Cluster is close to reaching the configured maximum number of shards for data and frozen nodes." ); assertThat(indicatorResult.impacts(), equalTo(RED_INDICATOR_IMPACTS)); - assertThat(indicatorResult.diagnosisList(), is(List.of(SHARD_LIMITS_REACHED_NORMAL_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES))); + assertThat(indicatorResult.diagnosisList(), is(List.of(SHARD_LIMITS_REACHED_DATA_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES))); assertThat( xContentToMap(indicatorResult.details()), is( @@ -324,7 +318,7 @@ public void testMappedFieldsForTelemetry() { assertEquals(ShardLimitsHealthIndicatorService.NAME, "shard_limits"); assertEquals( "elasticsearch:health:shard_limits:diagnosis:increase_max_shards_per_node", - SHARD_LIMITS_REACHED_NORMAL_NODES.definition().getUniqueId() + SHARD_LIMITS_REACHED_DATA_NODES.definition().getUniqueId() ); assertEquals( "elasticsearch:health:shard_limits:diagnosis:increase_max_shards_per_node_frozen", @@ -367,12 +361,8 @@ private ClusterState createClusterState( HealthMetadata healthMetadata, IndexMetadata.Builder... indexMetadata ) { - var clusterState = ClusterStateCreationUtils.state( - normalNode, - normalNode, - normalNode, - new DiscoveryNode[] { normalNode, frozenNode } - ).copyAndUpdate(b -> b.putCustom(HealthMetadata.TYPE, healthMetadata)); + var clusterState = ClusterStateCreationUtils.state(dataNode, dataNode, dataNode, new DiscoveryNode[] { dataNode, frozenNode }) + .copyAndUpdate(b -> b.putCustom(HealthMetadata.TYPE, healthMetadata)); var metadata = Metadata.builder() .persistentSettings( @@ -389,7 +379,7 @@ private ClusterState createClusterState( return ClusterState.builder(clusterState).metadata(metadata).build(); } - private static IndexMetadata.Builder createIndexInNormalNode(int shards) { + private static IndexMetadata.Builder createIndexInDataNode(int shards) { return createIndex(shards, NORMAL_GROUP); } From a38a0ef791cb0614a35b0a24b59b8aa8e0fded68 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Thu, 23 Mar 2023 14:12:29 -0300 Subject: [PATCH 24/28] rename shard_limits concept to shards_capacity --- ...ardsCapacityHealthIndicatorServiceIT.java} | 12 ++-- ...ShardsCapacityHealthIndicatorService.java} | 28 ++++----- .../java/org/elasticsearch/node/Node.java | 4 +- ...sCapacityHealthIndicatorServiceTests.java} | 58 +++++++++---------- 4 files changed, 51 insertions(+), 51 deletions(-) rename server/src/internalClusterTest/java/org/elasticsearch/health/node/{ShardLimitsHealthIndicatorServiceIT.java => ShardsCapacityHealthIndicatorServiceIT.java} (92%) rename server/src/main/java/org/elasticsearch/health/node/{ShardLimitsHealthIndicatorService.java => ShardsCapacityHealthIndicatorService.java} (88%) rename server/src/test/java/org/elasticsearch/health/node/{ShardLimitsHealthIndicatorServiceTests.java => ShardsCapacityHealthIndicatorServiceTests.java} (85%) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceIT.java similarity index 92% rename from server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java rename to server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceIT.java index c4486935a4aeb..f2182b47c39f5 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceIT.java @@ -33,7 +33,7 @@ import static org.hamcrest.Matchers.hasSize; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) -public class ShardLimitsHealthIndicatorServiceIT extends ESIntegTestCase { +public class ShardsCapacityHealthIndicatorServiceIT extends ESIntegTestCase { private static final String INDEX_NAME = "index-name"; private InternalTestCluster internalCluster; @@ -58,7 +58,7 @@ public void testGreen() throws Exception { // index: 4 shards + 1 replica = 8 shards used (30 - 8 = 22 > 10 available shards) createIndex(4, 1); - var result = fetchShardLimitsIndicatorResult(internalCluster); + var result = fetchShardsCapacityIndicatorResult(internalCluster); assertEquals(result.status(), HealthStatus.GREEN); assertEquals(result.symptom(), "The cluster has enough room to add new shards."); assertThat(result.diagnosisList(), empty()); @@ -69,7 +69,7 @@ public void testYellow() throws Exception { // index: 11 shards + 1 replica = 22 shards used (30 - 22 < 10 available shards) createIndex(10, 1); - var result = fetchShardLimitsIndicatorResult(internalCluster); + var result = fetchShardsCapacityIndicatorResult(internalCluster); assertEquals(result.status(), HealthStatus.YELLOW); assertEquals(result.symptom(), "Cluster is close to reaching the configured maximum number of shards for data nodes."); assertThat(result.diagnosisList(), hasSize(1)); @@ -80,7 +80,7 @@ public void testRed() throws Exception { // index: 13 shards + 1 replica = 26 shards used (30 - 26 < 5 available shards) createIndex(13, 1); - var result = fetchShardLimitsIndicatorResult(internalCluster); + var result = fetchShardsCapacityIndicatorResult(internalCluster); assertEquals(result.status(), HealthStatus.RED); assertEquals(result.symptom(), "Cluster is close to reaching the configured maximum number of shards for data nodes."); assertThat(result.diagnosisList(), hasSize(1)); @@ -91,7 +91,7 @@ private void createIndex(int shards, int replicas) { createIndex(INDEX_NAME, Settings.builder().put(SETTING_NUMBER_OF_SHARDS, shards).put(SETTING_NUMBER_OF_REPLICAS, replicas).build()); } - private HealthIndicatorResult fetchShardLimitsIndicatorResult(InternalTestCluster internalCluster) throws Exception { + private HealthIndicatorResult fetchShardsCapacityIndicatorResult(InternalTestCluster internalCluster) throws Exception { var healthNode = findHealthNode().getName(); var healthService = internalCluster.getInstance(HealthService.class, healthNode); var healthIndicatorResults = getHealthServiceResults(healthService, healthNode); @@ -120,7 +120,7 @@ public void onFailure(Exception e) { throw new RuntimeException(e); } }; - healthService.getHealth(internalCluster().client(node), ShardLimitsHealthIndicatorService.NAME, true, 1000, listener); + healthService.getHealth(internalCluster().client(node), ShardsCapacityHealthIndicatorService.NAME, true, 1000, listener); assertBusy(() -> assertNotNull(resultListReference.get())); return resultListReference.get(); } diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorService.java similarity index 88% rename from server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java rename to server/src/main/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorService.java index ddfdef2c75252..0b422c7ae3ea8 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorService.java @@ -26,7 +26,7 @@ import java.util.stream.Stream; /** - * This indicator reports health data about the shard limit across the cluster. + * This indicator reports health data about the shard capacity across the cluster. * *

* The indicator will report: @@ -38,9 +38,9 @@ * Although the `max_shard_per_node(.frozen)?` information is scoped by Node, we use the information from master because there is where * the available room for new shards is checked before creating new indices. */ -public class ShardLimitsHealthIndicatorService implements HealthIndicatorService { +public class ShardsCapacityHealthIndicatorService implements HealthIndicatorService { - static final String NAME = "shard_limits"; + static final String NAME = "shards_capacity"; static final String DATA_NODE_NAME = "data"; static final String FROZEN_NODE_NAME = "frozen"; @@ -50,8 +50,8 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService "The cluster is running low on room to add new shards hence the creation of new indices might fail."; private static final String INDEX_CREATION_RISK = "The cluster is running low on room to add new shards hence the creation of new indices might fail."; - private static final String HELP_GUIDE = "https://ela.st/max-shard-limit-reached"; - private static final TriFunction, String, Diagnosis> SHARD_LIMITS_REACHED_FN = ( + private static final String HELP_GUIDE = "https://ela.st/fix-shards-capacity"; + private static final TriFunction, String, Diagnosis> SHARD_MAX_CAPACITY_REACHED_FN = ( id, setting, indexType) -> new Diagnosis( @@ -77,12 +77,12 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService new HealthIndicatorImpact(NAME, "upgrade_at_risk", 2, UPGRADE_AT_RISK, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)), new HealthIndicatorImpact(NAME, "creation_of_new_indices_at_risk", 2, INDEX_CREATION_RISK, List.of(ImpactArea.INGEST)) ); - static final Diagnosis SHARD_LIMITS_REACHED_DATA_NODES = SHARD_LIMITS_REACHED_FN.apply( + static final Diagnosis SHARDS_MAX_CAPACITY_REACHED_DATA_NODES = SHARD_MAX_CAPACITY_REACHED_FN.apply( "increase_max_shards_per_node", ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE, "data" ); - static final Diagnosis SHARD_LIMITS_REACHED_FROZEN_NODES = SHARD_LIMITS_REACHED_FN.apply( + static final Diagnosis SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES = SHARD_MAX_CAPACITY_REACHED_FN.apply( "increase_max_shards_per_node_frozen", ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN, "frozen" @@ -90,7 +90,7 @@ public class ShardLimitsHealthIndicatorService implements HealthIndicatorService private final ClusterService clusterService; - public ShardLimitsHealthIndicatorService(ClusterService clusterService) { + public ShardsCapacityHealthIndicatorService(ClusterService clusterService) { this.clusterService = clusterService; } @@ -129,15 +129,15 @@ private HealthIndicatorResult mergeIndicators(StatusResult dataNodes, StatusResu symptomBuilder.append("Cluster is close to reaching the configured maximum number of shards for "); if (dataNodes.status == frozenNodes.status) { symptomBuilder.append(DATA_NODE_NAME).append(" and ").append(FROZEN_NODE_NAME); - diagnoses = List.of(SHARD_LIMITS_REACHED_DATA_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES); + diagnoses = List.of(SHARDS_MAX_CAPACITY_REACHED_DATA_NODES, SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES); } else if (dataNodes.status.indicatesHealthProblem()) { symptomBuilder.append(DATA_NODE_NAME); - diagnoses = List.of(SHARD_LIMITS_REACHED_DATA_NODES); + diagnoses = List.of(SHARDS_MAX_CAPACITY_REACHED_DATA_NODES); } else if (frozenNodes.status.indicatesHealthProblem()) { symptomBuilder.append(FROZEN_NODE_NAME); - diagnoses = List.of(SHARD_LIMITS_REACHED_FROZEN_NODES); + diagnoses = List.of(SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES); } symptomBuilder.append(" nodes."); @@ -158,7 +158,7 @@ private HealthIndicatorResult mergeIndicators(StatusResult dataNodes, StatusResu ); } - static StatusResult calculateFrom(int maxShardsPerNodeSetting, ClusterState state, ShardLimitsChecker checker) { + static StatusResult calculateFrom(int maxShardsPerNodeSetting, ClusterState state, ShardsCapacityChecker checker) { var result = checker.check(maxShardsPerNodeSetting, 5, 1, state); if (result.canAddShards() == false) { return new StatusResult(HealthStatus.RED, result); @@ -199,7 +199,7 @@ static HealthIndicatorDetails buildDetails(ShardLimitValidator.Result dataNodes, private HealthIndicatorResult unknownIndicator() { return createIndicator( HealthStatus.UNKNOWN, - "Unable to determine shard limit status.", + "Unable to determine shard capacity status.", HealthIndicatorDetails.EMPTY, List.of(), List.of() @@ -209,7 +209,7 @@ private HealthIndicatorResult unknownIndicator() { record StatusResult(HealthStatus status, ShardLimitValidator.Result result) {} @FunctionalInterface - interface ShardLimitsChecker { + interface ShardsCapacityChecker { ShardLimitValidator.Result check(int maxConfiguredShardsPerNode, int numberOfNewShards, int replicas, ClusterState state); } } diff --git a/server/src/main/java/org/elasticsearch/node/Node.java b/server/src/main/java/org/elasticsearch/node/Node.java index 0983a95230f61..5fe0ff33f8bb5 100644 --- a/server/src/main/java/org/elasticsearch/node/Node.java +++ b/server/src/main/java/org/elasticsearch/node/Node.java @@ -112,7 +112,7 @@ import org.elasticsearch.health.node.DiskHealthIndicatorService; import org.elasticsearch.health.node.HealthInfoCache; import org.elasticsearch.health.node.LocalHealthMonitor; -import org.elasticsearch.health.node.ShardLimitsHealthIndicatorService; +import org.elasticsearch.health.node.ShardsCapacityHealthIndicatorService; import org.elasticsearch.health.node.selection.HealthNodeTaskExecutor; import org.elasticsearch.health.stats.HealthApiStats; import org.elasticsearch.http.HttpServerTransport; @@ -1227,7 +1227,7 @@ private HealthService createHealthService( ) ); serverHealthIndicatorServices.add(new DiskHealthIndicatorService(clusterService)); - serverHealthIndicatorServices.add(new ShardLimitsHealthIndicatorService(clusterService)); + serverHealthIndicatorServices.add(new ShardsCapacityHealthIndicatorService(clusterService)); var pluginHealthIndicatorServices = pluginsService.filterPlugins(HealthPlugin.class) .stream() .flatMap(plugin -> plugin.getHealthIndicatorServices().stream()) diff --git a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java similarity index 85% rename from server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java rename to server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java index 36f2035650d37..f79e1e66e0b77 100644 --- a/server/src/test/java/org/elasticsearch/health/node/ShardLimitsHealthIndicatorServiceTests.java +++ b/server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java @@ -45,11 +45,11 @@ import static org.elasticsearch.health.HealthStatus.GREEN; import static org.elasticsearch.health.HealthStatus.RED; import static org.elasticsearch.health.HealthStatus.YELLOW; -import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.RED_INDICATOR_IMPACTS; -import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.SHARD_LIMITS_REACHED_DATA_NODES; -import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.SHARD_LIMITS_REACHED_FROZEN_NODES; -import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.YELLOW_INDICATOR_IMPACTS; -import static org.elasticsearch.health.node.ShardLimitsHealthIndicatorService.calculateFrom; +import static org.elasticsearch.health.node.ShardsCapacityHealthIndicatorService.RED_INDICATOR_IMPACTS; +import static org.elasticsearch.health.node.ShardsCapacityHealthIndicatorService.SHARDS_MAX_CAPACITY_REACHED_DATA_NODES; +import static org.elasticsearch.health.node.ShardsCapacityHealthIndicatorService.SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES; +import static org.elasticsearch.health.node.ShardsCapacityHealthIndicatorService.YELLOW_INDICATOR_IMPACTS; +import static org.elasticsearch.health.node.ShardsCapacityHealthIndicatorService.calculateFrom; import static org.elasticsearch.indices.ShardLimitValidator.FROZEN_GROUP; import static org.elasticsearch.indices.ShardLimitValidator.INDEX_SETTING_SHARD_LIMIT_GROUP; import static org.elasticsearch.indices.ShardLimitValidator.NORMAL_GROUP; @@ -59,7 +59,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -public class ShardLimitsHealthIndicatorServiceTests extends ESTestCase { +public class ShardsCapacityHealthIndicatorServiceTests extends ESTestCase { public static final HealthMetadata.Disk DISK_METADATA = HealthMetadata.Disk.newBuilder().build(); private DiscoveryNode dataNode; @@ -88,7 +88,7 @@ public void setUp() throws Exception { ); } - public void testNoShardLimitsMetadata() throws IOException { + public void testNoShardsCapacityMetadata() throws IOException { var clusterService = createClusterService( createClusterState( randomValidMaxShards(), @@ -97,13 +97,13 @@ public void testNoShardLimitsMetadata() throws IOException { createIndexInDataNode(100) ) ); - var target = new ShardLimitsHealthIndicatorService(clusterService); + var target = new ShardsCapacityHealthIndicatorService(clusterService); var indicatorResult = target.calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), HealthStatus.UNKNOWN); assertTrue(indicatorResult.impacts().isEmpty()); assertTrue(indicatorResult.diagnosisList().isEmpty()); - assertEquals(indicatorResult.symptom(), "Unable to determine shard limit status."); + assertEquals(indicatorResult.symptom(), "Unable to determine shard capacity status."); assertEquals(xContentToMap(indicatorResult.details()), Map.of()); } @@ -111,7 +111,7 @@ public void testIndicatorYieldsGreenInCaseThereIsRoom() throws IOException { int maxShardsPerNode = randomValidMaxShards(); int maxShardsPerNodeFrozen = randomValidMaxShards(); var clusterService = createClusterService(maxShardsPerNode, maxShardsPerNodeFrozen, createIndexInDataNode(maxShardsPerNode / 4)); - var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), HealthStatus.GREEN); assertTrue(indicatorResult.impacts().isEmpty()); @@ -135,13 +135,13 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep // Only data_nodes does not have enough space int maxShardsPerNodeFrozen = randomValidMaxShards(); var clusterService = createClusterService(25, maxShardsPerNodeFrozen, createIndexInDataNode(4)); - var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), YELLOW); assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the configured maximum number of shards for data nodes."); assertThat(indicatorResult.impacts(), equalTo(YELLOW_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); - assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_DATA_NODES)); + assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARDS_MAX_CAPACITY_REACHED_DATA_NODES)); assertThat( xContentToMap(indicatorResult.details()), is( @@ -158,7 +158,7 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep // Only frozen_nodes does not have enough space int maxShardsPerNode = randomValidMaxShards(); var clusterService = createClusterService(maxShardsPerNode, 25, createIndexInFrozenNode(4)); - var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), YELLOW); assertEquals( @@ -167,7 +167,7 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep ); assertThat(indicatorResult.impacts(), equalTo(YELLOW_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); - assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_FROZEN_NODES)); + assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES)); assertThat( xContentToMap(indicatorResult.details()), is( @@ -183,7 +183,7 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep { // Both data and frozen nodes does not have enough space var clusterService = createClusterService(25, 25, createIndexInDataNode(4), createIndexInFrozenNode(4)); - var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), YELLOW); assertEquals( @@ -191,7 +191,7 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep "Cluster is close to reaching the configured maximum number of shards for data and frozen nodes." ); assertThat(indicatorResult.impacts(), equalTo(YELLOW_INDICATOR_IMPACTS)); - assertThat(indicatorResult.diagnosisList(), is(List.of(SHARD_LIMITS_REACHED_DATA_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES))); + assertThat(indicatorResult.diagnosisList(), is(List.of(SHARDS_MAX_CAPACITY_REACHED_DATA_NODES, SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES))); assertThat( xContentToMap(indicatorResult.details()), is( @@ -211,13 +211,13 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio // Only data_nodes does not have enough space int maxShardsPerNodeFrozen = randomValidMaxShards(); var clusterService = createClusterService(25, maxShardsPerNodeFrozen, createIndexInDataNode(11)); - var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), RED); assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the configured maximum number of shards for data nodes."); assertThat(indicatorResult.impacts(), equalTo(RED_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); - assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_DATA_NODES)); + assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARDS_MAX_CAPACITY_REACHED_DATA_NODES)); assertThat( xContentToMap(indicatorResult.details()), is( @@ -234,7 +234,7 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio // Only frozen_nodes does not have enough space int maxShardsPerNode = randomValidMaxShards(); var clusterService = createClusterService(maxShardsPerNode, 25, createIndexInFrozenNode(11)); - var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), RED); assertEquals( @@ -243,7 +243,7 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio ); assertThat(indicatorResult.impacts(), equalTo(RED_INDICATOR_IMPACTS)); assertThat(indicatorResult.diagnosisList(), hasSize(1)); - assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARD_LIMITS_REACHED_FROZEN_NODES)); + assertThat(indicatorResult.diagnosisList().get(0), equalTo(SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES)); assertThat( xContentToMap(indicatorResult.details()), is( @@ -259,7 +259,7 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio { // Both data and frozen nodes does not have enough space var clusterService = createClusterService(25, 25, createIndexInDataNode(11), createIndexInFrozenNode(11)); - var indicatorResult = new ShardLimitsHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), RED); assertEquals( @@ -267,7 +267,7 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio "Cluster is close to reaching the configured maximum number of shards for data and frozen nodes." ); assertThat(indicatorResult.impacts(), equalTo(RED_INDICATOR_IMPACTS)); - assertThat(indicatorResult.diagnosisList(), is(List.of(SHARD_LIMITS_REACHED_DATA_NODES, SHARD_LIMITS_REACHED_FROZEN_NODES))); + assertThat(indicatorResult.diagnosisList(), is(List.of(SHARDS_MAX_CAPACITY_REACHED_DATA_NODES, SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES))); assertThat( xContentToMap(indicatorResult.details()), is( @@ -285,7 +285,7 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio public void testCalculateMethods() { var mockedState = mock(ClusterState.class); var randomMaxShardsPerNodeSetting = randomInt(); - Function checkerWrapper = shardsToAdd -> ( + Function checkerWrapper = shardsToAdd -> ( maxConfiguredShardsPerNode, numberOfNewShards, replicas, @@ -308,21 +308,21 @@ public void testCalculateMethods() { Stream.of(randomIntBetween(1, 4), randomIntBetween(6, 9), randomIntBetween(11, Integer.MAX_VALUE)) .map(checkerWrapper) .map(checker -> calculateFrom(randomMaxShardsPerNodeSetting, mockedState, checker)) - .map(ShardLimitsHealthIndicatorService.StatusResult::status) + .map(ShardsCapacityHealthIndicatorService.StatusResult::status) .forEach(status -> assertEquals(status, GREEN)); } // We expose the indicator name and the diagnoses in the x-pack usage API. In order to index them properly in a telemetry index // they need to be declared in the health-api-indexer.edn in the telemetry repository. public void testMappedFieldsForTelemetry() { - assertEquals(ShardLimitsHealthIndicatorService.NAME, "shard_limits"); + assertEquals(ShardsCapacityHealthIndicatorService.NAME, "shards_capacity"); assertEquals( - "elasticsearch:health:shard_limits:diagnosis:increase_max_shards_per_node", - SHARD_LIMITS_REACHED_DATA_NODES.definition().getUniqueId() + "elasticsearch:health:shards_capacity:diagnosis:increase_max_shards_per_node", + SHARDS_MAX_CAPACITY_REACHED_DATA_NODES.definition().getUniqueId() ); assertEquals( - "elasticsearch:health:shard_limits:diagnosis:increase_max_shards_per_node_frozen", - SHARD_LIMITS_REACHED_FROZEN_NODES.definition().getUniqueId() + "elasticsearch:health:shards_capacity:diagnosis:increase_max_shards_per_node_frozen", + SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES.definition().getUniqueId() ); } From 9ed826730e8a836db3446160da3b2244b58f5cee Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Thu, 23 Mar 2023 14:18:10 -0300 Subject: [PATCH 25/28] fix changelog --- docs/changelog/94552.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/94552.yaml b/docs/changelog/94552.yaml index e87fd7e038ec6..0803d76bee6be 100644 --- a/docs/changelog/94552.yaml +++ b/docs/changelog/94552.yaml @@ -1,5 +1,5 @@ pr: 94552 -summary: Add new `ShardLimits` Health Indicator Service +summary: Add new `ShardsCapacity` Health Indicator Service area: Health type: feature issues: [] From 3b71a0e997f91b83626b7b29dddf698d5de852bd Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Thu, 23 Mar 2023 14:20:37 -0300 Subject: [PATCH 26/28] spotless --- .../ShardsCapacityHealthIndicatorServiceTests.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java index f79e1e66e0b77..bb1002bdb9e2b 100644 --- a/server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java +++ b/server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java @@ -191,7 +191,10 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep "Cluster is close to reaching the configured maximum number of shards for data and frozen nodes." ); assertThat(indicatorResult.impacts(), equalTo(YELLOW_INDICATOR_IMPACTS)); - assertThat(indicatorResult.diagnosisList(), is(List.of(SHARDS_MAX_CAPACITY_REACHED_DATA_NODES, SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES))); + assertThat( + indicatorResult.diagnosisList(), + is(List.of(SHARDS_MAX_CAPACITY_REACHED_DATA_NODES, SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES)) + ); assertThat( xContentToMap(indicatorResult.details()), is( @@ -267,7 +270,10 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio "Cluster is close to reaching the configured maximum number of shards for data and frozen nodes." ); assertThat(indicatorResult.impacts(), equalTo(RED_INDICATOR_IMPACTS)); - assertThat(indicatorResult.diagnosisList(), is(List.of(SHARDS_MAX_CAPACITY_REACHED_DATA_NODES, SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES))); + assertThat( + indicatorResult.diagnosisList(), + is(List.of(SHARDS_MAX_CAPACITY_REACHED_DATA_NODES, SHARDS_MAX_CAPACITY_REACHED_FROZEN_NODES)) + ); assertThat( xContentToMap(indicatorResult.details()), is( From a11fc85e6591ac42ae19b4c88c6609fc508e66a5 Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Fri, 24 Mar 2023 09:07:15 -0300 Subject: [PATCH 27/28] use the actual classes instead of mocks --- ...dsCapacityHealthIndicatorServiceTests.java | 37 ++++++++++++++++--- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java index bb1002bdb9e2b..f0e8b734abcdf 100644 --- a/server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java +++ b/server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java @@ -22,13 +22,19 @@ import org.elasticsearch.health.HealthStatus; import org.elasticsearch.health.metadata.HealthMetadata; import org.elasticsearch.indices.ShardLimitValidator; +import org.elasticsearch.test.ClusterServiceUtils; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.threadpool.TestThreadPool; +import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentType; +import org.junit.After; +import org.junit.AfterClass; import org.junit.Before; +import org.junit.BeforeClass; import java.io.IOException; import java.util.List; @@ -56,12 +62,14 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; public class ShardsCapacityHealthIndicatorServiceTests extends ESTestCase { public static final HealthMetadata.Disk DISK_METADATA = HealthMetadata.Disk.newBuilder().build(); + + private static ThreadPool threadPool; + + private ClusterService clusterService; private DiscoveryNode dataNode; private DiscoveryNode frozenNode; @@ -86,6 +94,24 @@ public void setUp() throws Exception { Set.of(DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE), Version.CURRENT ); + + clusterService = ClusterServiceUtils.createClusterService(threadPool); + } + + @After + public void tearDown() throws Exception { + super.tearDown(); + clusterService.close(); + } + + @BeforeClass + public static void setUpThreadPool() { + threadPool = new TestThreadPool(getTestClass().getSimpleName()); + } + + @AfterClass + public static void tearDownThreadPool() { + terminate(threadPool); } public void testNoShardsCapacityMetadata() throws IOException { @@ -289,7 +315,7 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio } public void testCalculateMethods() { - var mockedState = mock(ClusterState.class); + var mockedState = ClusterState.EMPTY_STATE; var randomMaxShardsPerNodeSetting = randomInt(); Function checkerWrapper = shardsToAdd -> ( maxConfiguredShardsPerNode, @@ -344,9 +370,8 @@ private Map xContentToMap(ToXContent xcontent) throws IOExceptio return parser.map(); } - private static ClusterService createClusterService(ClusterState clusterState) { - var clusterService = mock(ClusterService.class); - when(clusterService.state()).thenReturn(clusterState); + private ClusterService createClusterService(ClusterState clusterState) { + ClusterServiceUtils.setState(clusterService, clusterState); return clusterService; } From cf177ac330babbb1e395cb8954633897101cf37b Mon Sep 17 00:00:00 2001 From: Pablo Alcantar Morales Date: Fri, 24 Mar 2023 09:09:22 -0300 Subject: [PATCH 28/28] reword messages --- .../health/node/ShardsCapacityHealthIndicatorService.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorService.java index 0b422c7ae3ea8..1852e504b61db 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorService.java @@ -45,11 +45,12 @@ public class ShardsCapacityHealthIndicatorService implements HealthIndicatorServ static final String DATA_NODE_NAME = "data"; static final String FROZEN_NODE_NAME = "frozen"; private static final String UPGRADE_BLOCKED = "The cluster has too many used shards to be able to upgrade."; - private static final String UPGRADE_AT_RISK = "The cluster is running low on room to add new shards hence upgrade is at risk."; + private static final String UPGRADE_AT_RISK = + "The cluster is running low on room to add new shard. Upgrading to a new version is at risk."; private static final String INDEX_CREATION_BLOCKED = - "The cluster is running low on room to add new shards hence the creation of new indices might fail."; + "The cluster is running low on room to add new shards. Adding data to new indices is at risk"; private static final String INDEX_CREATION_RISK = - "The cluster is running low on room to add new shards hence the creation of new indices might fail."; + "The cluster is running low on room to add new shards. Adding data to new indices might soon fail."; private static final String HELP_GUIDE = "https://ela.st/fix-shards-capacity"; private static final TriFunction, String, Diagnosis> SHARD_MAX_CAPACITY_REACHED_FN = ( id,