Skip to content

Commit 3d57a78

Browse files
committed
Add extra logging for investigation into #52000 (#52472)
It looks like #52000 is caused by a slowdown in cluster state application (maybe due to #50907), but I would like to understand the details, to make sure nothing else is going on, before simply increasing the timeout. This commit enables some relevant `DEBUG` loggers and also captures stack traces from all threads rather than just the three hottest ones.
1 parent 84de601 commit 3d57a78

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

server/src/test/java/org/elasticsearch/cluster/allocation/ClusterRerouteIT.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
import org.elasticsearch.test.ESIntegTestCase.Scope;
5858
import org.elasticsearch.test.InternalTestCluster;
5959
import org.elasticsearch.test.MockLogAppender;
60+
import org.elasticsearch.test.junit.annotations.TestLogging;
6061

6162
import java.nio.file.Path;
6263
import java.util.Arrays;
@@ -187,6 +188,8 @@ public void testRerouteWithAllocateLocalGateway_enableAllocationSettings() throw
187188
rerouteWithAllocateLocalGateway(commonSettings);
188189
}
189190

191+
@TestLogging(reason = "https://github.com/elastic/elasticsearch/issues/52000",
192+
value = "org.elasticsearch.gateway.PersistedClusterStateService:DEBUG,org.elasticsearch.cluster.service.MasterService:DEBUG")
190193
public void testDelayWithALargeAmountOfShards() throws Exception {
191194
Settings commonSettings = Settings.builder()
192195
.put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_INCOMING_RECOVERIES_SETTING.getKey(), 1)

test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -919,8 +919,8 @@ private ClusterHealthStatus ensureColor(ClusterHealthStatus clusterHealthStatus,
919919

920920
ClusterHealthResponse actionGet = client().admin().cluster().health(healthRequest).actionGet();
921921
if (actionGet.isTimedOut()) {
922-
final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setIgnoreIdleThreads(false).get().getNodes()
923-
.stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n"));
922+
final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setThreads(99999).setIgnoreIdleThreads(false)
923+
.get().getNodes().stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n"));
924924
logger.info("{} timed out, cluster state:\n{}\npending tasks:\n{}\nhot threads:\n{}\n",
925925
method,
926926
client().admin().cluster().prepareState().get().getState(),

0 commit comments

Comments
 (0)