Add extra logging for investigation into #52000 (#52472)

DaveCTurner · DaveCTurner · commit 3d57a78deb24 · 2020-02-18T13:02:33.000Z
It looks like #52000 is caused by a slowdown in cluster state application (maybe due to #50907) but I would like to understand the details to ensure that there's nothing else going on here too before simply increasing the timeout. This commit enables some relevant `DEBUG` loggers and also captures stack traces from all threads rather than just the three hottest ones.
diff --git a/server/src/test/java/org/elasticsearch/cluster/allocation/ClusterRerouteIT.java b/server/src/test/java/org/elasticsearch/cluster/allocation/ClusterRerouteIT.java
@@ -57,6 +57,7 @@
 import org.elasticsearch.test.ESIntegTestCase.Scope;
 import org.elasticsearch.test.InternalTestCluster;
 import org.elasticsearch.test.MockLogAppender;
+import org.elasticsearch.test.junit.annotations.TestLogging;
 
 import java.nio.file.Path;
 import java.util.Arrays;
@@ -187,6 +188,8 @@ public void testRerouteWithAllocateLocalGateway_enableAllocationSettings() throw
         rerouteWithAllocateLocalGateway(commonSettings);
     }
 
+    @TestLogging(reason = "https://github.com/elastic/elasticsearch/issues/52000",
+        value = "org.elasticsearch.gateway.PersistedClusterStateService:DEBUG,org.elasticsearch.cluster.service.MasterService:DEBUG")
     public void testDelayWithALargeAmountOfShards() throws Exception {
         Settings commonSettings = Settings.builder()
                 .put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_INCOMING_RECOVERIES_SETTING.getKey(), 1)
diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java
@@ -919,8 +919,8 @@ private ClusterHealthStatus ensureColor(ClusterHealthStatus clusterHealthStatus,
 
         ClusterHealthResponse actionGet = client().admin().cluster().health(healthRequest).actionGet();
         if (actionGet.isTimedOut()) {
-            final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setIgnoreIdleThreads(false).get().getNodes()
-                .stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n"));
+            final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setThreads(99999).setIgnoreIdleThreads(false)
+                .get().getNodes().stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n"));
             logger.info("{} timed out, cluster state:\n{}\npending tasks:\n{}\nhot threads:\n{}\n",
                 method,
                 client().admin().cluster().prepareState().get().getState(),