Skip to content

Commit 8f141b8

Browse files
authored
Fix ClusterInfoServiceIT timeouts (#36758)
The test testClusterInfoServiceInformationClearOnError relies on timing behavior. It sets InternalClusterInfoService.INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING to 1s and relies on the stats requests completing within that timeframe (which our ever-so-slow CI seems to violate at times). Unfortunately, the logging in InternalClusterInfoService was implemented incorrectly: the timeout warning was only logged when the waiting thread was interrupted, not when the latch wait actually timed out, so the log messages that would show the requests timing out are missing from the failed runs. The issue can be reproduced locally by reducing the timeout to something lower than 1s. Closes #36554
1 parent 18691da commit 8f141b8

File tree

2 files changed

+13
-5
lines changed

2 files changed

+13
-5
lines changed

server/src/main/java/org/elasticsearch/cluster/InternalClusterInfoService.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -345,17 +345,19 @@ public void onFailure(Exception e) {
345345
});
346346

347347
try {
348-
nodeLatch.await(fetchTimeout.getMillis(), TimeUnit.MILLISECONDS);
348+
if (nodeLatch.await(fetchTimeout.getMillis(), TimeUnit.MILLISECONDS) == false) {
349+
logger.warn("Failed to update node information for ClusterInfoUpdateJob within {} timeout", fetchTimeout);
350+
}
349351
} catch (InterruptedException e) {
350352
Thread.currentThread().interrupt(); // restore interrupt status
351-
logger.warn("Failed to update node information for ClusterInfoUpdateJob within {} timeout", fetchTimeout);
352353
}
353354

354355
try {
355-
indicesLatch.await(fetchTimeout.getMillis(), TimeUnit.MILLISECONDS);
356+
if (indicesLatch.await(fetchTimeout.getMillis(), TimeUnit.MILLISECONDS) == false) {
357+
logger.warn("Failed to update shard information for ClusterInfoUpdateJob within {} timeout", fetchTimeout);
358+
}
356359
} catch (InterruptedException e) {
357360
Thread.currentThread().interrupt(); // restore interrupt status
358-
logger.warn("Failed to update shard information for ClusterInfoUpdateJob within {} timeout", fetchTimeout);
359361
}
360362
ClusterInfo clusterInfo = getClusterInfo();
361363
try {

server/src/test/java/org/elasticsearch/cluster/ClusterInfoServiceIT.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,6 @@ protected Settings nodeSettings(int nodeOrdinal) {
111111
.put(super.nodeSettings(nodeOrdinal))
112112
// manual collection or upon cluster forming.
113113
.put(NodeEnvironment.MAX_LOCAL_STORAGE_NODES_SETTING.getKey(), 2)
114-
.put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING.getKey(), "1s")
115114
.build();
116115
}
117116

@@ -120,6 +119,11 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
120119
return Arrays.asList(TestPlugin.class, MockTransportService.TestPlugin.class);
121120
}
122121

122+
private void setClusterInfoTimeout(String timeValue) {
123+
assertAcked(client().admin().cluster().prepareUpdateSettings().setTransientSettings(Settings.builder()
124+
.put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING.getKey(), timeValue).build()));
125+
}
126+
123127
public void testClusterInfoServiceCollectsInformation() throws Exception {
124128
internalCluster().startNodes(2);
125129
assertAcked(prepareCreate("test").setSettings(Settings.builder()
@@ -204,6 +208,7 @@ public void testClusterInfoServiceInformationClearOnError() {
204208
});
205209
}
206210

211+
setClusterInfoTimeout("1s");
207212
// timeouts shouldn't clear the info
208213
timeout.set(true);
209214
info = infoService.refresh();
@@ -237,6 +242,7 @@ public void testClusterInfoServiceInformationClearOnError() {
237242

238243
// check we recover
239244
blockingActionFilter.blockActions();
245+
setClusterInfoTimeout("15s");
240246
info = infoService.refresh();
241247
assertNotNull("info should not be null", info);
242248
assertThat(info.getNodeLeastAvailableDiskUsages().size(), equalTo(2));

0 commit comments

Comments
 (0)