From 67d9d89f1a86e1182ab6d76ef362690cf6cc2c37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Fri, 14 Mar 2025 17:25:25 +0100 Subject: [PATCH] rabbit_khepri: Remove setup retries [Why] Khepri already manages retries if needed, so we can just use a timeout. Note that the timeout was already bumped to a more appropriate 5 minutes, which also matches what we had with Mnesia. However, with 10 retries by default, the effective timeout at the end of `init/1` was 5 * 10 = 50 minutes. --- deps/rabbit/src/rabbit_khepri.erl | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/deps/rabbit/src/rabbit_khepri.erl b/deps/rabbit/src/rabbit_khepri.erl index 537021efa341..5424917ee00c 100644 --- a/deps/rabbit/src/rabbit_khepri.erl +++ b/deps/rabbit/src/rabbit_khepri.erl @@ -288,12 +288,6 @@ retry_timeout() -> undefined -> 300_000 end. -retry_limit() -> - case application:get_env(rabbit, khepri_leader_wait_retry_limit) of - {ok, T} -> T; - undefined -> 10 - end. - %% @private -spec init(IsVirgin) -> Ret when @@ -333,22 +327,13 @@ init(IsVirgin) -> end. await_replication() -> - await_replication(retry_timeout(), retry_limit()). - -await_replication(_Timeout, 0) -> - {error, timeout}; -await_replication(Timeout, Retries) -> + Timeout = retry_timeout(), ?LOG_DEBUG( "Khepri-based " ?RA_FRIENDLY_NAME " waiting to catch up on replication " - "to the Raft cluster leader. Waiting for ~tb ms, ~tb retries left", - [Timeout, Retries], + "to the Raft cluster leader. Waiting for ~tb ms", + [Timeout], #{domain => ?RMQLOG_DOMAIN_DB}), - case fence(Timeout) of - ok -> - ok; - {error, timeout} -> - await_replication(Timeout, Retries -1) - end. + fence(Timeout). %% @private