Skip to content

Commit 4486843

Browse files
committed
QQ: don't try to contact non-connected nodes for stats
Some systems may incur a substantial latency penalty when attempting reconnections to down nodes. To avoid this, some stats-related functions that gather info from all QQ member nodes now only try those nodes that are connected. This should help keep things like the mgmt API functions and ctl commands a bit more responsive.
1 parent 1f6ead8 commit 4486843

File tree

2 files changed

+14
-7
lines changed

2 files changed

+14
-7
lines changed

deps/rabbit/src/rabbit_quorum_queue.erl

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -971,9 +971,8 @@ cleanup_data_dir() ->
971971
end
972972
|| Q <- rabbit_amqqueue:list_by_type(?MODULE),
973973
lists:member(node(), get_nodes(Q))],
974-
NoQQClusters = rabbit_ra_registry:list_not_quorum_clusters(),
975974
Registered = ra_directory:list_registered(?RA_SYSTEM),
976-
Running = Names ++ NoQQClusters,
975+
Running = Names,
977976
_ = [maybe_delete_data_dir(UId) || {Name, UId} <- Registered,
978977
not lists:member(Name, Running)],
979978
ok.
@@ -1436,7 +1435,7 @@ i(memory, Q) when ?is_amqqueue(Q) ->
14361435
i(state, Q) when ?is_amqqueue(Q) ->
14371436
{Name, Node} = amqqueue:get_pid(Q),
14381437
%% Check against the leader or last known leader
1439-
case rpc:call(Node, ?MODULE, cluster_state, [Name], ?RPC_TIMEOUT) of
1438+
case erpc:call(Node, ?MODULE, cluster_state, [Name], ?RPC_TIMEOUT) of
14401439
{badrpc, _} -> down;
14411440
State -> State
14421441
end;
@@ -1457,7 +1456,7 @@ i(online, Q) -> online(Q);
14571456
i(leader, Q) -> leader(Q);
14581457
i(open_files, Q) when ?is_amqqueue(Q) ->
14591458
{Name, _} = amqqueue:get_pid(Q),
1460-
Nodes = get_nodes(Q),
1459+
Nodes = get_connected_nodes(Q),
14611460
{Data, _} = rpc:multicall(Nodes, ?MODULE, open_files, [Name]),
14621461
lists:flatten(Data);
14631462
i(single_active_consumer_pid, Q) when ?is_amqqueue(Q) ->
@@ -1559,7 +1558,7 @@ peek(_Pos, Q) when ?is_amqqueue(Q) andalso ?amqqueue_is_classic(Q) ->
15591558
{error, classic_queue_not_supported}.
15601559

15611560
online(Q) when ?is_amqqueue(Q) ->
1562-
Nodes = get_nodes(Q),
1561+
Nodes = get_connected_nodes(Q),
15631562
{Name, _} = amqqueue:get_pid(Q),
15641563
[Node || Node <- Nodes, is_process_alive(Name, Node)].
15651564

@@ -1568,7 +1567,11 @@ format(Q) when ?is_amqqueue(Q) ->
15681567
[{members, Nodes}, {online, online(Q)}, {leader, leader(Q)}].
15691568

15701569
is_process_alive(Name, Node) ->
1571-
erlang:is_pid(rpc:call(Node, erlang, whereis, [Name], ?RPC_TIMEOUT)).
1570+
%% don't attempt rpc if node is not already connected
1571+
%% as this function is used for metrics and stats and the additional
1572+
%% latency isn't warranted
1573+
lists:member(Node, [node() | nodes()]) andalso
1574+
erlang:is_pid(erpc:call(Node, erlang, whereis, [Name], ?RPC_TIMEOUT)).
15721575

15731576
-spec quorum_messages(rabbit_amqqueue:name()) -> non_neg_integer().
15741577

@@ -1626,6 +1629,10 @@ get_nodes(Q) when ?is_amqqueue(Q) ->
16261629
#{nodes := Nodes} = amqqueue:get_type_state(Q),
16271630
Nodes.
16281631

1632+
get_connected_nodes(Q) when ?is_amqqueue(Q) ->
1633+
ErlangNodes = [node() | nodes()],
1634+
[N || N <- get_nodes(Q), lists:member(N, ErlangNodes)].
1635+
16291636
update_type_state(Q, Fun) when ?is_amqqueue(Q) ->
16301637
Ts = amqqueue:get_type_state(Q),
16311638
amqqueue:set_type_state(Q, Fun(Ts)).

deps/rabbit/test/quorum_queue_SUITE.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1705,7 +1705,7 @@ add_member(Config) ->
17051705
ok = rabbit_control_helper:command(stop_app, Server1),
17061706
ok = rabbit_control_helper:command(join_cluster, Server1, [atom_to_list(Server0)], []),
17071707
rabbit_control_helper:command(start_app, Server1),
1708-
?assertEqual(ok, rpc:call(Server0, rabbit_quorum_queue, add_member,
1708+
?assertEqual(ok, rpc:call(Server1, rabbit_quorum_queue, add_member,
17091709
[<<"/">>, QQ, Server1, 5000])),
17101710
Info = rpc:call(Server0, rabbit_quorum_queue, infos,
17111711
[rabbit_misc:r(<<"/">>, queue, QQ)]),

0 commit comments

Comments
 (0)