Skip to content

Commit 7f3ead8

Browse files
pcacjrgregkh
authored andcommitted
smb: client: fix potential deadlock when reconnecting channels
[ Upstream commit 711741f ] Fix cifs_signal_cifsd_for_reconnect() to take the correct lock order and prevent the following deadlock from happening ====================================================== WARNING: possible circular locking dependency detected 6.16.0-rc3-build2+ #1301 Tainted: G S W ------------------------------------------------------ cifsd/6055 is trying to acquire lock: ffff88810ad56038 (&tcp_ses->srv_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x134/0x200 but task is already holding lock: ffff888119c64330 (&ret_buf->chan_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0xcf/0x200 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&ret_buf->chan_lock){+.+.}-{3:3}: validate_chain+0x1cf/0x270 __lock_acquire+0x60e/0x780 lock_acquire.part.0+0xb4/0x1f0 _raw_spin_lock+0x2f/0x40 cifs_setup_session+0x81/0x4b0 cifs_get_smb_ses+0x771/0x900 cifs_mount_get_session+0x7e/0x170 cifs_mount+0x92/0x2d0 cifs_smb3_do_mount+0x161/0x460 smb3_get_tree+0x55/0x90 vfs_get_tree+0x46/0x180 do_new_mount+0x1b0/0x2e0 path_mount+0x6ee/0x740 do_mount+0x98/0xe0 __do_sys_mount+0x148/0x180 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #1 (&ret_buf->ses_lock){+.+.}-{3:3}: validate_chain+0x1cf/0x270 __lock_acquire+0x60e/0x780 lock_acquire.part.0+0xb4/0x1f0 _raw_spin_lock+0x2f/0x40 cifs_match_super+0x101/0x320 sget+0xab/0x270 cifs_smb3_do_mount+0x1e0/0x460 smb3_get_tree+0x55/0x90 vfs_get_tree+0x46/0x180 do_new_mount+0x1b0/0x2e0 path_mount+0x6ee/0x740 do_mount+0x98/0xe0 __do_sys_mount+0x148/0x180 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #0 (&tcp_ses->srv_lock){+.+.}-{3:3}: check_noncircular+0x95/0xc0 check_prev_add+0x115/0x2f0 validate_chain+0x1cf/0x270 __lock_acquire+0x60e/0x780 lock_acquire.part.0+0xb4/0x1f0 _raw_spin_lock+0x2f/0x40 cifs_signal_cifsd_for_reconnect+0x134/0x200 __cifs_reconnect+0x8f/0x500 cifs_handle_standard+0x112/0x280 cifs_demultiplex_thread+0x64d/0xbc0 kthread+0x2f7/0x310 ret_from_fork+0x2a/0x230 ret_from_fork_asm+0x1a/0x30 other info that might help us debug this: Chain exists of: &tcp_ses->srv_lock --> &ret_buf->ses_lock --> &ret_buf->chan_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&ret_buf->chan_lock); lock(&ret_buf->ses_lock); lock(&ret_buf->chan_lock); lock(&tcp_ses->srv_lock); *** DEADLOCK *** 3 locks held by cifsd/6055: #0: ffffffff857de398 (&cifs_tcp_ses_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x7b/0x200 #1: ffff888119c64060 (&ret_buf->ses_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x9c/0x200 #2: ffff888119c64330 (&ret_buf->chan_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0xcf/0x200 Cc: [email protected] Reported-by: David Howells <[email protected]> Fixes: d7d7a66 ("cifs: avoid use of global locks for high contention data") Reviewed-by: David Howells <[email protected]> Tested-by: David Howells <[email protected]> Signed-off-by: Paulo Alcantara (Red Hat) <[email protected]> Signed-off-by: David Howells <[email protected]> Signed-off-by: Steve French <[email protected]> Signed-off-by: Sasha Levin <[email protected]>
1 parent 1b12f8d commit 7f3ead8

File tree

2 files changed

+37
-22
lines changed

2 files changed

+37
-22
lines changed

fs/smb/client/cifsglob.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,7 @@ inc_rfc1001_len(void *buf, int count)
677677
struct TCP_Server_Info {
678678
struct list_head tcp_ses_list;
679679
struct list_head smb_ses_list;
680+
struct list_head rlist; /* reconnect list */
680681
spinlock_t srv_lock; /* protect anything here that is not protected */
681682
__u64 conn_id; /* connection identifier (useful for debugging) */
682683
int srv_count; /* reference counter */

fs/smb/client/connect.c

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,14 @@ static void smb2_query_server_interfaces(struct work_struct *work)
140140
(SMB_INTERFACE_POLL_INTERVAL * HZ));
141141
}
142142

143+
#define set_need_reco(server) \
144+
do { \
145+
spin_lock(&server->srv_lock); \
146+
if (server->tcpStatus != CifsExiting) \
147+
server->tcpStatus = CifsNeedReconnect; \
148+
spin_unlock(&server->srv_lock); \
149+
} while (0)
150+
143151
/*
144152
* Update the tcpStatus for the server.
145153
* This is used to signal the cifsd thread to call cifs_reconnect
@@ -153,39 +161,45 @@ void
153161
cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
154162
bool all_channels)
155163
{
156-
struct TCP_Server_Info *pserver;
164+
struct TCP_Server_Info *nserver;
157165
struct cifs_ses *ses;
166+
LIST_HEAD(reco);
158167
int i;
159168

160-
/* If server is a channel, select the primary channel */
161-
pserver = SERVER_IS_CHAN(server) ? server->primary_server : server;
162-
163169
/* if we need to signal just this channel */
164170
if (!all_channels) {
165-
spin_lock(&server->srv_lock);
166-
if (server->tcpStatus != CifsExiting)
167-
server->tcpStatus = CifsNeedReconnect;
168-
spin_unlock(&server->srv_lock);
171+
set_need_reco(server);
169172
return;
170173
}
171174

172-
spin_lock(&cifs_tcp_ses_lock);
173-
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
174-
if (cifs_ses_exiting(ses))
175-
continue;
176-
spin_lock(&ses->chan_lock);
177-
for (i = 0; i < ses->chan_count; i++) {
178-
if (!ses->chans[i].server)
175+
if (SERVER_IS_CHAN(server))
176+
server = server->primary_server;
177+
scoped_guard(spinlock, &cifs_tcp_ses_lock) {
178+
set_need_reco(server);
179+
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
180+
spin_lock(&ses->ses_lock);
181+
if (ses->ses_status == SES_EXITING) {
182+
spin_unlock(&ses->ses_lock);
179183
continue;
180-
181-
spin_lock(&ses->chans[i].server->srv_lock);
182-
if (ses->chans[i].server->tcpStatus != CifsExiting)
183-
ses->chans[i].server->tcpStatus = CifsNeedReconnect;
184-
spin_unlock(&ses->chans[i].server->srv_lock);
184+
}
185+
spin_lock(&ses->chan_lock);
186+
for (i = 1; i < ses->chan_count; i++) {
187+
nserver = ses->chans[i].server;
188+
if (!nserver)
189+
continue;
190+
nserver->srv_count++;
191+
list_add(&nserver->rlist, &reco);
192+
}
193+
spin_unlock(&ses->chan_lock);
194+
spin_unlock(&ses->ses_lock);
185195
}
186-
spin_unlock(&ses->chan_lock);
187196
}
188-
spin_unlock(&cifs_tcp_ses_lock);
197+
198+
list_for_each_entry_safe(server, nserver, &reco, rlist) {
199+
list_del_init(&server->rlist);
200+
set_need_reco(server);
201+
cifs_put_tcp_session(server, 0);
202+
}
189203
}
190204

191205
/*

0 commit comments

Comments
 (0)