Skip to content

Commit 5e0724d

Browse files
edumazetdavem330
authored andcommitted
tcp/dccp: fix hashdance race for passive sessions
Multiple cpus can process duplicates of incoming ACK messages matching a SYN_RECV request socket. This is a rare event under normal operations, but definitely can happen. Only one must win the race, otherwise corruption would occur. To fix this without adding new atomic ops, we use logic in inet_ehash_nolisten() to detect the request was present in the same ehash bucket where we try to insert the new child. If request socket was not found, we have to undo the child creation. This actually removes a spin_lock()/spin_unlock() pair in reqsk_queue_unlink() for the fast path. Fixes: e994b2f ("tcp: do not lock listener to process SYN packets") Fixes: 079096f ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 7b13118 commit 5e0724d

File tree

14 files changed

+102
-52
lines changed

14 files changed

+102
-52
lines changed

include/net/inet_connection_sock.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ struct inet_connection_sock_af_ops {
4343
int (*conn_request)(struct sock *sk, struct sk_buff *skb);
4444
struct sock *(*syn_recv_sock)(const struct sock *sk, struct sk_buff *skb,
4545
struct request_sock *req,
46-
struct dst_entry *dst);
46+
struct dst_entry *dst,
47+
struct request_sock *req_unhash,
48+
bool *own_req);
4749
u16 net_header_len;
4850
u16 net_frag_header_len;
4951
u16 sockaddr_len;
@@ -272,6 +274,9 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
272274
struct sock *child);
273275
void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
274276
unsigned long timeout);
277+
struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
278+
struct request_sock *req,
279+
bool own_req);
275280

276281
static inline void inet_csk_reqsk_queue_added(struct sock *sk)
277282
{

include/net/inet_hashtables.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,8 @@ void inet_put_port(struct sock *sk);
205205

206206
void inet_hashinfo_init(struct inet_hashinfo *h);
207207

208-
int inet_ehash_insert(struct sock *sk, struct sock *osk);
209-
void __inet_hash_nolisten(struct sock *sk, struct sock *osk);
208+
bool inet_ehash_insert(struct sock *sk, struct sock *osk);
209+
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
210210
void __inet_hash(struct sock *sk, struct sock *osk);
211211
void inet_hash(struct sock *sk);
212212
void inet_unhash(struct sock *sk);

include/net/tcp.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
457457
void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst);
458458
struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
459459
struct request_sock *req,
460-
struct dst_entry *dst);
460+
struct dst_entry *dst,
461+
struct request_sock *req_unhash,
462+
bool *own_req);
461463
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
462464
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
463465
int tcp_connect(struct sock *sk);

net/dccp/dccp.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,9 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
278278

279279
struct sock *dccp_v4_request_recv_sock(const struct sock *sk, struct sk_buff *skb,
280280
struct request_sock *req,
281-
struct dst_entry *dst);
281+
struct dst_entry *dst,
282+
struct request_sock *req_unhash,
283+
bool *own_req);
282284
struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
283285
struct request_sock *req);
284286

net/dccp/ipv4.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,9 @@ static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb)
393393
struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
394394
struct sk_buff *skb,
395395
struct request_sock *req,
396-
struct dst_entry *dst)
396+
struct dst_entry *dst,
397+
struct request_sock *req_unhash,
398+
bool *own_req)
397399
{
398400
struct inet_request_sock *ireq;
399401
struct inet_sock *newinet;
@@ -426,7 +428,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
426428

427429
if (__inet_inherit_port(sk, newsk) < 0)
428430
goto put_and_exit;
429-
__inet_hash_nolisten(newsk, NULL);
431+
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
430432

431433
return newsk;
432434

net/dccp/ipv6.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,9 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
380380
static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
381381
struct sk_buff *skb,
382382
struct request_sock *req,
383-
struct dst_entry *dst)
383+
struct dst_entry *dst,
384+
struct request_sock *req_unhash,
385+
bool *own_req)
384386
{
385387
struct inet_request_sock *ireq = inet_rsk(req);
386388
struct ipv6_pinfo *newnp;
@@ -393,7 +395,8 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
393395
/*
394396
* v6 mapped
395397
*/
396-
newsk = dccp_v4_request_recv_sock(sk, skb, req, dst);
398+
newsk = dccp_v4_request_recv_sock(sk, skb, req, dst,
399+
req_unhash, own_req);
397400
if (newsk == NULL)
398401
return NULL;
399402

@@ -511,7 +514,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
511514
dccp_done(newsk);
512515
goto out;
513516
}
514-
__inet_hash(newsk, NULL);
517+
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
515518

516519
return newsk;
517520

net/dccp/minisocks.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
143143
{
144144
struct sock *child = NULL;
145145
struct dccp_request_sock *dreq = dccp_rsk(req);
146+
bool own_req;
146147

147148
/* Check for retransmitted REQUEST */
148149
if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
@@ -182,14 +183,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
182183
if (dccp_parse_options(sk, dreq, skb))
183184
goto drop;
184185

185-
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
186-
if (child == NULL)
186+
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
187+
req, &own_req);
188+
if (!child)
187189
goto listen_overflow;
188190

189-
inet_csk_reqsk_queue_drop(sk, req);
190-
inet_csk_reqsk_queue_add(sk, req, child);
191-
out:
192-
return child;
191+
return inet_csk_complete_hashdance(sk, child, req, own_req);
192+
193193
listen_overflow:
194194
dccp_pr_debug("listen_overflow!\n");
195195
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
@@ -198,7 +198,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
198198
req->rsk_ops->send_reset(sk, skb);
199199

200200
inet_csk_reqsk_queue_drop(sk, req);
201-
goto out;
201+
return NULL;
202202
}
203203

204204
EXPORT_SYMBOL_GPL(dccp_check_req);

net/ipv4/inet_connection_sock.c

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -523,15 +523,15 @@ static bool reqsk_queue_unlink(struct request_sock_queue *queue,
523523
struct request_sock *req)
524524
{
525525
struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
526-
spinlock_t *lock;
527-
bool found;
526+
bool found = false;
528527

529-
lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
530-
531-
spin_lock(lock);
532-
found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
533-
spin_unlock(lock);
528+
if (sk_hashed(req_to_sk(req))) {
529+
spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
534530

531+
spin_lock(lock);
532+
found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
533+
spin_unlock(lock);
534+
}
535535
if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
536536
reqsk_put(req);
537537
return found;
@@ -811,6 +811,25 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
811811
}
812812
EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
813813

814+
struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
815+
struct request_sock *req, bool own_req)
816+
{
817+
if (own_req) {
818+
inet_csk_reqsk_queue_drop(sk, req);
819+
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
820+
inet_csk_reqsk_queue_add(sk, req, child);
821+
/* Warning: caller must not call reqsk_put(req);
822+
* child stole last reference on it.
823+
*/
824+
return child;
825+
}
826+
/* Too bad, another child took ownership of the request, undo. */
827+
bh_unlock_sock(child);
828+
sock_put(child);
829+
return NULL;
830+
}
831+
EXPORT_SYMBOL(inet_csk_complete_hashdance);
832+
814833
/*
815834
* This routine closes sockets which have been at least partially
816835
* opened, but not yet accepted.

net/ipv4/inet_hashtables.c

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -407,13 +407,13 @@ static u32 inet_sk_port_offset(const struct sock *sk)
407407
/* insert a socket into ehash, and eventually remove another one
408408
* (The another one can be a SYN_RECV or TIMEWAIT
409409
*/
410-
int inet_ehash_insert(struct sock *sk, struct sock *osk)
410+
bool inet_ehash_insert(struct sock *sk, struct sock *osk)
411411
{
412412
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
413413
struct hlist_nulls_head *list;
414414
struct inet_ehash_bucket *head;
415415
spinlock_t *lock;
416-
int ret = 0;
416+
bool ret = true;
417417

418418
WARN_ON_ONCE(!sk_unhashed(sk));
419419

@@ -423,30 +423,41 @@ int inet_ehash_insert(struct sock *sk, struct sock *osk)
423423
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
424424

425425
spin_lock(lock);
426-
__sk_nulls_add_node_rcu(sk, list);
427426
if (osk) {
428-
WARN_ON(sk->sk_hash != osk->sk_hash);
429-
sk_nulls_del_node_init_rcu(osk);
427+
WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
428+
ret = sk_nulls_del_node_init_rcu(osk);
430429
}
430+
if (ret)
431+
__sk_nulls_add_node_rcu(sk, list);
431432
spin_unlock(lock);
432433
return ret;
433434
}
434435

435-
void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
436+
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
436437
{
437-
inet_ehash_insert(sk, osk);
438-
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
438+
bool ok = inet_ehash_insert(sk, osk);
439+
440+
if (ok) {
441+
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
442+
} else {
443+
percpu_counter_inc(sk->sk_prot->orphan_count);
444+
sk->sk_state = TCP_CLOSE;
445+
sock_set_flag(sk, SOCK_DEAD);
446+
inet_csk_destroy_sock(sk);
447+
}
448+
return ok;
439449
}
440-
EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
450+
EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
441451

442452
void __inet_hash(struct sock *sk, struct sock *osk)
443453
{
444454
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
445455
struct inet_listen_hashbucket *ilb;
446456

447-
if (sk->sk_state != TCP_LISTEN)
448-
return __inet_hash_nolisten(sk, osk);
449-
457+
if (sk->sk_state != TCP_LISTEN) {
458+
inet_ehash_nolisten(sk, osk);
459+
return;
460+
}
450461
WARN_ON(!sk_unhashed(sk));
451462
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
452463

@@ -567,7 +578,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
567578
inet_bind_hash(sk, tb, port);
568579
if (sk_unhashed(sk)) {
569580
inet_sk(sk)->inet_sport = htons(port);
570-
__inet_hash_nolisten(sk, (struct sock *)tw);
581+
inet_ehash_nolisten(sk, (struct sock *)tw);
571582
}
572583
if (tw)
573584
inet_twsk_bind_unhash(tw, hinfo);
@@ -584,7 +595,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
584595
tb = inet_csk(sk)->icsk_bind_hash;
585596
spin_lock_bh(&head->lock);
586597
if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
587-
__inet_hash_nolisten(sk, NULL);
598+
inet_ehash_nolisten(sk, NULL);
588599
spin_unlock_bh(&head->lock);
589600
return 0;
590601
} else {

net/ipv4/syncookies.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,10 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
221221
{
222222
struct inet_connection_sock *icsk = inet_csk(sk);
223223
struct sock *child;
224+
bool own_req;
224225

225-
child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
226+
child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
227+
NULL, &own_req);
226228
if (child) {
227229
atomic_set(&req->rsk_refcnt, 1);
228230
sock_rps_save_rxhash(child, skb);

0 commit comments

Comments
 (0)