Skip to content

Commit b29e0de

Browse files
committed
Merge branch 'udp-false-sharing'
Paolo Abeni says: ==================== udp: avoid false sharing on receive Under high UDP load, the BH processing and the user-space receiver can run on different cores. The UDP implementation makes a lot of effort to avoid false sharing in the receive path, but recent changes to the struct sock layout moved the sk_forward_alloc and the sk_rcvbuf fields onto the same cacheline: /* --- cacheline 4 boundary (256 bytes) --- */ struct sk_buff * tail; } sk_backlog; int sk_forward_alloc; unsigned int sk_reserved_mem; unsigned int sk_ll_usec; unsigned int sk_napi_id; int sk_rcvbuf; sk_forward_alloc is updated by the BH, while sk_rcvbuf is accessed by udp_recvmsg(), causing false sharing. A possible solution would be to re-order the struct sock fields to avoid the false sharing. Such a change is subject to being invalidated by future changes and could have negative side effects on other workloads. Instead, this series uses a different approach, touching only the UDP socket layout. The first patch generalizes the custom setsockopt infrastructure, to allow UDP to track the buffer size, and the second patch addresses the issue, copying the relevant buffer information into an already hot cacheline. Overall the above gives a 10% peak throughput increase under UDP flood. v1 -> v2: - introduce and use a common helper to initialize the UDP v4/v6 sockets (Kuniyuki) ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents ea5ed0f + 8a3854c commit b29e0de

File tree

7 files changed

+35
-12
lines changed

7 files changed

+35
-12
lines changed

include/linux/net.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ struct net;
4141
#define SOCK_NOSPACE 2
4242
#define SOCK_PASSCRED 3
4343
#define SOCK_PASSSEC 4
44+
#define SOCK_CUSTOM_SOCKOPT 5
4445

4546
#ifndef ARCH_HAS_SOCKET_TYPES
4647
/**

include/linux/udp.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ struct udp_sock {
8787

8888
/* This field is dirtied by udp_recvmsg() */
8989
int forward_deficit;
90+
91+
/* This field follows the rcvbuf value, and is touched by udp_recvmsg */
92+
int forward_threshold;
9093
};
9194

9295
#define UDP_MAX_SEGMENTS (1 << 6UL)

include/net/udp.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,15 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
174174
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
175175
netdev_features_t features, bool is_ipv6);
176176

177+
static inline void udp_lib_init_sock(struct sock *sk)
178+
{
179+
struct udp_sock *up = udp_sk(sk);
180+
181+
skb_queue_head_init(&up->reader_queue);
182+
up->forward_threshold = sk->sk_rcvbuf >> 2;
183+
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
184+
}
185+
177186
/* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
178187
static inline int udp_lib_hash(struct sock *sk)
179188
{

net/ipv4/udp.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,7 +1448,7 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
14481448
if (likely(partial)) {
14491449
up->forward_deficit += size;
14501450
size = up->forward_deficit;
1451-
if (size < (sk->sk_rcvbuf >> 2) &&
1451+
if (size < READ_ONCE(up->forward_threshold) &&
14521452
!skb_queue_empty(&up->reader_queue))
14531453
return;
14541454
} else {
@@ -1622,7 +1622,7 @@ static void udp_destruct_sock(struct sock *sk)
16221622

16231623
int udp_init_sock(struct sock *sk)
16241624
{
1625-
skb_queue_head_init(&udp_sk(sk)->reader_queue);
1625+
udp_lib_init_sock(sk);
16261626
sk->sk_destruct = udp_destruct_sock;
16271627
return 0;
16281628
}
@@ -2671,6 +2671,18 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
26712671
int err = 0;
26722672
int is_udplite = IS_UDPLITE(sk);
26732673

2674+
if (level == SOL_SOCKET) {
2675+
err = sk_setsockopt(sk, level, optname, optval, optlen);
2676+
2677+
if (optname == SO_RCVBUF || optname == SO_RCVBUFFORCE) {
2678+
sockopt_lock_sock(sk);
2679+
/* paired with READ_ONCE in udp_rmem_release() */
2680+
WRITE_ONCE(up->forward_threshold, sk->sk_rcvbuf >> 2);
2681+
sockopt_release_sock(sk);
2682+
}
2683+
return err;
2684+
}
2685+
26742686
if (optlen < sizeof(int))
26752687
return -EINVAL;
26762688

@@ -2784,7 +2796,7 @@ EXPORT_SYMBOL(udp_lib_setsockopt);
27842796
int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
27852797
unsigned int optlen)
27862798
{
2787-
if (level == SOL_UDP || level == SOL_UDPLITE)
2799+
if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET)
27882800
return udp_lib_setsockopt(sk, level, optname,
27892801
optval, optlen,
27902802
udp_push_pending_frames);

net/ipv6/udp.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ static void udpv6_destruct_sock(struct sock *sk)
6464

6565
int udpv6_init_sock(struct sock *sk)
6666
{
67-
skb_queue_head_init(&udp_sk(sk)->reader_queue);
67+
udp_lib_init_sock(sk);
6868
sk->sk_destruct = udpv6_destruct_sock;
6969
return 0;
7070
}
@@ -1669,7 +1669,7 @@ void udpv6_destroy_sock(struct sock *sk)
16691669
int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
16701670
unsigned int optlen)
16711671
{
1672-
if (level == SOL_UDP || level == SOL_UDPLITE)
1672+
if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET)
16731673
return udp_lib_setsockopt(sk, level, optname,
16741674
optval, optlen,
16751675
udp_v6_push_pending_frames);

net/mptcp/protocol.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2708,6 +2708,8 @@ static int mptcp_init_sock(struct sock *sk)
27082708
if (ret)
27092709
return ret;
27102710

2711+
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
2712+
27112713
/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
27122714
* propagate the correct value
27132715
*/
@@ -3684,6 +3686,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
36843686
struct mptcp_subflow_context *subflow;
36853687
struct sock *newsk = newsock->sk;
36863688

3689+
set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
3690+
36873691
lock_sock(newsk);
36883692

36893693
/* PM/worker can now acquire the first subflow socket

net/socket.c

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2199,13 +2199,7 @@ SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
21992199

22002200
static bool sock_use_custom_sol_socket(const struct socket *sock)
22012201
{
2202-
const struct sock *sk = sock->sk;
2203-
2204-
/* Use sock->ops->setsockopt() for MPTCP */
2205-
return IS_ENABLED(CONFIG_MPTCP) &&
2206-
sk->sk_protocol == IPPROTO_MPTCP &&
2207-
sk->sk_type == SOCK_STREAM &&
2208-
(sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
2202+
return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
22092203
}
22102204

22112205
/*

0 commit comments

Comments
 (0)