Skip to content

Commit 90361ff

Browse files
mrpreKernel Patches Daemon
authored andcommitted
bpf, sockmap: Fix incorrect copied_seq calculation
A socket using sockmap has its own independent receive queue: ingress_msg. This queue may contain data from its own protocol stack or from other sockets. The issue is that when reading from ingress_msg, we update tp->copied_seq by default. However, if the data is not from its own protocol stack, tcp->rcv_nxt is not increased. Later, if we convert this socket to a native socket, reading from this socket may fail because copied_seq might be significantly larger than rcv_nxt. This fix also addresses the syzkaller-reported bug referenced in the Closes tag. This patch marks the skmsg objects in ingress_msg. When reading, we update copied_seq only if the data is from its own protocol stack. FD1:read() -- FD1->copied_seq++ | [read data] | [enqueue data] v [sockmap] -> ingress to self -> ingress_msg queue FD1 native stack ------> ^ -- FD1->rcv_nxt++ -> redirect to other | [enqueue data] | | | ingress to FD1 v ^ ... | [sockmap] FD2 native stack Closes: https://syzkaller.appspot.com/bug?extid=06dbd397158ec0ea4983 Fixes: 04919be ("tcp: Introduce tcp_read_skb()") Signed-off-by: Jiayuan Chen <[email protected]>
1 parent 1efb39d commit 90361ff

File tree

3 files changed

+43
-6
lines changed

3 files changed

+43
-6
lines changed

include/linux/skmsg.h

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,17 @@ enum __sk_action {
2323
__SK_NONE,
2424
};
2525

26+
/* The BPF program sets BPF_F_INGRESS on sk_msg to indicate data needs to be
27+
* redirected to the ingress queue of a specified socket. Since BPF_F_INGRESS is
28+
* defined in UAPI so that we can't extend this enum for our internal flags. We
29+
* define some internal flags here while inheriting BPF_F_INGRESS.
30+
*/
31+
enum {
32+
SK_MSG_F_INGRESS = BPF_F_INGRESS, /* (1ULL << 0) */
33+
/* internal flag */
34+
SK_MSG_F_INGRESS_SELF = (1ULL << 1)
35+
};
36+
2637
struct sk_msg_sg {
2738
u32 start;
2839
u32 curr;
@@ -141,8 +152,20 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
141152
struct sk_msg *msg, u32 bytes);
142153
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
143154
int len, int flags);
155+
int __sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
156+
int len, int flags, int *from_self_copied);
144157
bool sk_msg_is_readable(struct sock *sk);
145158

159+
static inline bool sk_msg_is_to_self(struct sk_msg *msg)
160+
{
161+
return msg->flags & SK_MSG_F_INGRESS_SELF;
162+
}
163+
164+
static inline void sk_msg_set_to_self(struct sk_msg *msg)
165+
{
166+
msg->flags |= SK_MSG_F_INGRESS_SELF;
167+
}
168+
146169
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
147170
{
148171
WARN_ON(i == msg->sg.end && bytes);
@@ -235,7 +258,7 @@ static inline struct page *sk_msg_page(struct sk_msg *msg, int which)
235258

236259
static inline bool sk_msg_to_ingress(const struct sk_msg *msg)
237260
{
238-
return msg->flags & BPF_F_INGRESS;
261+
return msg->flags & SK_MSG_F_INGRESS;
239262
}
240263

241264
static inline void sk_msg_compute_data_pointers(struct sk_msg *msg)

net/core/skmsg.c

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -409,14 +409,14 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
409409
}
410410
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
411411

412-
/* Receive sk_msg from psock->ingress_msg to @msg. */
413-
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
414-
int len, int flags)
412+
int __sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
413+
int len, int flags, int *from_self_copied)
415414
{
416415
struct iov_iter *iter = &msg->msg_iter;
417416
int peek = flags & MSG_PEEK;
418417
struct sk_msg *msg_rx;
419418
int i, copied = 0;
419+
bool to_self;
420420

421421
msg_rx = sk_psock_peek_msg(psock);
422422
while (copied != len) {
@@ -425,6 +425,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
425425
if (unlikely(!msg_rx))
426426
break;
427427

428+
to_self = sk_msg_is_to_self(msg_rx);
428429
i = msg_rx->sg.start;
429430
do {
430431
struct page *page;
@@ -443,6 +444,9 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
443444
}
444445

445446
copied += copy;
447+
if (to_self && from_self_copied)
448+
*from_self_copied += copy;
449+
446450
if (likely(!peek)) {
447451
sge->offset += copy;
448452
sge->length -= copy;
@@ -487,6 +491,14 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
487491
out:
488492
return copied;
489493
}
494+
EXPORT_SYMBOL_GPL(__sk_msg_recvmsg);
495+
496+
/* Receive sk_msg from psock->ingress_msg to @msg. */
497+
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
498+
int len, int flags)
499+
{
500+
return __sk_msg_recvmsg(sk, psock, msg, len, flags, NULL);
501+
}
490502
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
491503

492504
bool sk_msg_is_readable(struct sock *sk)
@@ -616,6 +628,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
616628
if (unlikely(!msg))
617629
return -EAGAIN;
618630
skb_set_owner_r(skb, sk);
631+
sk_msg_set_to_self(msg);
619632
err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, take_ref);
620633
if (err < 0)
621634
kfree(msg);

net/ipv4/tcp_bpf.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
226226
int peek = flags & MSG_PEEK;
227227
struct sk_psock *psock;
228228
struct tcp_sock *tcp;
229+
int from_self_copied = 0;
229230
int copied = 0;
230231
u32 seq;
231232

@@ -262,7 +263,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
262263
}
263264

264265
msg_bytes_ready:
265-
copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
266+
copied = __sk_msg_recvmsg(sk, psock, msg, len, flags, &from_self_copied);
266267
/* The typical case for EFAULT is the socket was gracefully
267268
* shutdown with a FIN pkt. So check here the other case is
268269
* some error on copy_page_to_iter which would be unexpected.
@@ -277,7 +278,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
277278
goto out;
278279
}
279280
}
280-
seq += copied;
281+
seq += from_self_copied;
281282
if (!copied) {
282283
long timeo;
283284
int data;

0 commit comments

Comments
 (0)