Commit 270a1c3

dhowells authored and kuba-moo committed
tcp: Support MSG_SPLICE_PAGES
Make TCP's sendmsg() support MSG_SPLICE_PAGES. This causes pages to be
spliced or copied (if it cannot be spliced) from the source iterator.

This allows ->sendpage() to be replaced by something that can handle
multiple multipage folios in a single transaction.

Signed-off-by: David Howells <[email protected]>
cc: David Ahern <[email protected]>
cc: Jens Axboe <[email protected]>
cc: Matthew Wilcox <[email protected]>
Signed-off-by: Jakub Kicinski <[email protected]>
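As a quick illustration of the caller side (not part of this commit): an in-kernel user wraps the pages it wants to send in a bio_vec-backed iterator and sets MSG_SPLICE_PAGES on the msghdr. The helper name below is hypothetical, but the pattern mirrors how later patches in this series drive the new path; MSG_SPLICE_PAGES is intended for in-kernel callers, not userspace.

/* Hypothetical in-kernel caller: hand one page to a connected TCP
 * socket via MSG_SPLICE_PAGES.  TCP takes its own reference on the
 * page (or copies the data if it cannot splice), so the caller must
 * not modify the page until the data has left the send queue.
 */
#include <linux/bvec.h>
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/uio.h>

static int splice_page_to_sock(struct socket *sock, struct page *page,
			       unsigned int offset, unsigned int len)
{
	struct bio_vec bvec;
	struct msghdr msg = {
		.msg_flags = MSG_SPLICE_PAGES | MSG_MORE,
	};

	/* Describe the page span and point the message iterator at it. */
	bvec_set_page(&bvec, page, len, offset);
	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);

	return sock_sendmsg(sock, &msg);
}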
1 parent 2e910b9 commit 270a1c3

File tree

1 file changed: +36, -7 lines changed

net/ipv4/tcp.c

Lines changed: 36 additions & 7 deletions
@@ -1223,26 +1223,31 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 	int flags, err, copied = 0;
 	int mss_now = 0, size_goal, copied_syn = 0;
 	int process_backlog = 0;
-	bool zc = false;
+	int zc = 0;
 	long timeo;
 
 	flags = msg->msg_flags;
 
 	if ((flags & MSG_ZEROCOPY) && size) {
 		if (msg->msg_ubuf) {
 			uarg = msg->msg_ubuf;
-			zc = sk->sk_route_caps & NETIF_F_SG;
+			if (sk->sk_route_caps & NETIF_F_SG)
+				zc = MSG_ZEROCOPY;
 		} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
 			skb = tcp_write_queue_tail(sk);
 			uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
 			if (!uarg) {
 				err = -ENOBUFS;
 				goto out_err;
 			}
-			zc = sk->sk_route_caps & NETIF_F_SG;
-			if (!zc)
+			if (sk->sk_route_caps & NETIF_F_SG)
+				zc = MSG_ZEROCOPY;
+			else
 				uarg_to_msgzc(uarg)->zerocopy = 0;
 		}
+	} else if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES) && size) {
+		if (sk->sk_route_caps & NETIF_F_SG)
+			zc = MSG_SPLICE_PAGES;
 	}
 
 	if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
@@ -1305,7 +1310,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 		goto do_error;
 
 	while (msg_data_left(msg)) {
-		int copy = 0;
+		ssize_t copy = 0;
 
 		skb = tcp_write_queue_tail(sk);
 		if (skb)
@@ -1346,7 +1351,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 		if (copy > msg_data_left(msg))
 			copy = msg_data_left(msg);
 
-		if (!zc) {
+		if (zc == 0) {
 			bool merge = true;
 			int i = skb_shinfo(skb)->nr_frags;
 			struct page_frag *pfrag = sk_page_frag(sk);
@@ -1391,7 +1396,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 				page_ref_inc(pfrag->page);
 			}
 			pfrag->offset += copy;
-		} else {
+		} else if (zc == MSG_ZEROCOPY) {
 			/* First append to a fragless skb builds initial
 			 * pure zerocopy skb
 			 */
@@ -1412,6 +1417,30 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			if (err < 0)
 				goto do_error;
 			copy = err;
+		} else if (zc == MSG_SPLICE_PAGES) {
+			/* Splice in data if we can; copy if we can't. */
+			if (tcp_downgrade_zcopy_pure(sk, skb))
+				goto wait_for_space;
+			copy = tcp_wmem_schedule(sk, copy);
+			if (!copy)
+				goto wait_for_space;
+
+			err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
+						   sk->sk_allocation);
+			if (err < 0) {
+				if (err == -EMSGSIZE) {
+					tcp_mark_push(tp, skb);
+					goto new_segment;
+				}
+				goto do_error;
+			}
+			copy = err;
+
+			if (!(flags & MSG_NO_SHARED_FRAGS))
+				skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
+
+			sk_wmem_queued_add(sk, copy);
+			sk_mem_charge(sk, copy);
 		}
 
 		if (!copied)
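For a sense of where this lands for users: later patches in this series convert the socket ->sendpage() paths (including splice_to_socket()) to build an internal sendmsg() with MSG_SPLICE_PAGES, so an ordinary userspace splice(2) into a TCP socket ends up exercising the branch added above. Below is a minimal userspace sketch of that flow, assuming sockfd is a connected TCP socket; the helper name and trimmed error handling are illustrative only.

/* Userspace sketch: move len bytes from filefd to a connected TCP
 * socket sockfd via a pipe, page by page, without copying in userspace.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

static int splice_file_to_sock(int filefd, int sockfd, size_t len)
{
	int pipefd[2];
	int ret = -1;

	if (pipe(pipefd) < 0)
		return -1;

	while (len > 0) {
		/* Pull file pages into the pipe without copying. */
		ssize_t n = splice(filefd, NULL, pipefd[1], NULL,
				   len, SPLICE_F_MORE);
		if (n <= 0)
			goto out;

		/* Push those pages on to the socket; in-kernel this is
		 * where a MSG_SPLICE_PAGES sendmsg() would be built. */
		while (n > 0) {
			ssize_t m = splice(pipefd[0], NULL, sockfd, NULL,
					   n, SPLICE_F_MORE);
			if (m <= 0)
				goto out;
			n -= m;
			len -= m;
		}
	}
	ret = 0;
out:
	close(pipefd[0]);
	close(pipefd[1]);
	return ret;
}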
