11// SPDX-License-Identifier: GPL-2.0
22// Copyright (c) 2020 Cloudflare
33#include <error.h>
4- #include <netinet/tcp.h>
4+ #include <linux/tcp.h>
5+ #include <linux/socket.h>
56#include <sys/epoll.h>
67
78#include "test_progs.h"
2223#define TCP_REPAIR_ON 1
2324#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */
2425
26+ /**
27+ * SOL_TCP is defined in <netinet/tcp.h>, but that header lacks the
28+ * copybuf_address field of struct tcp_zerocopy_receive, so <linux/tcp.h>
29+ * is included instead. Although glibc has merged a patch to sync the
30+ * headers, the fix will take time to propagate, hence this workaround.
31+ */
32+ #ifndef SOL_TCP
33+ #define SOL_TCP 6
34+ #endif
35+
2536static int connected_socket_v4 (void )
2637{
2738 struct sockaddr_in addr = {
@@ -536,21 +547,25 @@ static void test_sockmap_skb_verdict_shutdown(void)
536547}
537548
538549
539- static void test_sockmap_skb_verdict_fionread ( bool pass_prog )
550+ static void do_test_sockmap_skb_verdict_fionread ( int sotype , bool pass_prog )
540551{
541552 int err , map , verdict , c0 = -1 , c1 = -1 , p0 = -1 , p1 = -1 ;
542553 int expected , zero = 0 , sent , recvd , avail ;
543554 struct test_sockmap_pass_prog * pass = NULL ;
544555 struct test_sockmap_drop_prog * drop = NULL ;
545556 char buf [256 ] = "0123456789" ;
557+ int split_len = sizeof (buf ) / 2 ;
546558
547559 if (pass_prog ) {
548560 pass = test_sockmap_pass_prog__open_and_load ();
549561 if (!ASSERT_OK_PTR (pass , "open_and_load" ))
550562 return ;
551563 verdict = bpf_program__fd (pass -> progs .prog_skb_verdict );
552564 map = bpf_map__fd (pass -> maps .sock_map_rx );
553- expected = sizeof (buf );
565+ if (sotype == SOCK_DGRAM )
566+ expected = split_len ; /* FIONREAD for UDP is different from TCP */
567+ else
568+ expected = sizeof (buf );
554569 } else {
555570 drop = test_sockmap_drop_prog__open_and_load ();
556571 if (!ASSERT_OK_PTR (drop , "open_and_load" ))
@@ -566,16 +581,17 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
566581 if (!ASSERT_OK (err , "bpf_prog_attach" ))
567582 goto out ;
568583
569- err = create_socket_pairs (AF_INET , SOCK_STREAM , & c0 , & c1 , & p0 , & p1 );
584+ err = create_socket_pairs (AF_INET , sotype , & c0 , & c1 , & p0 , & p1 );
570585 if (!ASSERT_OK (err , "create_socket_pairs()" ))
571586 goto out ;
572587
573588 err = bpf_map_update_elem (map , & zero , & c1 , BPF_NOEXIST );
574589 if (!ASSERT_OK (err , "bpf_map_update_elem(c1)" ))
575590 goto out_close ;
576591
577- sent = xsend (p1 , & buf , sizeof (buf ), 0 );
578- ASSERT_EQ (sent , sizeof (buf ), "xsend(p0)" );
592+ sent = xsend (p1 , & buf , split_len , 0 );
593+ sent += xsend (p1 , & buf , sizeof (buf ) - split_len , 0 );
594+ ASSERT_EQ (sent , sizeof (buf ), "xsend(p1)" );
579595 err = ioctl (c1 , FIONREAD , & avail );
580596 ASSERT_OK (err , "ioctl(FIONREAD) error" );
581597 ASSERT_EQ (avail , expected , "ioctl(FIONREAD)" );
@@ -597,6 +613,12 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
597613 test_sockmap_drop_prog__destroy (drop );
598614}
599615
/*
 * Run the FIONREAD verdict test first with SOCK_STREAM and then with
 * SOCK_DGRAM, forwarding pass_prog unchanged to each run.
 */
static void test_sockmap_skb_verdict_fionread(bool pass_prog)
{
	static const int sotypes[] = { SOCK_STREAM, SOCK_DGRAM };
	size_t i;

	for (i = 0; i < sizeof(sotypes) / sizeof(sotypes[0]); i++)
		do_test_sockmap_skb_verdict_fionread(sotypes[i], pass_prog);
}
621+
600622static void test_sockmap_skb_verdict_change_tail (void )
601623{
602624 struct test_sockmap_change_tail * skel ;
@@ -1042,6 +1064,160 @@ static void test_sockmap_vsock_unconnected(void)
10421064 xclose (map );
10431065}
10441066
1067+ /* it used to reproduce WARNING */
1068+ static void test_sockmap_zc (void )
1069+ {
1070+ int map , err , sent , recvd , zero = 0 , one = 1 , on = 1 ;
1071+ char buf [10 ] = "0123456789" , rcv [11 ], addr [100 ];
1072+ struct test_sockmap_pass_prog * skel = NULL ;
1073+ int c0 = -1 , p0 = -1 , c1 = -1 , p1 = -1 ;
1074+ struct tcp_zerocopy_receive zc ;
1075+ socklen_t zc_len = sizeof (zc );
1076+ struct bpf_program * prog ;
1077+
1078+ skel = test_sockmap_pass_prog__open_and_load ();
1079+ if (!ASSERT_OK_PTR (skel , "open_and_load" ))
1080+ return ;
1081+
1082+ if (create_socket_pairs (AF_INET , SOCK_STREAM , & c0 , & c1 , & p0 , & p1 ))
1083+ goto end ;
1084+
1085+ prog = skel -> progs .prog_skb_verdict_ingress ;
1086+ map = bpf_map__fd (skel -> maps .sock_map_rx );
1087+
1088+ err = bpf_prog_attach (bpf_program__fd (prog ), map , BPF_SK_SKB_STREAM_VERDICT , 0 );
1089+ if (!ASSERT_OK (err , "bpf_prog_attach" ))
1090+ goto end ;
1091+
1092+ err = bpf_map_update_elem (map , & zero , & p0 , BPF_ANY );
1093+ if (!ASSERT_OK (err , "bpf_map_update_elem" ))
1094+ goto end ;
1095+
1096+ err = bpf_map_update_elem (map , & one , & p1 , BPF_ANY );
1097+ if (!ASSERT_OK (err , "bpf_map_update_elem" ))
1098+ goto end ;
1099+
1100+ sent = xsend (c0 , buf , sizeof (buf ), 0 );
1101+ if (!ASSERT_EQ (sent , sizeof (buf ), "xsend" ))
1102+ goto end ;
1103+
1104+ /* trigger tcp_bpf_recvmsg_parser and inc copied_seq of p1 */
1105+ recvd = recv_timeout (p1 , rcv , sizeof (rcv ), MSG_DONTWAIT , 1 );
1106+ if (!ASSERT_EQ (recvd , sent , "recv_timeout(p1)" ))
1107+ goto end ;
1108+
1109+ /* uninstall sockmap of p1 */
1110+ bpf_map_delete_elem (map , & one );
1111+
1112+ /* trigger tcp stack and the rcv_nxt of p1 is less than copied_seq */
1113+ sent = xsend (c1 , buf , sizeof (buf ) - 1 , 0 );
1114+ if (!ASSERT_EQ (sent , sizeof (buf ) - 1 , "xsend" ))
1115+ goto end ;
1116+
1117+ err = setsockopt (p1 , SOL_SOCKET , SO_ZEROCOPY , & on , sizeof (on ));
1118+ if (!ASSERT_OK (err , "setsockopt" ))
1119+ goto end ;
1120+
1121+ memset (& zc , 0 , sizeof (zc ));
1122+ zc .copybuf_address = (__u64 )((unsigned long )addr );
1123+ zc .copybuf_len = sizeof (addr );
1124+
1125+ err = getsockopt (p1 , IPPROTO_TCP , TCP_ZEROCOPY_RECEIVE , & zc , & zc_len );
1126+ if (!ASSERT_OK (err , "getsockopt" ))
1127+ goto end ;
1128+
1129+ end :
1130+ if (c0 >= 0 )
1131+ close (c0 );
1132+ if (p0 >= 0 )
1133+ close (p0 );
1134+ if (c1 >= 0 )
1135+ close (c1 );
1136+ if (p1 >= 0 )
1137+ close (p1 );
1138+ test_sockmap_pass_prog__destroy (skel );
1139+ }
1140+
1141+ /* it used to check whether copied_seq of sk is correct */
1142+ static void test_sockmap_copied_seq (void )
1143+ {
1144+ int map , err , sent , recvd , zero = 0 , one = 1 ;
1145+ struct test_sockmap_pass_prog * skel = NULL ;
1146+ int c0 = -1 , p0 = -1 , c1 = -1 , p1 = -1 ;
1147+ char buf [10 ] = "0123456789" , rcv [11 ];
1148+ struct bpf_program * prog ;
1149+
1150+ skel = test_sockmap_pass_prog__open_and_load ();
1151+ if (!ASSERT_OK_PTR (skel , "open_and_load" ))
1152+ return ;
1153+
1154+ if (create_socket_pairs (AF_INET , SOCK_STREAM , & c0 , & c1 , & p0 , & p1 ))
1155+ goto end ;
1156+
1157+ prog = skel -> progs .prog_skb_verdict_ingress ;
1158+ map = bpf_map__fd (skel -> maps .sock_map_rx );
1159+
1160+ err = bpf_prog_attach (bpf_program__fd (prog ), map , BPF_SK_SKB_STREAM_VERDICT , 0 );
1161+ if (!ASSERT_OK (err , "bpf_prog_attach" ))
1162+ goto end ;
1163+
1164+ err = bpf_map_update_elem (map , & zero , & p0 , BPF_ANY );
1165+ if (!ASSERT_OK (err , "bpf_map_update_elem(p0)" ))
1166+ goto end ;
1167+
1168+ err = bpf_map_update_elem (map , & one , & p1 , BPF_ANY );
1169+ if (!ASSERT_OK (err , "bpf_map_update_elem(p1)" ))
1170+ goto end ;
1171+
1172+ /* just trigger sockamp: data sent by c0 will be received by p1 */
1173+ sent = xsend (c0 , buf , sizeof (buf ), 0 );
1174+ if (!ASSERT_EQ (sent , sizeof (buf ), "xsend(c0), bpf" ))
1175+ goto end ;
1176+
1177+ recvd = recv_timeout (p1 , rcv , sizeof (rcv ), MSG_DONTWAIT , 1 );
1178+ if (!ASSERT_EQ (recvd , sent , "recv_timeout(p1), bpf" ))
1179+ goto end ;
1180+
1181+ /* uninstall sockmap of p1 and p0 */
1182+ err = bpf_map_delete_elem (map , & one );
1183+ if (!ASSERT_OK (err , "bpf_map_delete_elem(1)" ))
1184+ goto end ;
1185+ err = bpf_map_delete_elem (map , & zero );
1186+ if (!ASSERT_OK (err , "bpf_map_delete_elem(0)" ))
1187+ goto end ;
1188+
1189+ /* now all sockets become plain socket, they should work */
1190+
1191+ /* test copied_seq of p1 by running tcp native stack */
1192+ sent = xsend (c1 , buf , sizeof (buf ), 0 );
1193+ if (!ASSERT_EQ (sent , sizeof (buf ), "xsend(c1), native" ))
1194+ goto end ;
1195+
1196+ recvd = recv (p1 , rcv , sizeof (rcv ), MSG_DONTWAIT );
1197+ if (!ASSERT_EQ (recvd , sent , "recv_timeout(p1), native" ))
1198+ goto end ;
1199+
1200+ /* p0 previously redirected skb to p1, we also check copied_seq of p0 */
1201+ sent = xsend (c0 , buf , sizeof (buf ), 0 );
1202+ if (!ASSERT_EQ (sent , sizeof (buf ), "xsend(c0), native" ))
1203+ goto end ;
1204+
1205+ recvd = recv (p0 , rcv , sizeof (rcv ), MSG_DONTWAIT );
1206+ if (!ASSERT_EQ (recvd , sent , "recv_timeout(p0), native" ))
1207+ goto end ;
1208+
1209+ end :
1210+ if (c0 >= 0 )
1211+ close (c0 );
1212+ if (p0 >= 0 )
1213+ close (p0 );
1214+ if (c1 >= 0 )
1215+ close (c1 );
1216+ if (p1 >= 0 )
1217+ close (p1 );
1218+ test_sockmap_pass_prog__destroy (skel );
1219+ }
1220+
10451221void test_sockmap_basic (void )
10461222{
10471223 if (test__start_subtest ("sockmap create_update_free" ))
@@ -1108,4 +1284,8 @@ void test_sockmap_basic(void)
11081284 test_sockmap_skb_verdict_vsock_poll ();
11091285 if (test__start_subtest ("sockmap vsock unconnected" ))
11101286 test_sockmap_vsock_unconnected ();
1287+ if (test__start_subtest ("sockmap with zc" ))
1288+ test_sockmap_zc ();
1289+ if (test__start_subtest ("sockmap recover" ))
1290+ test_sockmap_copied_seq ();
11111291}
0 commit comments