Skip to content

Commit 6942241

Browse files
committed
Merge branch 'net-timestamp'
Willem de Bruijn says:

====================
net-timestamp: blinding

Changes
  (v2 -> v3)
  - rebase only: v2 did not make it to patchwork / netdev
  (v1 -> v2)
  - fix capability check in patch 2
    this could be moved into net/core/sock.c as sk_capable_nouser()
  (rfc -> v1)
  - dropped patch 4: timestamp batching
    due to complexity, as discussed
  - dropped patch 5: default mode
    because it does not really cover all use cases, as discussed
  - added documentation
  - minor fix, see patch 2

Two issues were raised during recent timestamping discussions:
1. looping full packets on the error queue exposes packet headers
2. TCP timestamping with retransmissions generates many timestamps

This RFC patchset is an attempt at addressing both without breaking
legacy behavior.

Patch 1 reintroduces the "no payload" timestamp option, which loops
timestamps onto an empty skb. This reduces the pressure on SO_RCVBUF
from looping many timestamps. It does not reduce the number of recv()
calls needed to process them. The timestamp cookie mechanism developed
in http://patchwork.ozlabs.org/patch/427213/ did, but this is
considerably simpler.

Patch 2 then gives administrators the power to block all timestamp
requests that contain data by unprivileged users. I proposed this
earlier as a backward compatible workaround in the discussion of
net-timestamp: pull headers for SOCK_STREAM
http://patchwork.ozlabs.org/patch/414810/

Patch 3 only updates the txtimestamp example to test this option.
Verified that with option '-n', length is zero in all cases and
option '-I' (PKTINFO) stops working.
====================

Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 9766e97 + 2368592 commit 6942241

File tree

11 files changed

+113
-17
lines changed

11 files changed

+113
-17
lines changed

Documentation/networking/timestamping.txt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,27 @@ SOF_TIMESTAMPING_OPT_CMSG:
162162
option IP_PKTINFO simultaneously.
163163

164164

165+
SOF_TIMESTAMPING_OPT_TSONLY:
166+
167+
Applies to transmit timestamps only. Makes the kernel return the
168+
timestamp as a cmsg alongside an empty packet, as opposed to
169+
alongside the original packet. This reduces the amount of memory
170+
charged to the socket's receive budget (SO_RCVBUF) and delivers
171+
the timestamp even if sysctl net.core.tstamp_allow_data is 0.
172+
This option disables SOF_TIMESTAMPING_OPT_CMSG.
173+
174+
175+
New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
176+
disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
177+
regardless of the setting of sysctl net.core.tstamp_allow_data.
178+
179+
An exception is when a process needs additional cmsg data, for
180+
instance SOL_IP/IP_PKTINFO to detect the egress network interface.
181+
Then pass option SOF_TIMESTAMPING_OPT_CMSG. This option depends on
182+
having access to the contents of the original packet, so cannot be
183+
combined with SOF_TIMESTAMPING_OPT_TSONLY.
184+
185+
165186
1.4 Bytestream Timestamps
166187

167188
The SO_TIMESTAMPING interface supports timestamping of bytes in a

Documentation/networking/timestamping/txtimestamp.c

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ static int do_ipv6 = 1;
7070
static int cfg_payload_len = 10;
7171
static bool cfg_show_payload;
7272
static bool cfg_do_pktinfo;
73+
static bool cfg_loop_nodata;
7374
static uint16_t dest_port = 9000;
7475

7576
static struct sockaddr_in daddr;
@@ -141,6 +142,9 @@ static void print_payload(char *data, int len)
141142
{
142143
int i;
143144

145+
if (!len)
146+
return;
147+
144148
if (len > 70)
145149
len = 70;
146150

@@ -177,6 +181,7 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
177181
struct sock_extended_err *serr = NULL;
178182
struct scm_timestamping *tss = NULL;
179183
struct cmsghdr *cm;
184+
int batch = 0;
180185

181186
for (cm = CMSG_FIRSTHDR(msg);
182187
cm && cm->cmsg_len;
@@ -209,10 +214,18 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
209214
} else
210215
fprintf(stderr, "unknown cmsg %d,%d\n",
211216
cm->cmsg_level, cm->cmsg_type);
217+
218+
if (serr && tss) {
219+
print_timestamp(tss, serr->ee_info, serr->ee_data,
220+
payload_len);
221+
serr = NULL;
222+
tss = NULL;
223+
batch++;
224+
}
212225
}
213226

214-
if (serr && tss)
215-
print_timestamp(tss, serr->ee_info, serr->ee_data, payload_len);
227+
if (batch > 1)
228+
fprintf(stderr, "batched %d timestamps\n", batch);
216229
}
217230

218231
static int recv_errmsg(int fd)
@@ -244,7 +257,7 @@ static int recv_errmsg(int fd)
244257
if (ret == -1 && errno != EAGAIN)
245258
error(1, errno, "recvmsg");
246259

247-
if (ret > 0) {
260+
if (ret >= 0) {
248261
__recv_errmsg_cmsg(&msg, ret);
249262
if (cfg_show_payload)
250263
print_payload(data, cfg_payload_len);
@@ -309,6 +322,9 @@ static void do_test(int family, unsigned int opt)
309322
opt |= SOF_TIMESTAMPING_SOFTWARE |
310323
SOF_TIMESTAMPING_OPT_CMSG |
311324
SOF_TIMESTAMPING_OPT_ID;
325+
if (cfg_loop_nodata)
326+
opt |= SOF_TIMESTAMPING_OPT_TSONLY;
327+
312328
if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
313329
(char *) &opt, sizeof(opt)))
314330
error(1, 0, "setsockopt timestamping");
@@ -378,6 +394,7 @@ static void __attribute__((noreturn)) usage(const char *filepath)
378394
" -h: show this message\n"
379395
" -I: request PKTINFO\n"
380396
" -l N: send N bytes at a time\n"
397+
" -n: set no-payload option\n"
381398
" -r: use raw\n"
382399
" -R: use raw (IP_HDRINCL)\n"
383400
" -p N: connect to port N\n"
@@ -392,7 +409,7 @@ static void parse_opt(int argc, char **argv)
392409
int proto_count = 0;
393410
char c;
394411

395-
while ((c = getopt(argc, argv, "46hIl:p:rRux")) != -1) {
412+
while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) {
396413
switch (c) {
397414
case '4':
398415
do_ipv6 = 0;
@@ -403,6 +420,9 @@ static void parse_opt(int argc, char **argv)
403420
case 'I':
404421
cfg_do_pktinfo = true;
405422
break;
423+
case 'n':
424+
cfg_loop_nodata = true;
425+
break;
406426
case 'r':
407427
proto_count++;
408428
cfg_proto = SOCK_RAW;

Documentation/sysctl/net.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,14 @@ rmem_max
9797

9898
The maximum receive socket buffer size in bytes.
9999

100+
tstamp_allow_data
101+
-----------------
102+
Allow processes to receive tx timestamps looped together with the original
103+
packet contents. If disabled, transmit timestamp requests from unprivileged
104+
processes are dropped unless socket option SOF_TIMESTAMPING_OPT_TSONLY is set.
105+
Default: 1 (on)
106+
107+
100108
wmem_default
101109
------------
102110

include/net/sock.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2239,6 +2239,7 @@ bool sk_net_capable(const struct sock *sk, int cap);
22392239
extern __u32 sysctl_wmem_max;
22402240
extern __u32 sysctl_rmem_max;
22412241

2242+
extern int sysctl_tstamp_allow_data;
22422243
extern int sysctl_optmem_max;
22432244

22442245
extern __u32 sysctl_wmem_default;

include/uapi/linux/net_tstamp.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@ enum {
2424
SOF_TIMESTAMPING_TX_SCHED = (1<<8),
2525
SOF_TIMESTAMPING_TX_ACK = (1<<9),
2626
SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
27+
SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
2728

28-
SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_CMSG,
29+
SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TSONLY,
2930
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
3031
SOF_TIMESTAMPING_LAST
3132
};

net/core/skbuff.c

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@
7474
#include <asm/uaccess.h>
7575
#include <trace/events/skb.h>
7676
#include <linux/highmem.h>
77+
#include <linux/capability.h>
78+
#include <linux/user_namespace.h>
7779

7880
struct kmem_cache *skbuff_head_cache __read_mostly;
7981
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
@@ -3690,11 +3692,28 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
36903692
kfree_skb(skb);
36913693
}
36923694

3695+
/*
 * skb_may_tx_timestamp - decide whether a tx timestamp may be looped to @sk
 * @sk:     socket the timestamp would be queued on; dereferenced on the slow
 *          path, so it must be non-NULL whenever the fast path is not taken
 * @tsonly: true when the timestamp is delivered without packet data (the
 *          visible callers pass either false or the socket's
 *          SOF_TIMESTAMPING_OPT_TSONLY flag)
 *
 * Returns true if sysctl_tstamp_allow_data is non-zero or @tsonly is set.
 * Otherwise returns true only when the socket has an associated file and
 * file_ns_capable() grants CAP_NET_RAW for that file in init_user_ns.
 */
static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
3696+
{
3697+
bool ret;
3698+
3699+
/* Fast path: data looping is allowed globally, or no data is looped. */
if (likely(sysctl_tstamp_allow_data || tsonly))
3700+
return true;
3701+
3702+
/*
 * sk->sk_socket and its file are only read while holding sk_callback_lock
 * NOTE(review): presumably this guards against the socket being detached
 * concurrently - confirm against the sk_callback_lock users.
 */
read_lock_bh(&sk->sk_callback_lock);
3703+
ret = sk->sk_socket && sk->sk_socket->file &&
3704+
file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW);
3705+
read_unlock_bh(&sk->sk_callback_lock);
3706+
return ret;
3707+
}
3708+
36933709
void skb_complete_tx_timestamp(struct sk_buff *skb,
36943710
struct skb_shared_hwtstamps *hwtstamps)
36953711
{
36963712
struct sock *sk = skb->sk;
36973713

3714+
if (!skb_may_tx_timestamp(sk, false))
3715+
return;
3716+
36983717
/* take a reference to prevent skb_orphan() from freeing the socket */
36993718
sock_hold(sk);
37003719

@@ -3710,19 +3729,28 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
37103729
struct sock *sk, int tstype)
37113730
{
37123731
struct sk_buff *skb;
3732+
bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
37133733

3714-
if (!sk)
3734+
if (!sk || !skb_may_tx_timestamp(sk, tsonly))
37153735
return;
37163736

3717-
if (hwtstamps)
3718-
*skb_hwtstamps(orig_skb) = *hwtstamps;
3737+
if (tsonly)
3738+
skb = alloc_skb(0, GFP_ATOMIC);
37193739
else
3720-
orig_skb->tstamp = ktime_get_real();
3721-
3722-
skb = skb_clone(orig_skb, GFP_ATOMIC);
3740+
skb = skb_clone(orig_skb, GFP_ATOMIC);
37233741
if (!skb)
37243742
return;
37253743

3744+
if (tsonly) {
3745+
skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags;
3746+
skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
3747+
}
3748+
3749+
if (hwtstamps)
3750+
*skb_hwtstamps(skb) = *hwtstamps;
3751+
else
3752+
skb->tstamp = ktime_get_real();
3753+
37263754
__skb_complete_tx_timestamp(skb, sk, tstype);
37273755
}
37283756
EXPORT_SYMBOL_GPL(__skb_tstamp_tx);

net/core/sock.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,8 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
325325
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
326326
EXPORT_SYMBOL(sysctl_optmem_max);
327327

328+
int sysctl_tstamp_allow_data __read_mostly = 1;
329+
328330
struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
329331
EXPORT_SYMBOL_GPL(memalloc_socks);
330332

@@ -840,6 +842,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
840842
ret = -EINVAL;
841843
break;
842844
}
845+
843846
if (val & SOF_TIMESTAMPING_OPT_ID &&
844847
!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
845848
if (sk->sk_protocol == IPPROTO_TCP) {

net/core/sysctl_net_core.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,15 @@ static struct ctl_table net_core_table[] = {
321321
.mode = 0644,
322322
.proc_handler = proc_dointvec
323323
},
324+
{
325+
.procname = "tstamp_allow_data",
326+
.data = &sysctl_tstamp_allow_data,
327+
.maxlen = sizeof(int),
328+
.mode = 0644,
329+
.proc_handler = proc_dointvec_minmax,
330+
.extra1 = &zero,
331+
.extra2 = &one
332+
},
324333
#ifdef CONFIG_RPS
325334
{
326335
.procname = "rps_sock_flow_entries",

net/ipv4/ip_sockglue.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
483483

484484
serr = SKB_EXT_ERR(skb);
485485

486-
if (sin) {
486+
if (sin && skb->len) {
487487
sin->sin_family = AF_INET;
488488
sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
489489
serr->addr_offset);
@@ -496,8 +496,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
496496
sin = &errhdr.offender;
497497
memset(sin, 0, sizeof(*sin));
498498

499-
if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
500-
ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) {
499+
if (skb->len &&
500+
(serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
501+
ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) {
501502
sin->sin_family = AF_INET;
502503
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
503504
if (inet_sk(sk)->cmsg_flags)

net/ipv6/datagram.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
369369

370370
serr = SKB_EXT_ERR(skb);
371371

372-
if (sin) {
372+
if (sin && skb->len) {
373373
const unsigned char *nh = skb_network_header(skb);
374374
sin->sin6_family = AF_INET6;
375375
sin->sin6_flowinfo = 0;
@@ -394,8 +394,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
394394
memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
395395
sin = &errhdr.offender;
396396
memset(sin, 0, sizeof(*sin));
397-
398-
if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
397+
if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) {
399398
sin->sin6_family = AF_INET6;
400399
if (np->rxopt.all) {
401400
if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP &&

0 commit comments

Comments
 (0)