Skip to content

Commit 628a5c5

Browse files
John Heffner authored and David S. Miller committed
[INET]: Add IP(V6)_PMTUDISC_PROBE
Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER. This option forces us not to fragment, but does not make use of the kernel path MTU discovery. That is, it allows for user-mode MTU probing (or, packetization-layer path MTU discovery). This is particularly useful for diagnostic utilities, like traceroute/tracepath. Signed-off-by: John Heffner <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent b881ef7 commit 628a5c5

File tree

6 files changed

+31
-10
lines changed

6 files changed

+31
-10
lines changed

include/linux/in.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ struct in_addr {
8383
#define IP_PMTUDISC_DONT 0 /* Never send DF frames */
8484
#define IP_PMTUDISC_WANT 1 /* Use per route hints */
8585
#define IP_PMTUDISC_DO 2 /* Always DF */
86+
#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */
8687

8788
#define IP_MULTICAST_IF 32
8889
#define IP_MULTICAST_TTL 33

include/linux/in6.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ struct in6_flowlabel_req
179179
#define IPV6_PMTUDISC_DONT 0
180180
#define IPV6_PMTUDISC_WANT 1
181181
#define IPV6_PMTUDISC_DO 2
182+
#define IPV6_PMTUDISC_PROBE 3
182183

183184
/* Flowlabel */
184185
#define IPV6_FLOWLABEL_MGR 32

net/ipv4/ip_output.c

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
189189
return -EINVAL;
190190
}
191191

192+
static inline int ip_skb_dst_mtu(struct sk_buff *skb)
193+
{
194+
struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
195+
196+
return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
197+
skb->dst->dev->mtu : dst_mtu(skb->dst);
198+
}
199+
192200
static inline int ip_finish_output(struct sk_buff *skb)
193201
{
194202
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -198,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
198206
return dst_output(skb);
199207
}
200208
#endif
201-
if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
209+
if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
202210
return ip_fragment(skb, ip_finish_output2);
203211
else
204212
return ip_finish_output2(skb);
@@ -422,7 +430,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
422430
if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
423431
IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
424432
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
425-
htonl(dst_mtu(&rt->u.dst)));
433+
htonl(ip_skb_dst_mtu(skb)));
426434
kfree_skb(skb);
427435
return -EMSGSIZE;
428436
}
@@ -787,7 +795,9 @@ int ip_append_data(struct sock *sk,
787795
inet->cork.addr = ipc->addr;
788796
}
789797
dst_hold(&rt->u.dst);
790-
inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
798+
inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
799+
rt->u.dst.dev->mtu :
800+
dst_mtu(rt->u.dst.path);
791801
inet->cork.rt = rt;
792802
inet->cork.length = 0;
793803
sk->sk_sndmsg_page = NULL;
@@ -1203,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk)
12031213
* to fragment the frame generated here. No matter, what transforms
12041214
* how transforms change size of the packet, it will come out.
12051215
*/
1206-
if (inet->pmtudisc != IP_PMTUDISC_DO)
1216+
if (inet->pmtudisc < IP_PMTUDISC_DO)
12071217
skb->local_df = 1;
12081218

12091219
/* DF bit is set when we want to see DF on outgoing frames.
12101220
* If local_df is set too, we still allow to fragment this frame
12111221
* locally. */
1212-
if (inet->pmtudisc == IP_PMTUDISC_DO ||
1222+
if (inet->pmtudisc >= IP_PMTUDISC_DO ||
12131223
(skb->len <= dst_mtu(&rt->u.dst) &&
12141224
ip_dont_fragment(sk, &rt->u.dst)))
12151225
df = htons(IP_DF);

net/ipv4/ip_sockglue.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
542542
inet->hdrincl = val ? 1 : 0;
543543
break;
544544
case IP_MTU_DISCOVER:
545-
if (val<0 || val>2)
545+
if (val<0 || val>3)
546546
goto e_inval;
547547
inet->pmtudisc = val;
548548
break;

net/ipv6/ip6_output.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,17 @@ static int ip6_output2(struct sk_buff *skb)
137137
return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
138138
}
139139

140+
static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
141+
{
142+
struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
143+
144+
return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
145+
skb->dst->dev->mtu : dst_mtu(skb->dst);
146+
}
147+
140148
int ip6_output(struct sk_buff *skb)
141149
{
142-
if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) ||
150+
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
143151
dst_allfrag(skb->dst))
144152
return ip6_fragment(skb, ip6_output2);
145153
else
@@ -566,7 +574,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
566574
hlen = ip6_find_1stfragopt(skb, &prevhdr);
567575
nexthdr = *prevhdr;
568576

569-
mtu = dst_mtu(&rt->u.dst);
577+
mtu = ip6_skb_dst_mtu(skb);
570578

571579
/* We must not fragment if the socket is set to force MTU discovery
572580
* or if the skb is not generated by a local socket. (This last
@@ -1063,7 +1071,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
10631071
inet->cork.fl = *fl;
10641072
np->cork.hop_limit = hlimit;
10651073
np->cork.tclass = tclass;
1066-
mtu = dst_mtu(rt->u.dst.path);
1074+
mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1075+
rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
10671076
if (np->frag_size < mtu) {
10681077
if (np->frag_size)
10691078
mtu = np->frag_size;

net/ipv6/ipv6_sockglue.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -694,7 +694,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
694694
retv = ip6_ra_control(sk, val, NULL);
695695
break;
696696
case IPV6_MTU_DISCOVER:
697-
if (val<0 || val>2)
697+
if (val<0 || val>3)
698698
goto e_inval;
699699
np->pmtudisc = val;
700700
retv = 0;

0 commit comments

Comments
 (0)