Skip to content

Commit 14972cb

Browse files
roopa-prabhu authored and davem330 committed
net: lwtunnel: Handle fragmentation
Today the mpls iptunnel lwtunnel_output redirect expects the tunnel output function to handle fragmentation. This is OK, but it can be avoided if we do not do the mpls output redirect too early, i.e. we could wait until IP fragmentation is done and then call mpls output for each IP fragment. To make this work we need: 1) the lwtunnel state to carry the encap headroom, and 2) to do the redirect to the encap output handler on the IP fragment (essentially, do the output redirect after fragmentation). This patch adds tunnel headroom in lwtstate to make sure we account for tunnel data in MTU calculations during fragmentation, and adds a new xmit redirect handler to redirect to the lwtunnel xmit func after IP fragmentation. This includes IPv6 and some MTU fixes and testing from David Ahern. Signed-off-by: Roopa Prabhu <[email protected]> Signed-off-by: David Ahern <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 4185249 commit 14972cb

File tree

7 files changed

+106
-6
lines changed

7 files changed

+106
-6
lines changed

include/net/lwtunnel.h

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@
1313
/* lw tunnel state flags */
1414
#define LWTUNNEL_STATE_OUTPUT_REDIRECT BIT(0)
1515
#define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1)
16+
#define LWTUNNEL_STATE_XMIT_REDIRECT BIT(2)
17+
18+
/*
 * Result codes for the lwtunnel xmit redirect path.
 *
 * LWTUNNEL_XMIT_DONE is the first enumerator (value 0); the
 * ip_finish_output2()/ip6_finish_output2() hunks in this patch return
 * immediately when lwtunnel_xmit() yields it. Any other non-negative
 * result (e.g. LWTUNNEL_XMIT_CONTINUE) lets them fall through to the
 * normal neighbour lookup.
 */
enum {
19+
LWTUNNEL_XMIT_DONE,
20+
LWTUNNEL_XMIT_CONTINUE,
21+
};
22+
1623

1724
struct lwtunnel_state {
1825
__u16 type;
@@ -21,6 +28,7 @@ struct lwtunnel_state {
2128
int (*orig_output)(struct net *net, struct sock *sk, struct sk_buff *skb);
2229
int (*orig_input)(struct sk_buff *);
2330
int len;
31+
__u16 headroom;
2432
__u8 data[0];
2533
};
2634

@@ -34,6 +42,7 @@ struct lwtunnel_encap_ops {
3442
struct lwtunnel_state *lwtstate);
3543
int (*get_encap_size)(struct lwtunnel_state *lwtstate);
3644
int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b);
45+
int (*xmit)(struct sk_buff *skb);
3746
};
3847

3948
#ifdef CONFIG_LWTUNNEL
@@ -75,6 +84,24 @@ static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate)
7584

7685
return false;
7786
}
87+
88+
/*
 * Report whether @lwtstate asks for the xmit redirect.
 *
 * Returns true only when @lwtstate is non-NULL and has
 * LWTUNNEL_STATE_XMIT_REDIRECT set in its flags; false otherwise.
 */
static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
89+
{
90+
if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_XMIT_REDIRECT))
91+
return true;
92+
93+
return false;
94+
}
95+
96+
/*
 * Return the encap headroom to subtract from @mtu for this tunnel state.
 *
 * Yields lwtstate->headroom only when the state has the xmit redirect
 * flag set and the headroom is strictly smaller than @mtu; otherwise 0.
 * The "< mtu" guard keeps the callers' "mtu - lwtunnel_headroom(...)"
 * from underflowing, since both operands are unsigned.
 */
static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
97+
unsigned int mtu)
98+
{
99+
if (lwtunnel_xmit_redirect(lwtstate) && lwtstate->headroom < mtu)
100+
return lwtstate->headroom;
101+
102+
return 0;
103+
}
104+
78105
int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
79106
unsigned int num);
80107
int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
@@ -90,6 +117,7 @@ struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
90117
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
91118
int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
92119
int lwtunnel_input(struct sk_buff *skb);
120+
int lwtunnel_xmit(struct sk_buff *skb);
93121

94122
#else
95123

@@ -117,6 +145,17 @@ static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate)
117145
return false;
118146
}
119147

148+
/* Stub for builds without CONFIG_LWTUNNEL: never requests an xmit redirect. */
static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
149+
{
150+
return false;
151+
}
152+
153+
/* Stub for builds without CONFIG_LWTUNNEL: no headroom is ever reserved. */
static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
153+
unsigned int mtu)
154+
{
155+
return 0;
156+
}
158+
120159
static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
121160
unsigned int num)
122161
{
@@ -170,6 +209,11 @@ static inline int lwtunnel_input(struct sk_buff *skb)
170209
return -EOPNOTSUPP;
171210
}
172211

212+
/*
 * Stub for builds without CONFIG_LWTUNNEL: always fails with -EOPNOTSUPP.
 * Unlike the real implementation's -EOPNOTSUPP path, this stub does not
 * free @skb.
 */
static inline int lwtunnel_xmit(struct sk_buff *skb)
213+
{
214+
return -EOPNOTSUPP;
215+
}
216+
173217
#endif /* CONFIG_LWTUNNEL */
174218

175219
#define MODULE_ALIAS_RTNL_LWT(encap_type) MODULE_ALIAS("rtnl-lwt-" __stringify(encap_type))

net/core/lwtunnel.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,41 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
251251
}
252252
EXPORT_SYMBOL(lwtunnel_output);
253253

254+
/*
 * Hand @skb to the xmit handler of the encap type attached to its dst.
 *
 * Returns:
 *   -EINVAL      if the skb has no dst (skb is freed),
 *   -EOPNOTSUPP  if no ops or no ->xmit handler is registered for the
 *                encap type, or the handler itself returned -EOPNOTSUPP
 *                (skb is freed here),
 *   0            if the encap type is LWTUNNEL_ENCAP_NONE or above
 *                LWTUNNEL_ENCAP_MAX,
 *   otherwise whatever ops->xmit() returned, passed through unchanged.
 */
int lwtunnel_xmit(struct sk_buff *skb)
255+
{
256+
struct dst_entry *dst = skb_dst(skb);
257+
const struct lwtunnel_encap_ops *ops;
258+
struct lwtunnel_state *lwtstate;
259+
int ret = -EINVAL;
260+
261+
if (!dst)
262+
goto drop;
263+
/*
 * NOTE(review): lwtstate is dereferenced below without a NULL check.
 * The callers visible in this patch only get here after
 * lwtunnel_xmit_redirect() has confirmed it is non-NULL -- confirm
 * that no other caller exists.
 */
264+
lwtstate = dst->lwtstate;
265+
/*
 * NOTE(review): this path returns 0, which equals LWTUNNEL_XMIT_DONE,
 * without freeing the skb -- confirm this is intended.
 */
266+
if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
267+
lwtstate->type > LWTUNNEL_ENCAP_MAX)
268+
return 0;
269+
270+
ret = -EOPNOTSUPP;
271+
rcu_read_lock();
272+
ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
273+
if (likely(ops && ops->xmit))
274+
ret = ops->xmit(skb);
275+
rcu_read_unlock();
276+
/* Only -EOPNOTSUPP is treated as "skb still ours to free" here. */
277+
if (ret == -EOPNOTSUPP)
278+
goto drop;
279+
280+
return ret;
281+
282+
drop:
283+
kfree_skb(skb);
284+
285+
return ret;
286+
}
287+
EXPORT_SYMBOL(lwtunnel_xmit);
288+
254289
int lwtunnel_input(struct sk_buff *skb)
255290
{
256291
struct dst_entry *dst = skb_dst(skb);

net/ipv4/ip_output.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
#include <net/icmp.h>
7474
#include <net/checksum.h>
7575
#include <net/inetpeer.h>
76+
#include <net/lwtunnel.h>
7677
#include <linux/igmp.h>
7778
#include <linux/netfilter_ipv4.h>
7879
#include <linux/netfilter_bridge.h>
@@ -197,6 +198,13 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
197198
skb = skb2;
198199
}
199200

201+
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
202+
int res = lwtunnel_xmit(skb);
203+
204+
if (res < 0 || res == LWTUNNEL_XMIT_DONE)
205+
return res;
206+
}
207+
200208
rcu_read_lock_bh();
201209
nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
202210
neigh = __ipv4_neigh_lookup_noref(dev, nexthop);

net/ipv4/route.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1246,7 +1246,9 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
12461246
mtu = 576;
12471247
}
12481248

1249-
return min_t(unsigned int, mtu, IP_MAX_MTU);
1249+
mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
1250+
1251+
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
12501252
}
12511253

12521254
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)

net/ipv6/ip6_output.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
#include <net/checksum.h>
5757
#include <linux/mroute6.h>
5858
#include <net/l3mdev.h>
59+
#include <net/lwtunnel.h>
5960

6061
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
6162
{
@@ -104,6 +105,13 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
104105
}
105106
}
106107

108+
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
109+
int res = lwtunnel_xmit(skb);
110+
111+
if (res < 0 || res == LWTUNNEL_XMIT_DONE)
112+
return res;
113+
}
114+
107115
rcu_read_lock_bh();
108116
nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
109117
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);

net/ipv6/route.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1604,7 +1604,9 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
16041604
rcu_read_unlock();
16051605

16061606
out:
1607-
return min_t(unsigned int, mtu, IP6_MAX_MTU);
1607+
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1608+
1609+
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
16081610
}
16091611

16101612
static struct dst_entry *icmp6_dst_gc_list;

net/mpls/mpls_iptunnel.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
3737
return en->labels * sizeof(struct mpls_shim_hdr);
3838
}
3939

40-
static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb)
40+
static int mpls_xmit(struct sk_buff *skb)
4141
{
4242
struct mpls_iptunnel_encap *tun_encap_info;
4343
struct mpls_shim_hdr *hdr;
@@ -115,7 +115,7 @@ static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb)
115115
net_dbg_ratelimited("%s: packet transmission failed: %d\n",
116116
__func__, err);
117117

118-
return 0;
118+
return LWTUNNEL_XMIT_DONE;
119119

120120
drop:
121121
kfree_skb(skb);
@@ -153,7 +153,8 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla,
153153
if (ret)
154154
goto errout;
155155
newts->type = LWTUNNEL_ENCAP_MPLS;
156-
newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
156+
newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
157+
newts->headroom = mpls_encap_size(tun_encap_info);
157158

158159
*ts = newts;
159160

@@ -209,7 +210,7 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
209210

210211
static const struct lwtunnel_encap_ops mpls_iptun_ops = {
211212
.build_state = mpls_build_state,
212-
.output = mpls_output,
213+
.xmit = mpls_xmit,
213214
.fill_encap = mpls_fill_encap_info,
214215
.get_encap_size = mpls_encap_nlsize,
215216
.cmp_encap = mpls_encap_cmp,

0 commit comments

Comments
 (0)