Skip to content

Commit 0995210

Browse files
committed
netfilter: flow table support for IPv6
This patch adds the IPv6 flow table type, which implements the datapath flow table used to forward IPv6 traffic. It also exports ip6_dst_mtu_forward(), which is needed to check the MTU so that packets requiring PMTUD handling are passed up to the classic forwarding path. Signed-off-by: Pablo Neira Ayuso <[email protected]>
1 parent 97add9f commit 0995210

File tree

5 files changed

+292
-1
lines changed

5 files changed

+292
-1
lines changed

include/net/ipv6.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -969,6 +969,8 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
969969
&inet6_sk(sk)->cork);
970970
}
971971

972+
unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
973+
972974
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
973975
struct flowi6 *fl6);
974976
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,

net/ipv6/ip6_output.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
378378
return dst_output(net, sk, skb);
379379
}
380380

381-
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
381+
unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
382382
{
383383
unsigned int mtu;
384384
struct inet6_dev *idev;
@@ -398,6 +398,7 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
398398

399399
return mtu;
400400
}
401+
EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
401402

402403
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
403404
{

net/ipv6/netfilter/Kconfig

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,14 @@ config NFT_FIB_IPV6
7171
endif # NF_TABLES_IPV6
7272
endif # NF_TABLES
7373

74+
config NF_FLOW_TABLE_IPV6
75+
select NF_FLOW_TABLE
76+
tristate "Netfilter flow table IPv6 module"
77+
help
78+
This option adds the flow table IPv6 support.
79+
80+
To compile it as a module, choose M here.
81+
7482
config NF_DUP_IPV6
7583
tristate "Netfilter IPv6 packet duplication to alternate destination"
7684
depends on !NF_CONNTRACK || NF_CONNTRACK

net/ipv6/netfilter/Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
4545
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
4646
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
4747

48+
# flow table support
49+
obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
50+
4851
# matches
4952
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
5053
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
#include <linux/kernel.h>
2+
#include <linux/init.h>
3+
#include <linux/module.h>
4+
#include <linux/netfilter.h>
5+
#include <linux/rhashtable.h>
6+
#include <linux/ipv6.h>
7+
#include <linux/netdevice.h>
8+
#include <linux/ipv6.h>
9+
#include <net/ipv6.h>
10+
#include <net/ip6_route.h>
11+
#include <net/neighbour.h>
12+
#include <net/netfilter/nf_flow_table.h>
13+
#include <net/netfilter/nf_tables.h>
14+
/* For layer 4 checksum field offset. */
15+
#include <linux/tcp.h>
16+
#include <linux/udp.h>
17+
18+
static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
19+
struct in6_addr *addr,
20+
struct in6_addr *new_addr)
21+
{
22+
struct tcphdr *tcph;
23+
24+
if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
25+
skb_try_make_writable(skb, thoff + sizeof(*tcph)))
26+
return -1;
27+
28+
tcph = (void *)(skb_network_header(skb) + thoff);
29+
inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
30+
new_addr->s6_addr32, true);
31+
32+
return 0;
33+
}
34+
35+
static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
36+
struct in6_addr *addr,
37+
struct in6_addr *new_addr)
38+
{
39+
struct udphdr *udph;
40+
41+
if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
42+
skb_try_make_writable(skb, thoff + sizeof(*udph)))
43+
return -1;
44+
45+
udph = (void *)(skb_network_header(skb) + thoff);
46+
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
47+
inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
48+
new_addr->s6_addr32, true);
49+
if (!udph->check)
50+
udph->check = CSUM_MANGLED_0;
51+
}
52+
53+
return 0;
54+
}
55+
56+
static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
57+
unsigned int thoff, struct in6_addr *addr,
58+
struct in6_addr *new_addr)
59+
{
60+
switch (ip6h->nexthdr) {
61+
case IPPROTO_TCP:
62+
if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
63+
return NF_DROP;
64+
break;
65+
case IPPROTO_UDP:
66+
if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
67+
return NF_DROP;
68+
break;
69+
}
70+
71+
return 0;
72+
}
73+
74+
static int nf_flow_snat_ipv6(const struct flow_offload *flow,
75+
struct sk_buff *skb, struct ipv6hdr *ip6h,
76+
unsigned int thoff,
77+
enum flow_offload_tuple_dir dir)
78+
{
79+
struct in6_addr addr, new_addr;
80+
81+
switch (dir) {
82+
case FLOW_OFFLOAD_DIR_ORIGINAL:
83+
addr = ip6h->saddr;
84+
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
85+
ip6h->saddr = new_addr;
86+
break;
87+
case FLOW_OFFLOAD_DIR_REPLY:
88+
addr = ip6h->daddr;
89+
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
90+
ip6h->daddr = new_addr;
91+
break;
92+
default:
93+
return -1;
94+
}
95+
96+
return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
97+
}
98+
99+
static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
100+
struct sk_buff *skb, struct ipv6hdr *ip6h,
101+
unsigned int thoff,
102+
enum flow_offload_tuple_dir dir)
103+
{
104+
struct in6_addr addr, new_addr;
105+
106+
switch (dir) {
107+
case FLOW_OFFLOAD_DIR_ORIGINAL:
108+
addr = ip6h->daddr;
109+
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
110+
ip6h->daddr = new_addr;
111+
break;
112+
case FLOW_OFFLOAD_DIR_REPLY:
113+
addr = ip6h->saddr;
114+
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
115+
ip6h->saddr = new_addr;
116+
break;
117+
default:
118+
return -1;
119+
}
120+
121+
return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
122+
}
123+
124+
static int nf_flow_nat_ipv6(const struct flow_offload *flow,
125+
struct sk_buff *skb,
126+
enum flow_offload_tuple_dir dir)
127+
{
128+
struct ipv6hdr *ip6h = ipv6_hdr(skb);
129+
unsigned int thoff = sizeof(*ip6h);
130+
131+
if (flow->flags & FLOW_OFFLOAD_SNAT &&
132+
(nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
133+
nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
134+
return -1;
135+
if (flow->flags & FLOW_OFFLOAD_DNAT &&
136+
(nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
137+
nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
138+
return -1;
139+
140+
return 0;
141+
}
142+
143+
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
144+
struct flow_offload_tuple *tuple)
145+
{
146+
struct flow_ports *ports;
147+
struct ipv6hdr *ip6h;
148+
unsigned int thoff;
149+
150+
if (!pskb_may_pull(skb, sizeof(*ip6h)))
151+
return -1;
152+
153+
ip6h = ipv6_hdr(skb);
154+
155+
if (ip6h->nexthdr != IPPROTO_TCP &&
156+
ip6h->nexthdr != IPPROTO_UDP)
157+
return -1;
158+
159+
thoff = sizeof(*ip6h);
160+
if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
161+
return -1;
162+
163+
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
164+
165+
tuple->src_v6 = ip6h->saddr;
166+
tuple->dst_v6 = ip6h->daddr;
167+
tuple->src_port = ports->source;
168+
tuple->dst_port = ports->dest;
169+
tuple->l3proto = AF_INET6;
170+
tuple->l4proto = ip6h->nexthdr;
171+
tuple->iifidx = dev->ifindex;
172+
173+
return 0;
174+
}
175+
176+
/* Based on ip_exceeds_mtu(). */
177+
static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
178+
{
179+
if (skb->len <= mtu)
180+
return false;
181+
182+
if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
183+
return false;
184+
185+
return true;
186+
}
187+
188+
static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
189+
{
190+
u32 mtu;
191+
192+
mtu = ip6_dst_mtu_forward(&rt->dst);
193+
if (__nf_flow_exceeds_mtu(skb, mtu))
194+
return true;
195+
196+
return false;
197+
}
198+
199+
static unsigned int
200+
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
201+
const struct nf_hook_state *state)
202+
{
203+
struct flow_offload_tuple_rhash *tuplehash;
204+
struct nf_flowtable *flow_table = priv;
205+
struct flow_offload_tuple tuple = {};
206+
enum flow_offload_tuple_dir dir;
207+
struct flow_offload *flow;
208+
struct net_device *outdev;
209+
struct in6_addr *nexthop;
210+
struct ipv6hdr *ip6h;
211+
struct rt6_info *rt;
212+
213+
if (skb->protocol != htons(ETH_P_IPV6))
214+
return NF_ACCEPT;
215+
216+
if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
217+
return NF_ACCEPT;
218+
219+
tuplehash = flow_offload_lookup(flow_table, &tuple);
220+
if (tuplehash == NULL)
221+
return NF_ACCEPT;
222+
223+
outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
224+
if (!outdev)
225+
return NF_ACCEPT;
226+
227+
dir = tuplehash->tuple.dir;
228+
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
229+
230+
rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
231+
if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
232+
return NF_ACCEPT;
233+
234+
if (skb_try_make_writable(skb, sizeof(*ip6h)))
235+
return NF_DROP;
236+
237+
if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
238+
nf_flow_nat_ipv6(flow, skb, dir) < 0)
239+
return NF_DROP;
240+
241+
flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
242+
ip6h = ipv6_hdr(skb);
243+
ip6h->hop_limit--;
244+
245+
skb->dev = outdev;
246+
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
247+
neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
248+
249+
return NF_STOLEN;
250+
}
251+
252+
static struct nf_flowtable_type flowtable_ipv6 = {
253+
.family = NFPROTO_IPV6,
254+
.params = &nf_flow_offload_rhash_params,
255+
.gc = nf_flow_offload_work_gc,
256+
.hook = nf_flow_offload_ipv6_hook,
257+
.owner = THIS_MODULE,
258+
};
259+
260+
/* Module entry point: register the IPv6 flow table type. Always returns 0. */
static int __init nf_flow_ipv6_module_init(void)
{
	nft_register_flowtable_type(&flowtable_ipv6);
	return 0;
}
266+
267+
/* Module exit point: unregister the IPv6 flow table type. */
static void __exit nf_flow_ipv6_module_exit(void)
{
	nft_unregister_flowtable_type(&flowtable_ipv6);
}
271+
272+
module_init(nf_flow_ipv6_module_init);
273+
module_exit(nf_flow_ipv6_module_exit);
274+
275+
MODULE_LICENSE("GPL");
276+
MODULE_AUTHOR("Pablo Neira Ayuso <[email protected]>");
277+
MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);

0 commit comments

Comments
 (0)