Skip to content

Commit 9dc20a6

Browse files
committed
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next, they are:

1) A couple of cleanups for the netfilter core hook from Eric Biederman.

2) Net namespace hook registration, also from Eric. This adds a dependency with the rtnl_lock. This should be fine by now but we have to keep an eye on this because if we ever get the per-subsys nfnl_lock before rtnl we may have problems in the future. But we have room to remove this in the future by propagating the complexity to the clients, by registering hooks for the init netns functions.

3) Update nf_tables to use the new net namespace hook infrastructure, also from Eric.

4) Three patches to refine and to address problems from the new net namespace hook infrastructure.

5) Switch to alternate jumpstack in xtables iff the packet is reentering. This only applies to a very special case, the TEE target, but Eric Dumazet reports that this is slowing down things for everyone else. So let's only switch to the alternate jumpstack if the TEE target is in use, as tracked through a static key. This batch also comes with offline precalculation of the jumpstack based on the callchain depth. From Florian Westphal.

6) Minimal SCTP multihoming support for our conntrack helper, from Michal Kubecek.

7) Reduce nf_bridge_info per skbuff scratchpad area to 32 bytes, from Florian Westphal.

8) Fix several checkpatch errors in bridge netfilter, from Bernhard Thaler.

9) Get rid of useless debug message in ip6t_REJECT, from Subash Abhinov.
====================

Signed-off-by: David S. Miller <[email protected]>
2 parents d1b22e4 + a6cd379 commit 9dc20a6

24 files changed

+461
-217
lines changed

include/linux/netfilter.h

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#include <linux/list.h>
1212
#include <linux/static_key.h>
1313
#include <linux/netfilter_defs.h>
14+
#include <linux/netdevice.h>
15+
#include <net/net_namespace.h>
1416

1517
#ifdef CONFIG_NETFILTER
1618
static inline int NF_DROP_GETERR(int verdict)
@@ -118,6 +120,13 @@ struct nf_sockopt_ops {
118120
};
119121

120122
/* Function to register/unregister hook points. */
123+
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *ops);
124+
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *ops);
125+
int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
126+
unsigned int n);
127+
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
128+
unsigned int n);
129+
121130
int nf_register_hook(struct nf_hook_ops *reg);
122131
void nf_unregister_hook(struct nf_hook_ops *reg);
123132
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
@@ -128,33 +137,26 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n);
128137
int nf_register_sockopt(struct nf_sockopt_ops *reg);
129138
void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
130139

131-
extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
132-
133140
#ifdef HAVE_JUMP_LABEL
134141
extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
135142

136-
static inline bool nf_hook_list_active(struct list_head *nf_hook_list,
143+
static inline bool nf_hook_list_active(struct list_head *hook_list,
137144
u_int8_t pf, unsigned int hook)
138145
{
139146
if (__builtin_constant_p(pf) &&
140147
__builtin_constant_p(hook))
141148
return static_key_false(&nf_hooks_needed[pf][hook]);
142149

143-
return !list_empty(nf_hook_list);
150+
return !list_empty(hook_list);
144151
}
145152
#else
146-
static inline bool nf_hook_list_active(struct list_head *nf_hook_list,
153+
static inline bool nf_hook_list_active(struct list_head *hook_list,
147154
u_int8_t pf, unsigned int hook)
148155
{
149-
return !list_empty(nf_hook_list);
156+
return !list_empty(hook_list);
150157
}
151158
#endif
152159

153-
static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
154-
{
155-
return nf_hook_list_active(&nf_hooks[pf][hook], pf, hook);
156-
}
157-
158160
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);
159161

160162
/**
@@ -172,10 +174,13 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
172174
int (*okfn)(struct sock *, struct sk_buff *),
173175
int thresh)
174176
{
175-
if (nf_hooks_active(pf, hook)) {
177+
struct net *net = dev_net(indev ? indev : outdev);
178+
struct list_head *hook_list = &net->nf.hooks[pf][hook];
179+
180+
if (nf_hook_list_active(hook_list, pf, hook)) {
176181
struct nf_hook_state state;
177182

178-
nf_hook_state_init(&state, &nf_hooks[pf][hook], hook, thresh,
183+
nf_hook_state_init(&state, hook_list, hook, thresh,
179184
pf, indev, outdev, sk, okfn);
180185
return nf_hook_slow(skb, &state);
181186
}
@@ -385,4 +390,15 @@ extern struct nfq_ct_hook __rcu *nfq_ct_hook;
385390
static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
386391
#endif
387392

393+
/**
394+
* nf_skb_duplicated - TEE target has sent a packet
395+
*
396+
* When a xtables target sends a packet, the OUTPUT and POSTROUTING
397+
* hooks are traversed again, i.e. nft and xtables are invoked recursively.
398+
*
399+
* This is used by xtables TEE target to prevent the duplicated skb from
400+
* being duplicated again.
401+
*/
402+
DECLARE_PER_CPU(bool, nf_skb_duplicated);
403+
388404
#endif /*__LINUX_NETFILTER_H*/

include/linux/netfilter/x_tables.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44

55
#include <linux/netdevice.h>
6+
#include <linux/static_key.h>
67
#include <uapi/linux/netfilter/x_tables.h>
78

89
/**
@@ -222,7 +223,6 @@ struct xt_table_info {
222223
* @stacksize jumps (number of user chains) can possibly be made.
223224
*/
224225
unsigned int stacksize;
225-
unsigned int __percpu *stackptr;
226226
void ***jumpstack;
227227

228228
unsigned char entries[0] __aligned(8);
@@ -281,6 +281,12 @@ void xt_free_table_info(struct xt_table_info *info);
281281
*/
282282
DECLARE_PER_CPU(seqcount_t, xt_recseq);
283283

284+
/* xt_tee_enabled - true if x_tables needs to handle reentrancy
285+
*
286+
* Enabled if current ip(6)tables ruleset has at least one -j TEE rule.
287+
*/
288+
extern struct static_key xt_tee_enabled;
289+
284290
/**
285291
* xt_write_recseq_begin - start of a write section
286292
*

include/linux/netfilter_bridge.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,6 @@ enum nf_br_hook_priorities {
1717

1818
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
1919

20-
#define BRNF_BRIDGED_DNAT 0x02
21-
#define BRNF_NF_BRIDGE_PREROUTING 0x08
22-
2320
int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);
2421

2522
static inline void br_drop_fake_rtable(struct sk_buff *skb)
@@ -63,8 +60,17 @@ nf_bridge_get_physoutdev(const struct sk_buff *skb)
6360
{
6461
return skb->nf_bridge ? skb->nf_bridge->physoutdev : NULL;
6562
}
63+
64+
static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb)
65+
{
66+
return skb->nf_bridge && skb->nf_bridge->in_prerouting;
67+
}
6668
#else
6769
#define br_drop_fake_rtable(skb) do { } while (0)
70+
static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb)
71+
{
72+
return false;
73+
}
6874
#endif /* CONFIG_BRIDGE_NETFILTER */
6975

7076
#endif

include/linux/skbuff.h

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -174,17 +174,24 @@ struct nf_bridge_info {
174174
BRNF_PROTO_8021Q,
175175
BRNF_PROTO_PPPOE
176176
} orig_proto:8;
177-
bool pkt_otherhost;
177+
u8 pkt_otherhost:1;
178+
u8 in_prerouting:1;
179+
u8 bridged_dnat:1;
178180
__u16 frag_max_size;
179-
unsigned int mask;
180181
struct net_device *physindev;
181182
union {
182-
struct net_device *physoutdev;
183-
char neigh_header[8];
184-
};
185-
union {
183+
/* prerouting: detect dnat in orig/reply direction */
186184
__be32 ipv4_daddr;
187185
struct in6_addr ipv6_daddr;
186+
187+
/* after prerouting + nat detected: store original source
188+
* mac since neigh resolution overwrites it, only used while
189+
* skb is out in neigh layer.
190+
*/
191+
char neigh_header[8];
192+
193+
/* always valid & non-NULL from FORWARD on, for physdev match */
194+
struct net_device *physoutdev;
188195
};
189196
};
190197
#endif

include/net/netns/netfilter.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,6 @@ struct netns_nf {
1414
#ifdef CONFIG_SYSCTL
1515
struct ctl_table_header *nf_log_dir_header;
1616
#endif
17+
struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
1718
};
1819
#endif

include/uapi/linux/netfilter/nf_conntrack_sctp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ enum sctp_conntrack {
1313
SCTP_CONNTRACK_SHUTDOWN_SENT,
1414
SCTP_CONNTRACK_SHUTDOWN_RECD,
1515
SCTP_CONNTRACK_SHUTDOWN_ACK_SENT,
16+
SCTP_CONNTRACK_HEARTBEAT_SENT,
17+
SCTP_CONNTRACK_HEARTBEAT_ACKED,
1618
SCTP_CONNTRACK_MAX
1719
};
1820

include/uapi/linux/netfilter/nfnetlink_cttimeout.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ enum ctattr_timeout_sctp {
9292
CTA_TIMEOUT_SCTP_SHUTDOWN_SENT,
9393
CTA_TIMEOUT_SCTP_SHUTDOWN_RECD,
9494
CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
95+
CTA_TIMEOUT_SCTP_HEARTBEAT_SENT,
96+
CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED,
9597
__CTA_TIMEOUT_SCTP_MAX
9698
};
9799
#define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1)

net/bridge/br_netfilter_hooks.c

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ static struct ctl_table_header *brnf_sysctl_header;
4949
static int brnf_call_iptables __read_mostly = 1;
5050
static int brnf_call_ip6tables __read_mostly = 1;
5151
static int brnf_call_arptables __read_mostly = 1;
52-
static int brnf_filter_vlan_tagged __read_mostly = 0;
53-
static int brnf_filter_pppoe_tagged __read_mostly = 0;
54-
static int brnf_pass_vlan_indev __read_mostly = 0;
52+
static int brnf_filter_vlan_tagged __read_mostly;
53+
static int brnf_filter_pppoe_tagged __read_mostly;
54+
static int brnf_pass_vlan_indev __read_mostly;
5555
#else
5656
#define brnf_call_iptables 1
5757
#define brnf_call_ip6tables 1
@@ -284,7 +284,7 @@ int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
284284
nf_bridge->neigh_header,
285285
ETH_HLEN-ETH_ALEN);
286286
/* tell br_dev_xmit to continue with forwarding */
287-
nf_bridge->mask |= BRNF_BRIDGED_DNAT;
287+
nf_bridge->bridged_dnat = 1;
288288
/* FIXME Need to refragment */
289289
ret = neigh->output(neigh, skb);
290290
}
@@ -356,7 +356,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
356356
skb->pkt_type = PACKET_OTHERHOST;
357357
nf_bridge->pkt_otherhost = false;
358358
}
359-
nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
359+
nf_bridge->in_prerouting = 0;
360360
if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
361361
if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
362362
struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -444,7 +444,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb)
444444
nf_bridge->pkt_otherhost = true;
445445
}
446446

447-
nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
447+
nf_bridge->in_prerouting = 1;
448448
nf_bridge->physindev = skb->dev;
449449
skb->dev = brnf_get_logical_dev(skb, skb->dev);
450450

@@ -850,10 +850,8 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
850850
struct sk_buff *skb,
851851
const struct nf_hook_state *state)
852852
{
853-
if (skb->nf_bridge &&
854-
!(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
853+
if (skb->nf_bridge && !skb->nf_bridge->in_prerouting)
855854
return NF_STOP;
856-
}
857855

858856
return NF_ACCEPT;
859857
}
@@ -872,7 +870,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
872870
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
873871

874872
skb_pull(skb, ETH_HLEN);
875-
nf_bridge->mask &= ~BRNF_BRIDGED_DNAT;
873+
nf_bridge->bridged_dnat = 0;
876874

877875
BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN));
878876

@@ -887,7 +885,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
887885

888886
static int br_nf_dev_xmit(struct sk_buff *skb)
889887
{
890-
if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
888+
if (skb->nf_bridge && skb->nf_bridge->bridged_dnat) {
891889
br_nf_pre_routing_finish_bridge_slow(skb);
892890
return 1;
893891
}

net/bridge/br_netfilter_ipv6.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
174174
skb->pkt_type = PACKET_OTHERHOST;
175175
nf_bridge->pkt_otherhost = false;
176176
}
177-
nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
177+
nf_bridge->in_prerouting = 0;
178178
if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) {
179179
skb_dst_drop(skb);
180180
v6ops->route_input(skb);

net/ipv4/netfilter/arp_tables.c

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ get_entry(const void *base, unsigned int offset)
240240
return (struct arpt_entry *)(base + offset);
241241
}
242242

243-
static inline __pure
243+
static inline
244244
struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
245245
{
246246
return (void *)entry + entry->next_offset;
@@ -280,6 +280,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
280280
table_base = private->entries;
281281
jumpstack = (struct arpt_entry **)private->jumpstack[cpu];
282282

283+
/* No TEE support for arptables, so no need to switch to alternate
284+
* stack. All targets that reenter must return absolute verdicts.
285+
*/
283286
e = get_entry(table_base, private->hook_entry[hook]);
284287

285288
acpar.in = state->in;
@@ -325,21 +328,13 @@ unsigned int arpt_do_table(struct sk_buff *skb,
325328
}
326329
if (table_base + v
327330
!= arpt_next_entry(e)) {
328-
329-
if (stackidx >= private->stacksize) {
330-
verdict = NF_DROP;
331-
break;
332-
}
333331
jumpstack[stackidx++] = e;
334332
}
335333

336334
e = get_entry(table_base, v);
337335
continue;
338336
}
339337

340-
/* Targets which reenter must return
341-
* abs. verdicts
342-
*/
343338
acpar.target = t->u.kernel.target;
344339
acpar.targinfo = t->data;
345340
verdict = t->u.kernel.target->target(skb, &acpar);
@@ -372,10 +367,13 @@ static inline bool unconditional(const struct arpt_arp *arp)
372367

373368
/* Figures out from what hook each rule can be called: returns 0 if
374369
* there are loops. Puts hook bitmask in comefrom.
370+
*
371+
* Keeps track of largest call depth seen and stores it in newinfo->stacksize.
375372
*/
376-
static int mark_source_chains(const struct xt_table_info *newinfo,
373+
static int mark_source_chains(struct xt_table_info *newinfo,
377374
unsigned int valid_hooks, void *entry0)
378375
{
376+
unsigned int calldepth, max_calldepth = 0;
379377
unsigned int hook;
380378

381379
/* No recursion; use packet counter to save back ptrs (reset
@@ -391,6 +389,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
391389

392390
/* Set initial back pointer. */
393391
e->counters.pcnt = pos;
392+
calldepth = 0;
394393

395394
for (;;) {
396395
const struct xt_standard_target *t
@@ -445,6 +444,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
445444
(entry0 + pos + size);
446445
e->counters.pcnt = pos;
447446
pos += size;
447+
if (calldepth > 0)
448+
--calldepth;
448449
} else {
449450
int newpos = t->verdict;
450451

@@ -459,6 +460,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
459460
return 0;
460461
}
461462

463+
if (entry0 + newpos != arpt_next_entry(e) &&
464+
++calldepth > max_calldepth)
465+
max_calldepth = calldepth;
466+
462467
/* This a jump; chase it. */
463468
duprintf("Jump rule %u -> %u\n",
464469
pos, newpos);
@@ -475,6 +480,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
475480
next:
476481
duprintf("Finished chain %u\n", hook);
477482
}
483+
newinfo->stacksize = max_calldepth;
478484
return 1;
479485
}
480486

@@ -664,9 +670,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
664670
if (ret != 0)
665671
break;
666672
++i;
667-
if (strcmp(arpt_get_target(iter)->u.user.name,
668-
XT_ERROR_TARGET) == 0)
669-
++newinfo->stacksize;
670673
}
671674
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
672675
if (ret != 0)
@@ -1439,9 +1442,6 @@ static int translate_compat_table(const char *name,
14391442
break;
14401443
}
14411444
++i;
1442-
if (strcmp(arpt_get_target(iter1)->u.user.name,
1443-
XT_ERROR_TARGET) == 0)
1444-
++newinfo->stacksize;
14451445
}
14461446
if (ret) {
14471447
/*

0 commit comments

Comments
 (0)