Skip to content

Commit e687ad6

Browse files
ummakynes authored and davem330 committed
netfilter: add netfilter ingress hook after handle_ing() under unique static key
This patch adds the Netfilter ingress hook just after the existing tc ingress hook, that seems to be the consensus solution for this. Note that the Netfilter hook resides under the global static key that enables ingress filtering. Nonetheless, Netfilter still also has its own static key for minimal impact on the existing handle_ing(). * Without this patch: Result: OK: 6216490(c6216338+d152) usec, 100000000 (60byte,0frags) 16086246pps 7721Mb/sec (7721398080bps) errors: 100000000 42.46% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 25.92% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 7.81% kpktgend_0 [pktgen] [k] pktgen_thread_worker 5.62% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 2.70% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 2.34% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk 1.44% kpktgend_0 [kernel.kallsyms] [k] __build_skb * With this patch: Result: OK: 6214833(c6214731+d101) usec, 100000000 (60byte,0frags) 16090536pps 7723Mb/sec (7723457280bps) errors: 100000000 41.23% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 26.57% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 7.72% kpktgend_0 [pktgen] [k] pktgen_thread_worker 5.55% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 2.78% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 2.06% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk 1.43% kpktgend_0 [kernel.kallsyms] [k] __build_skb * Without this patch + tc ingress: tc filter add dev eth4 parent ffff: protocol ip prio 1 \ u32 match ip dst 4.3.2.1/32 Result: OK: 9269001(c9268821+d179) usec, 100000000 (60byte,0frags) 10788648pps 5178Mb/sec (5178551040bps) errors: 100000000 40.99% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 17.50% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 11.77% kpktgend_0 [cls_u32] [k] u32_classify 5.62% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat 5.18% kpktgend_0 [pktgen] [k] pktgen_thread_worker 3.23% kpktgend_0 [kernel.kallsyms] [k] tc_classify 2.97% kpktgend_0 
[kernel.kallsyms] [k] ip_rcv 1.83% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 1.50% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk 0.99% kpktgend_0 [kernel.kallsyms] [k] __build_skb * With this patch + tc ingress: tc filter add dev eth4 parent ffff: protocol ip prio 1 \ u32 match ip dst 4.3.2.1/32 Result: OK: 9308218(c9308091+d126) usec, 100000000 (60byte,0frags) 10743194pps 5156Mb/sec (5156733120bps) errors: 100000000 42.01% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 17.78% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 11.70% kpktgend_0 [cls_u32] [k] u32_classify 5.46% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat 5.16% kpktgend_0 [pktgen] [k] pktgen_thread_worker 2.98% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 2.84% kpktgend_0 [kernel.kallsyms] [k] tc_classify 1.96% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 1.57% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk Note that the results are very similar before and after. I can see gcc gets the code under the ingress static key out of the hot path. Then, on that cold branch, it generates the code to accommodate the netfilter ingress static key. My explanation for this is that this reduces the pressure on the instruction cache for non-users as the new code is out of the hot path, and it comes with minimal impact for tc ingress users. Using gcc version 4.8.4 on: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 8 [...] L1d cache: 16K L1i cache: 64K L2 cache: 2048K L3 cache: 8192K Signed-off-by: Pablo Neira Ayuso <[email protected]> Acked-by: Alexei Starovoitov <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 1cf5190 commit e687ad6

File tree

7 files changed

+124
-1
lines changed

7 files changed

+124
-1
lines changed

include/linux/netdevice.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1656,6 +1656,9 @@ struct net_device {
16561656
struct tcf_proto __rcu *ingress_cl_list;
16571657
#endif
16581658
struct netdev_queue __rcu *ingress_queue;
1659+
#ifdef CONFIG_NETFILTER_INGRESS
1660+
struct list_head nf_hooks_ingress;
1661+
#endif
16591662

16601663
unsigned char broadcast[MAX_ADDR_LEN];
16611664
#ifdef CONFIG_RFS_ACCEL

include/linux/netfilter.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ struct nf_hook_ops {
8686

8787
/* User fills in from here down. */
8888
nf_hookfn *hook;
89+
struct net_device *dev;
8990
struct module *owner;
9091
void *priv;
9192
u_int8_t pf;

include/linux/netfilter_ingress.h

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#ifndef _NETFILTER_INGRESS_H_
2+
#define _NETFILTER_INGRESS_H_
3+
4+
#include <linux/netfilter.h>
5+
#include <linux/netdevice.h>
6+
7+
#ifdef CONFIG_NETFILTER_INGRESS
8+
/* Report whether any Netfilter ingress hook is active for the device that
 * received @skb. Simply forwards the result of nf_hook_list_active() for
 * skb->dev->nf_hooks_ingress with NFPROTO_NETDEV / NF_NETDEV_INGRESS.
 */
static inline int nf_hook_ingress_active(struct sk_buff *skb)
9+
{
10+
return nf_hook_list_active(&skb->dev->nf_hooks_ingress,
11+
NFPROTO_NETDEV, NF_NETDEV_INGRESS);
12+
}
13+
14+
/* Run the ingress hooks registered on @skb's receiving device.
 *
 * Builds an on-stack nf_hook_state that points at
 * skb->dev->nf_hooks_ingress (hook NF_NETDEV_INGRESS, family
 * NFPROTO_NETDEV, skb->dev as the only non-NULL device/socket argument)
 * and returns whatever nf_hook_slow() returns for it.
 *
 * NOTE(review): INT_MIN is presumably the priority threshold, chosen so
 * that no registered hook is skipped - confirm against
 * nf_hook_state_init().
 */
static inline int nf_hook_ingress(struct sk_buff *skb)
15+
{
16+
struct nf_hook_state state;
17+
18+
nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress,
19+
NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL,
20+
skb->dev, NULL, NULL);
21+
return nf_hook_slow(skb, &state);
22+
}
23+
24+
/* Initialize @dev's ingress hook list (dev->nf_hooks_ingress) as an empty
 * list head.
 */
static inline void nf_hook_ingress_init(struct net_device *dev)
25+
{
26+
INIT_LIST_HEAD(&dev->nf_hooks_ingress);
27+
}
28+
#else /* CONFIG_NETFILTER_INGRESS */
29+
/* Stub used when CONFIG_NETFILTER_INGRESS is not set: always returns 0,
 * i.e. no ingress hook is ever reported as active.
 */
static inline int nf_hook_ingress_active(struct sk_buff *skb)
30+
{
31+
return 0;
32+
}
33+
34+
/* Stub used when CONFIG_NETFILTER_INGRESS is not set: runs no hooks and
 * always returns 0.
 */
static inline int nf_hook_ingress(struct sk_buff *skb)
35+
{
36+
return 0;
37+
}
38+
39+
/* Stub used when CONFIG_NETFILTER_INGRESS is not set: nothing to set up. */
static inline void nf_hook_ingress_init(struct net_device *dev) {}
40+
#endif /* CONFIG_NETFILTER_INGRESS */
41+
#endif /* _NETFILTER_INGRESS_H_ */

include/uapi/linux/netfilter.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,17 @@ enum nf_inet_hooks {
5151
NF_INET_NUMHOOKS
5252
};
5353

54+
/* Hook numbers used together with NFPROTO_NETDEV. NF_NETDEV_INGRESS is the
 * only hook point; NF_NETDEV_NUMHOOKS comes last and therefore evaluates to
 * the number of hook points listed before it.
 */
enum nf_dev_hooks {
55+
NF_NETDEV_INGRESS,
56+
NF_NETDEV_NUMHOOKS
57+
};
58+
5459
enum {
5560
NFPROTO_UNSPEC = 0,
5661
NFPROTO_INET = 1,
5762
NFPROTO_IPV4 = 2,
5863
NFPROTO_ARP = 3,
64+
NFPROTO_NETDEV = 5,
5965
NFPROTO_BRIDGE = 7,
6066
NFPROTO_IPV6 = 10,
6167
NFPROTO_DECNET = 12,

net/core/dev.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@
135135
#include <linux/if_macvlan.h>
136136
#include <linux/errqueue.h>
137137
#include <linux/hrtimer.h>
138+
#include <linux/netfilter_ingress.h>
138139

139140
#include "net-sysfs.h"
140141

@@ -3666,6 +3667,13 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
36663667

36673668
return skb;
36683669
}
3670+
#else
3671+
/* Fallback handle_ing() for the #else branch (the governing #if condition
 * is outside this hunk): performs no ingress processing and returns @skb
 * unchanged; @pt_prev, @ret and @orig_dev are not touched.
 */
static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3672+
struct packet_type **pt_prev,
3673+
int *ret, struct net_device *orig_dev)
3674+
{
3675+
return skb;
3676+
}
36693677
#endif
36703678

36713679
/**
@@ -3739,6 +3747,28 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
37393747
}
37403748
}
37413749

3750+
#ifdef CONFIG_NETFILTER_INGRESS
3751+
/* Run the Netfilter ingress hooks for @skb, if any are active.
 *
 * @skb:      packet being received
 * @pt_prev:  in/out pointer to the pending packet handler; set to NULL
 *            here once that handler has been given the skb
 * @ret:      receives the result of deliver_skb() when @pt_prev was pending
 * @orig_dev: passed through to deliver_skb()
 *
 * Returns 0 when no ingress hook is active for the skb, otherwise the
 * return value of nf_hook_ingress(). The caller in
 * __netif_receive_skb_core() stops processing when the result is negative.
 */
static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
3752+
int *ret, struct net_device *orig_dev)
3753+
{
3754+
if (nf_hook_ingress_active(skb)) {
3755+
/* Hand the skb to the pending handler before the hooks run. */
if (*pt_prev) {
3756+
*ret = deliver_skb(skb, *pt_prev, orig_dev);
3757+
*pt_prev = NULL;
3758+
}
3759+
3760+
return nf_hook_ingress(skb);
3761+
}
3762+
return 0;
3763+
}
3764+
#else
3765+
/* Stub used when CONFIG_NETFILTER_INGRESS is not set: runs no hooks,
 * leaves @pt_prev and @ret untouched and always returns 0.
 */
static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
3766+
int *ret, struct net_device *orig_dev)
3767+
{
3768+
return 0;
3769+
}
3770+
#endif
3771+
37423772
static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
37433773
{
37443774
struct packet_type *ptype, *pt_prev;
@@ -3803,6 +3833,9 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
38033833
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
38043834
if (!skb)
38053835
goto unlock;
3836+
3837+
if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
3838+
goto unlock;
38063839
}
38073840
#endif
38083841
#ifdef CONFIG_NET_CLS_ACT
@@ -6968,6 +7001,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
69687001
dev->group = INIT_NETDEV_GROUP;
69697002
if (!dev->ethtool_ops)
69707003
dev->ethtool_ops = &default_ethtool_ops;
7004+
7005+
nf_hook_ingress_init(dev);
7006+
69717007
return dev;
69727008

69737009
free_all:

net/netfilter/Kconfig

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
menu "Core Netfilter Configuration"
22
depends on NET && INET && NETFILTER
33

4+
config NETFILTER_INGRESS
5+
bool "Netfilter ingress support"
6+
select NET_INGRESS
7+
help
8+
This allows you to classify packets from ingress using the Netfilter
9+
infrastructure.
10+
411
config NETFILTER_NETLINK
512
tristate
613

net/netfilter/core.c

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,27 @@ static DEFINE_MUTEX(nf_hook_mutex);
6464

6565
int nf_register_hook(struct nf_hook_ops *reg)
6666
{
67+
struct list_head *nf_hook_list;
6768
struct nf_hook_ops *elem;
6869

6970
mutex_lock(&nf_hook_mutex);
70-
list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
71+
switch (reg->pf) {
72+
case NFPROTO_NETDEV:
73+
#ifdef CONFIG_NETFILTER_INGRESS
74+
if (reg->hooknum == NF_NETDEV_INGRESS) {
75+
BUG_ON(reg->dev == NULL);
76+
nf_hook_list = &reg->dev->nf_hooks_ingress;
77+
net_inc_ingress_queue();
78+
break;
79+
}
80+
#endif
81+
/* Fall through. */
82+
default:
83+
nf_hook_list = &nf_hooks[reg->pf][reg->hooknum];
84+
break;
85+
}
86+
87+
list_for_each_entry(elem, nf_hook_list, list) {
7188
if (reg->priority < elem->priority)
7289
break;
7390
}
@@ -85,6 +102,18 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
85102
mutex_lock(&nf_hook_mutex);
86103
list_del_rcu(&reg->list);
87104
mutex_unlock(&nf_hook_mutex);
105+
switch (reg->pf) {
106+
case NFPROTO_NETDEV:
107+
#ifdef CONFIG_NETFILTER_INGRESS
108+
if (reg->hooknum == NF_NETDEV_INGRESS) {
109+
net_dec_ingress_queue();
110+
break;
111+
}
112+
break;
113+
#endif
114+
default:
115+
break;
116+
}
88117
#ifdef HAVE_JUMP_LABEL
89118
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
90119
#endif

0 commit comments

Comments
 (0)