Skip to content

Commit 7b4858d

Browse files
idosch authored and kuba-moo committed
skbuff: bridge: Add layer 2 miss indication
For EVPN non-DF (Designated Forwarder) filtering we need to be able to
prevent decapsulated traffic from being flooded to a multi-homed host.
Filtering of multicast and broadcast traffic can be achieved using the
following flower filter:

 # tc filter add dev bond0 egress pref 1 proto all flower indev vxlan0 dst_mac 01:00:00:00:00:00/01:00:00:00:00:00 action drop

Unlike broadcast and multicast traffic, it is not currently possible to
filter unknown unicast traffic. The classification into unknown unicast
is performed by the bridge driver, but is not visible to other layers
such as tc.

Solve this by adding a new 'l2_miss' bit to the tc skb extension. Clear
the bit whenever a packet enters the bridge (received from a bridge port
or transmitted via the bridge) and set it if the packet did not match an
FDB or MDB entry. If there is no skb extension and the bit needs to be
cleared, then do not allocate one as no extension is equivalent to the
bit being cleared. The bit is not set for broadcast packets as they
never perform a lookup and therefore never incur a miss.

A bit that is set for every flooded packet would also work for the
current use case, but it does not allow us to differentiate between
registered and unregistered multicast traffic, which might be useful in
the future.

To keep the performance impact to a minimum, the marking of packets is
guarded by the 'tc_skb_ext_tc' static key. When 'false', the skb is not
touched and an skb extension is not allocated. Instead, only a 5 bytes
nop is executed, as demonstrated below for the call site in
br_handle_frame().

Before the patch:

```
        memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
  c37b09:       49 c7 44 24 28 00 00    movq   $0x0,0x28(%r12)
  c37b10:       00 00
        p = br_port_get_rcu(skb->dev);
  c37b12:       49 8b 44 24 10          mov    0x10(%r12),%rax
        memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
  c37b17:       49 c7 44 24 30 00 00    movq   $0x0,0x30(%r12)
  c37b1e:       00 00
  c37b20:       49 c7 44 24 38 00 00    movq   $0x0,0x38(%r12)
  c37b27:       00 00
```

After the patch (when static key is disabled):

```
        memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
  c37c29:       49 c7 44 24 28 00 00    movq   $0x0,0x28(%r12)
  c37c30:       00 00
  c37c32:       49 8d 44 24 28          lea    0x28(%r12),%rax
  c37c37:       48 c7 40 08 00 00 00    movq   $0x0,0x8(%rax)
  c37c3e:       00
  c37c3f:       48 c7 40 10 00 00 00    movq   $0x0,0x10(%rax)
  c37c46:       00
 #ifdef CONFIG_HAVE_JUMP_LABEL_HACK
 static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
        asm_volatile_goto("1:"
  c37c47:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
        br_tc_skb_miss_set(skb, false);

        p = br_port_get_rcu(skb->dev);
  c37c4c:       49 8b 44 24 10          mov    0x10(%r12),%rax
```

Subsequent patches will extend the flower classifier to be able to match
on the new 'l2_miss' bit and enable / disable the static key when
filters that match on it are added / deleted.

Signed-off-by: Ido Schimmel <[email protected]>
Acked-by: Nikolay Aleksandrov <[email protected]>
Acked-by: Jakub Kicinski <[email protected]>
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 2e246bc commit 7b4858d

File tree

5 files changed

+33
-0
lines changed

5 files changed

+33
-0
lines changed

include/linux/skbuff.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,7 @@ struct tc_skb_ext {
330330
u8 post_ct_snat:1;
331331
u8 post_ct_dnat:1;
332332
u8 act_miss:1; /* Set if act_miss_cookie is used */
333+
u8 l2_miss:1; /* Set by bridge upon FDB or MDB miss */
333334
};
334335
#endif
335336

net/bridge/br_device.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
3939
u16 vid = 0;
4040

4141
memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
42+
br_tc_skb_miss_set(skb, false);
4243

4344
rcu_read_lock();
4445
nf_ops = rcu_dereference(nf_br_ops);

net/bridge/br_forward.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,8 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
203203
struct net_bridge_port *prev = NULL;
204204
struct net_bridge_port *p;
205205

206+
br_tc_skb_miss_set(skb, pkt_type != BR_PKT_BROADCAST);
207+
206208
list_for_each_entry_rcu(p, &br->port_list, list) {
207209
/* Do not flood unicast traffic to ports that turn it off, nor
208210
* other traffic if flood off, except for traffic we originate
@@ -295,6 +297,7 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
295297
allow_mode_include = false;
296298
} else {
297299
p = NULL;
300+
br_tc_skb_miss_set(skb, true);
298301
}
299302

300303
while (p || rp) {

net/bridge/br_input.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,7 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
334334
return RX_HANDLER_CONSUMED;
335335

336336
memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
337+
br_tc_skb_miss_set(skb, false);
337338

338339
p = br_port_get_rcu(skb->dev);
339340
if (p->flags & BR_VLAN_TUNNEL)

net/bridge/br_private.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <linux/u64_stats_sync.h>
1616
#include <net/route.h>
1717
#include <net/ip6_fib.h>
18+
#include <net/pkt_cls.h>
1819
#include <linux/if_vlan.h>
1920
#include <linux/rhashtable.h>
2021
#include <linux/refcount.h>
@@ -754,6 +755,32 @@ void br_boolopt_multi_get(const struct net_bridge *br,
754755
struct br_boolopt_multi *bm);
755756
void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on);
756757

758+
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
/*
 * br_tc_skb_miss_set - record the bridge's layer 2 miss state on an skb
 * @skb: packet being handled by the bridge
 * @miss: value to store in the 'l2_miss' bit of the tc skb extension
 *
 * Does nothing unless tc_skb_ext_tc_enabled() reports true. An existing
 * TC_SKB_EXT extension is updated in place. When the skb carries no such
 * extension, one is allocated only if @miss is true; a clear request is
 * dropped without allocating. If the allocation fails the packet is left
 * unmarked.
 */
static inline void br_tc_skb_miss_set(struct sk_buff *skb, bool miss)
{
	struct tc_skb_ext *tc_ext;

	if (!tc_skb_ext_tc_enabled())
		return;

	tc_ext = skb_ext_find(skb, TC_SKB_EXT);
	if (!tc_ext) {
		/* Nothing to clear when no extension is attached. */
		if (!miss)
			return;

		tc_ext = tc_skb_ext_alloc(skb);
		if (!tc_ext)
			return;
	}

	/* Reached with a freshly allocated extension only when miss is true. */
	tc_ext->l2_miss = miss;
}
#else
/* Stub used when CONFIG_NET_TC_SKB_EXT is not enabled: nothing to mark. */
static inline void br_tc_skb_miss_set(struct sk_buff *skb, bool miss)
{
}
#endif
783+
757784
/* br_device.c */
758785
void br_dev_setup(struct net_device *dev);
759786
void br_dev_delete(struct net_device *dev, struct list_head *list);

0 commit comments

Comments
 (0)