Skip to content

Commit 11941f8

Browse files
kkdwivedi authored and Alexei Starovoitov committed
bpf: cpumap: Implement generic cpumap
This change implements CPUMAP redirect support for generic XDP programs. The idea is to reuse the cpu map entry's queue that is used to push native xdp frames for redirecting skb to a different CPU. This will match native XDP behavior (in that RPS is invoked again for packet reinjected into networking stack). To be able to determine whether the incoming skb is from the driver or cpumap, we reuse skb->redirected bit that skips generic XDP processing when it is set. To always make use of this, CONFIG_NET_REDIRECT guard on it has been lifted and it is always available. >From the redirect side, we add the skb to ptr_ring with its lowest bit set to 1. This should be safe as skb is not 1-byte aligned. This allows kthread to discern between xdp_frames and sk_buff. On consumption of the ptr_ring item, the lowest bit is unset. In the end, the skb is simply added to the list that kthread is anyway going to maintain for xdp_frames converted to skb, and then received again by using netif_receive_skb_list. Bulking optimization for generic cpumap is left as an exercise for a future patch for now. Since cpumap entry progs are now supported, also remove check in generic_xdp_install for the cpumap. Signed-off-by: Kumar Kartikeya Dwivedi <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]> Reviewed-by: Toke Høiland-Jørgensen <[email protected]> Acked-by: Jesper Dangaard Brouer <[email protected]> Link: https://lore.kernel.org/bpf/[email protected]
1 parent cb0f800 commit 11941f8

File tree

5 files changed

+114
-30
lines changed

5 files changed

+114
-30
lines changed

include/linux/bpf.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1513,7 +1513,8 @@ bool dev_map_can_have_prog(struct bpf_map *map);
15131513
void __cpu_map_flush(void);
15141514
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
15151515
struct net_device *dev_rx);
1516-
bool cpu_map_prog_allowed(struct bpf_map *map);
1516+
int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
1517+
struct sk_buff *skb);
15171518

15181519
/* Return map's numa specified by userspace */
15191520
static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
@@ -1710,6 +1711,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
17101711
return 0;
17111712
}
17121713

1714+
static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
1715+
struct sk_buff *skb)
1716+
{
1717+
return -EOPNOTSUPP;
1718+
}
1719+
17131720
static inline bool cpu_map_prog_allowed(struct bpf_map *map)
17141721
{
17151722
return false;

include/linux/skbuff.h

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -863,8 +863,8 @@ struct sk_buff {
863863
__u8 tc_skip_classify:1;
864864
__u8 tc_at_ingress:1;
865865
#endif
866-
#ifdef CONFIG_NET_REDIRECT
867866
__u8 redirected:1;
867+
#ifdef CONFIG_NET_REDIRECT
868868
__u8 from_ingress:1;
869869
#endif
870870
#ifdef CONFIG_TLS_DEVICE
@@ -4664,17 +4664,13 @@ static inline __wsum lco_csum(struct sk_buff *skb)
46644664

46654665
static inline bool skb_is_redirected(const struct sk_buff *skb)
46664666
{
4667-
#ifdef CONFIG_NET_REDIRECT
46684667
return skb->redirected;
4669-
#else
4670-
return false;
4671-
#endif
46724668
}
46734669

46744670
static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
46754671
{
4676-
#ifdef CONFIG_NET_REDIRECT
46774672
skb->redirected = 1;
4673+
#ifdef CONFIG_NET_REDIRECT
46784674
skb->from_ingress = from_ingress;
46794675
if (skb->from_ingress)
46804676
skb->tstamp = 0;
@@ -4683,9 +4679,7 @@ static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
46834679

46844680
static inline void skb_reset_redirect(struct sk_buff *skb)
46854681
{
4686-
#ifdef CONFIG_NET_REDIRECT
46874682
skb->redirected = 0;
4688-
#endif
46894683
}
46904684

46914685
static inline bool skb_csum_is_sctp(struct sk_buff *skb)

kernel/bpf/cpumap.c

Lines changed: 98 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
* netstack, and assigning dedicated CPUs for this stage. This
1717
* basically allows for 10G wirespeed pre-filtering via bpf.
1818
*/
19+
#include <linux/bitops.h>
1920
#include <linux/bpf.h>
2021
#include <linux/filter.h>
2122
#include <linux/ptr_ring.h>
@@ -168,6 +169,46 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
168169
}
169170
}
170171

172+
static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
173+
struct list_head *listp,
174+
struct xdp_cpumap_stats *stats)
175+
{
176+
struct sk_buff *skb, *tmp;
177+
struct xdp_buff xdp;
178+
u32 act;
179+
int err;
180+
181+
list_for_each_entry_safe(skb, tmp, listp, list) {
182+
act = bpf_prog_run_generic_xdp(skb, &xdp, rcpu->prog);
183+
switch (act) {
184+
case XDP_PASS:
185+
break;
186+
case XDP_REDIRECT:
187+
skb_list_del_init(skb);
188+
err = xdp_do_generic_redirect(skb->dev, skb, &xdp,
189+
rcpu->prog);
190+
if (unlikely(err)) {
191+
kfree_skb(skb);
192+
stats->drop++;
193+
} else {
194+
stats->redirect++;
195+
}
196+
return;
197+
default:
198+
bpf_warn_invalid_xdp_action(act);
199+
fallthrough;
200+
case XDP_ABORTED:
201+
trace_xdp_exception(skb->dev, rcpu->prog, act);
202+
fallthrough;
203+
case XDP_DROP:
204+
skb_list_del_init(skb);
205+
kfree_skb(skb);
206+
stats->drop++;
207+
return;
208+
}
209+
}
210+
}
211+
171212
static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
172213
void **frames, int n,
173214
struct xdp_cpumap_stats *stats)
@@ -176,11 +217,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
176217
struct xdp_buff xdp;
177218
int i, nframes = 0;
178219

179-
if (!rcpu->prog)
180-
return n;
181-
182-
rcu_read_lock_bh();
183-
184220
xdp_set_return_frame_no_direct();
185221
xdp.rxq = &rxq;
186222

@@ -227,17 +263,37 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
227263
}
228264
}
229265

266+
xdp_clear_return_frame_no_direct();
267+
268+
return nframes;
269+
}
270+
271+
#define CPUMAP_BATCH 8
272+
273+
static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
274+
int xdp_n, struct xdp_cpumap_stats *stats,
275+
struct list_head *list)
276+
{
277+
int nframes;
278+
279+
if (!rcpu->prog)
280+
return xdp_n;
281+
282+
rcu_read_lock_bh();
283+
284+
nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, xdp_n, stats);
285+
230286
if (stats->redirect)
231-
xdp_do_flush_map();
287+
xdp_do_flush();
232288

233-
xdp_clear_return_frame_no_direct();
289+
if (unlikely(!list_empty(list)))
290+
cpu_map_bpf_prog_run_skb(rcpu, list, stats);
234291

235292
rcu_read_unlock_bh(); /* resched point, may call do_softirq() */
236293

237294
return nframes;
238295
}
239296

240-
#define CPUMAP_BATCH 8
241297

242298
static int cpu_map_kthread_run(void *data)
243299
{
@@ -254,9 +310,9 @@ static int cpu_map_kthread_run(void *data)
254310
struct xdp_cpumap_stats stats = {}; /* zero stats */
255311
unsigned int kmem_alloc_drops = 0, sched = 0;
256312
gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
313+
int i, n, m, nframes, xdp_n;
257314
void *frames[CPUMAP_BATCH];
258315
void *skbs[CPUMAP_BATCH];
259-
int i, n, m, nframes;
260316
LIST_HEAD(list);
261317

262318
/* Release CPU reschedule checks */
@@ -280,9 +336,20 @@ static int cpu_map_kthread_run(void *data)
280336
*/
281337
n = __ptr_ring_consume_batched(rcpu->queue, frames,
282338
CPUMAP_BATCH);
283-
for (i = 0; i < n; i++) {
339+
for (i = 0, xdp_n = 0; i < n; i++) {
284340
void *f = frames[i];
285-
struct page *page = virt_to_page(f);
341+
struct page *page;
342+
343+
if (unlikely(__ptr_test_bit(0, &f))) {
344+
struct sk_buff *skb = f;
345+
346+
__ptr_clear_bit(0, &skb);
347+
list_add_tail(&skb->list, &list);
348+
continue;
349+
}
350+
351+
frames[xdp_n++] = f;
352+
page = virt_to_page(f);
286353

287354
/* Bring struct page memory area to curr CPU. Read by
288355
* build_skb_around via page_is_pfmemalloc(), and when
@@ -292,7 +359,7 @@ static int cpu_map_kthread_run(void *data)
292359
}
293360

294361
/* Support running another XDP prog on this CPU */
295-
nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats);
362+
nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats, &list);
296363
if (nframes) {
297364
m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
298365
if (unlikely(m == 0)) {
@@ -330,12 +397,6 @@ static int cpu_map_kthread_run(void *data)
330397
return 0;
331398
}
332399

333-
bool cpu_map_prog_allowed(struct bpf_map *map)
334-
{
335-
return map->map_type == BPF_MAP_TYPE_CPUMAP &&
336-
map->value_size != offsetofend(struct bpf_cpumap_val, qsize);
337-
}
338-
339400
static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
340401
{
341402
struct bpf_prog *prog;
@@ -701,6 +762,25 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
701762
return 0;
702763
}
703764

765+
int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
766+
struct sk_buff *skb)
767+
{
768+
int ret;
769+
770+
__skb_pull(skb, skb->mac_len);
771+
skb_set_redirected(skb, false);
772+
__ptr_set_bit(0, &skb);
773+
774+
ret = ptr_ring_produce(rcpu->queue, skb);
775+
if (ret < 0)
776+
goto trace;
777+
778+
wake_up_process(rcpu->kthread);
779+
trace:
780+
trace_xdp_cpumap_enqueue(rcpu->map_id, !ret, !!ret, rcpu->cpu);
781+
return ret;
782+
}
783+
704784
void __cpu_map_flush(void)
705785
{
706786
struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);

net/core/dev.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5669,8 +5669,7 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
56695669
* have a bpf_prog installed on an entry
56705670
*/
56715671
for (i = 0; i < new->aux->used_map_cnt; i++) {
5672-
if (dev_map_can_have_prog(new->aux->used_maps[i]) ||
5673-
cpu_map_prog_allowed(new->aux->used_maps[i])) {
5672+
if (dev_map_can_have_prog(new->aux->used_maps[i])) {
56745673
mutex_unlock(&new->aux->used_maps_mutex);
56755674
return -EINVAL;
56765675
}

net/core/filter.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4040,8 +4040,12 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
40404040
goto err;
40414041
consume_skb(skb);
40424042
break;
4043+
case BPF_MAP_TYPE_CPUMAP:
4044+
err = cpu_map_generic_redirect(fwd, skb);
4045+
if (unlikely(err))
4046+
goto err;
4047+
break;
40434048
default:
4044-
/* TODO: Handle BPF_MAP_TYPE_CPUMAP */
40454049
err = -EBADRQC;
40464050
goto err;
40474051
}

0 commit comments

Comments (0)