
Commit 94033cd

magnus-karlsson authored and borkmann committed
xsk: Optimize for aligned case
Optimize for the aligned case by precomputing the parameter values of the xdp_buff_xsk and xdp_buff structures in the heads array. We can do this because, for the aligned case, the heads array size is equal to the number of chunks in the umem. Every entry in this array then corresponds to one chunk/frame, can be prepopulated with the correct values, and the free_heads stack is no longer needed. Note that it is not possible to allocate more buffers than what has been allocated in the aligned case, since each chunk can only contain a single buffer.

Unfortunately, we cannot do this in the unaligned case, as one chunk might contain multiple buffers. In that case, we keep the old scheme of populating a heads entry every time it is used and relying on the free_heads stack.

Also move xp_release() and xp_get_handle() to xsk_buff_pool.h. They were for some reason in xsk.c even though they are buffer pool operations.

Signed-off-by: Magnus Karlsson <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
Link: https://lore.kernel.org/bpf/[email protected]
1 parent 6aab0bb commit 94033cd
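
To make the idea concrete, here is a minimal user-space sketch of the aligned scheme described above. It is an illustration only, not the kernel code: struct buf_state, CHUNK_SIZE, NUM_CHUNKS, HEADROOM and the two functions below are hypothetical stand-ins for struct xdp_buff_xsk, the umem chunk parameters, xp_create_and_assign_umem() and the allocation fast path.

/* Minimal sketch of the aligned-case idea: with power-of-two chunks, a
 * fill-queue address maps 1:1 to a chunk, so per-buffer state can be
 * precomputed once at pool setup and found with a mask and a shift at
 * allocation time. All names are illustrative, not kernel identifiers.
 */
#include <stdint.h>
#include <stdio.h>

#define CHUNK_SIZE  2048u          /* must be a power of two for this scheme */
#define NUM_CHUNKS  4u
#define HEADROOM    256u

struct buf_state {                 /* stand-in for struct xdp_buff_xsk */
	uint64_t orig_addr;        /* chunk start address within the umem */
	uint64_t data_hard_start;  /* orig_addr + headroom, precomputed */
};

static struct buf_state heads[NUM_CHUNKS];
static const unsigned int chunk_shift = 11;	/* ffs(CHUNK_SIZE) - 1 */

static void pool_setup(void)
{
	/* Done once, like the aligned branch of xp_create_and_assign_umem(). */
	for (unsigned int i = 0; i < NUM_CHUNKS; i++) {
		heads[i].orig_addr = (uint64_t)i * CHUNK_SIZE;
		heads[i].data_hard_start = heads[i].orig_addr + HEADROOM;
	}
}

static struct buf_state *alloc_from_fq(uint64_t addr)
{
	/* Aligned fast path: mask off the in-chunk offset, shift to an index. */
	return &heads[(addr & ~(uint64_t)(CHUNK_SIZE - 1)) >> chunk_shift];
}

int main(void)
{
	pool_setup();
	struct buf_state *b = alloc_from_fq(5000);	/* lands in chunk 2 */
	printf("orig_addr=%llu\n", (unsigned long long)b->orig_addr);	/* 4096 */
	return 0;
}

With this mapping in place, an aligned allocation never needs the free_heads stack, which is why the pool below only sizes that array for unaligned umems (entries = unaligned ? umem->chunks : 0).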

File tree

3 files changed: 79 additions & 38 deletions

include/net/xsk_buff_pool.h
net/xdp/xsk.c
net/xdp/xsk_buff_pool.c


include/net/xsk_buff_pool.h

Lines changed: 45 additions & 1 deletion
@@ -7,6 +7,7 @@
 #include <linux/if_xdp.h>
 #include <linux/types.h>
 #include <linux/dma-mapping.h>
+#include <linux/bpf.h>
 #include <net/xdp.h>

 struct xsk_buff_pool;
@@ -66,6 +67,7 @@ struct xsk_buff_pool {
 	u32 free_heads_cnt;
 	u32 headroom;
 	u32 chunk_size;
+	u32 chunk_shift;
 	u32 frame_len;
 	u8 cached_need_wakeup;
 	bool uses_need_wakeup;
@@ -80,6 +82,13 @@ struct xsk_buff_pool {
 	struct xdp_buff_xsk *free_heads[];
 };

+/* Masks for xdp_umem_page flags.
+ * The low 12-bits of the addr will be 0 since this is the page address, so we
+ * can use them for flags.
+ */
+#define XSK_NEXT_PG_CONTIG_SHIFT 0
+#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT)
+
 /* AF_XDP core. */
 struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
 						struct xdp_umem *umem);
@@ -88,7 +97,6 @@ int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
 int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
 			 struct net_device *dev, u16 queue_id);
 void xp_destroy(struct xsk_buff_pool *pool);
-void xp_release(struct xdp_buff_xsk *xskb);
 void xp_get_pool(struct xsk_buff_pool *pool);
 bool xp_put_pool(struct xsk_buff_pool *pool);
 void xp_clear_dev(struct xsk_buff_pool *pool);
@@ -98,6 +106,21 @@ void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs);
 /* AF_XDP, and XDP core. */
 void xp_free(struct xdp_buff_xsk *xskb);

+static inline void xp_init_xskb_addr(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool,
+				     u64 addr)
+{
+	xskb->orig_addr = addr;
+	xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom;
+}
+
+static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool,
+				    dma_addr_t *dma_pages, u64 addr)
+{
+	xskb->frame_dma = (dma_pages[addr >> PAGE_SHIFT] & ~XSK_NEXT_PG_CONTIG_MASK) +
+			  (addr & ~PAGE_MASK);
+	xskb->dma = xskb->frame_dma + pool->headroom + XDP_PACKET_HEADROOM;
+}
+
 /* AF_XDP ZC drivers, via xdp_sock_buff.h */
 void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq);
 int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
@@ -180,4 +203,25 @@ static inline u64 xp_unaligned_add_offset_to_addr(u64 addr)
 		xp_unaligned_extract_offset(addr);
 }

+static inline u32 xp_aligned_extract_idx(struct xsk_buff_pool *pool, u64 addr)
+{
+	return xp_aligned_extract_addr(pool, addr) >> pool->chunk_shift;
+}
+
+static inline void xp_release(struct xdp_buff_xsk *xskb)
+{
+	if (xskb->pool->unaligned)
+		xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
+}
+
+static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb)
+{
+	u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
+
+	offset += xskb->pool->headroom;
+	if (!xskb->pool->unaligned)
+		return xskb->orig_addr + offset;
+	return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+}
+
 #endif /* XSK_BUFF_POOL_H_ */
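
As a quick sanity check of the address math in the new inline helpers, the hypothetical user-space snippet below mirrors xp_aligned_extract_idx() and the two branches of xp_get_handle(). It assumes XSK_UNALIGNED_BUF_OFFSET_SHIFT is 48, as defined in include/uapi/linux/if_xdp.h; everything else is self-contained example data.

/* Stand-alone check of the chunk-index and handle arithmetic; illustration
 * only, not kernel code.
 */
#include <assert.h>
#include <stdint.h>
#include <strings.h>	/* ffs() */

#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48	/* value assumed from the uapi header */

int main(void)
{
	uint32_t chunk_size = 2048, headroom = 0;
	uint32_t chunk_shift = ffs(chunk_size) - 1;	/* 11, as set up in xp_create_and_assign_umem() */

	/* xp_aligned_extract_idx(): keep the chunk-aligned part, shift it down. */
	uint64_t addr = 3 * chunk_size + 100;		/* somewhere inside chunk 3 */
	uint64_t idx = (addr & ~(uint64_t)(chunk_size - 1)) >> chunk_shift;
	assert(idx == 3);

	/* xp_get_handle(), aligned case: the offset is simply added to orig_addr. */
	uint64_t orig_addr = 3 * chunk_size;
	uint64_t offset = 100 + headroom;		/* (data - data_hard_start) + headroom */
	assert(orig_addr + offset == addr);

	/* Unaligned case: the offset is carried in the upper bits instead. */
	uint64_t handle = orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
	assert((handle & ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)) == orig_addr);
	return 0;
}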

net/xdp/xsk.c

Lines changed: 0 additions & 15 deletions
@@ -134,21 +134,6 @@ int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
 	return 0;
 }

-void xp_release(struct xdp_buff_xsk *xskb)
-{
-	xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
-}
-
-static u64 xp_get_handle(struct xdp_buff_xsk *xskb)
-{
-	u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
-
-	offset += xskb->pool->headroom;
-	if (!xskb->pool->unaligned)
-		return xskb->orig_addr + offset;
-	return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
-}
-
 static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 {
 	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);

net/xdp/xsk_buff_pool.c

Lines changed: 34 additions & 22 deletions
@@ -44,12 +44,13 @@ void xp_destroy(struct xsk_buff_pool *pool)
 struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
 						struct xdp_umem *umem)
 {
+	bool unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
 	struct xsk_buff_pool *pool;
 	struct xdp_buff_xsk *xskb;
-	u32 i;
+	u32 i, entries;

-	pool = kvzalloc(struct_size(pool, free_heads, umem->chunks),
-			GFP_KERNEL);
+	entries = unaligned ? umem->chunks : 0;
+	pool = kvzalloc(struct_size(pool, free_heads, entries), GFP_KERNEL);
 	if (!pool)
 		goto out;

@@ -63,7 +64,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
 	pool->free_heads_cnt = umem->chunks;
 	pool->headroom = umem->headroom;
 	pool->chunk_size = umem->chunk_size;
-	pool->unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+	pool->chunk_shift = ffs(umem->chunk_size) - 1;
+	pool->unaligned = unaligned;
 	pool->frame_len = umem->chunk_size - umem->headroom -
 			  XDP_PACKET_HEADROOM;
 	pool->umem = umem;
@@ -81,7 +83,10 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
 		xskb = &pool->heads[i];
 		xskb->pool = pool;
 		xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
-		pool->free_heads[i] = xskb;
+		if (pool->unaligned)
+			pool->free_heads[i] = xskb;
+		else
+			xp_init_xskb_addr(xskb, pool, i * pool->chunk_size);
 	}

 	return pool;
@@ -406,6 +411,12 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,

 	if (pool->unaligned)
 		xp_check_dma_contiguity(dma_map);
+	else
+		for (i = 0; i < pool->heads_cnt; i++) {
+			struct xdp_buff_xsk *xskb = &pool->heads[i];
+
+			xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr);
+		}

 	err = xp_init_dma_info(pool, dma_map);
 	if (err) {
@@ -448,8 +459,6 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
 	if (pool->free_heads_cnt == 0)
 		return NULL;

-	xskb = pool->free_heads[--pool->free_heads_cnt];
-
 	for (;;) {
 		if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) {
 			pool->fq->queue_empty_descs++;
@@ -466,17 +475,17 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
 		}
 		break;
 	}
-	xskq_cons_release(pool->fq);

-	xskb->orig_addr = addr;
-	xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom;
-	if (pool->dma_pages_cnt) {
-		xskb->frame_dma = (pool->dma_pages[addr >> PAGE_SHIFT] &
-				   ~XSK_NEXT_PG_CONTIG_MASK) +
-				  (addr & ~PAGE_MASK);
-		xskb->dma = xskb->frame_dma + pool->headroom +
-			    XDP_PACKET_HEADROOM;
+	if (pool->unaligned) {
+		xskb = pool->free_heads[--pool->free_heads_cnt];
+		xp_init_xskb_addr(xskb, pool, addr);
+		if (pool->dma_pages_cnt)
+			xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
+	} else {
+		xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
 	}
+
+	xskq_cons_release(pool->fq);
 	return xskb;
 }

@@ -533,13 +542,16 @@ static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
 			continue;
 		}

-		xskb = pool->free_heads[--pool->free_heads_cnt];
+		if (pool->unaligned) {
+			xskb = pool->free_heads[--pool->free_heads_cnt];
+			xp_init_xskb_addr(xskb, pool, addr);
+			if (pool->dma_pages_cnt)
+				xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
+		} else {
+			xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
+		}
+
 		*xdp = &xskb->xdp;
-		xskb->orig_addr = addr;
-		xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom;
-		xskb->frame_dma = (pool->dma_pages[addr >> PAGE_SHIFT] &
-				   ~XSK_NEXT_PG_CONTIG_MASK) + (addr & ~PAGE_MASK);
-		xskb->dma = xskb->frame_dma + pool->headroom + XDP_PACKET_HEADROOM;
 		xdp++;
 	}
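
The DMA side of the precomputation can be illustrated the same way. The sketch below is a hypothetical stand-alone rendition of the xp_init_xskb_dma() arithmetic that xp_dma_map() now runs once per chunk for aligned pools; the dma_pages[] table, addresses and headroom are invented example data, and XDP_PACKET_HEADROOM and PAGE_SIZE use their usual kernel values of 256 bytes and 4 KiB.

/* Illustration of the per-chunk DMA precomputation; not kernel code. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT		12
#define PAGE_SIZE		(1UL << PAGE_SHIFT)
#define PAGE_MASK		(~(PAGE_SIZE - 1))
#define XDP_PACKET_HEADROOM	256
#define XSK_NEXT_PG_CONTIG_MASK	1ULL	/* bit 0 of a dma_pages[] entry is a flag */

int main(void)
{
	/* One DMA address per umem page; bit 0 may mark "next page contiguous". */
	uint64_t dma_pages[] = { 0x10000000ULL | 1, 0x10001000ULL | 1, 0x10002000ULL };
	uint64_t headroom = 0;
	uint64_t addr = 2 * PAGE_SIZE + 2048;		/* chunk 5 of a 2048-byte-chunk umem, inside page 2 */

	/* Same arithmetic as xp_init_xskb_dma(): page DMA base + in-page offset. */
	uint64_t frame_dma = (dma_pages[addr >> PAGE_SHIFT] & ~XSK_NEXT_PG_CONTIG_MASK) +
			     (addr & ~PAGE_MASK);
	uint64_t dma = frame_dma + headroom + XDP_PACKET_HEADROOM;

	printf("frame_dma=0x%llx dma=0x%llx\n",
	       (unsigned long long)frame_dma, (unsigned long long)dma);
	/* prints frame_dma=0x10002800 dma=0x10002900 */
	return 0;
}

In the aligned case this work now happens once at xp_dma_map() time, so the per-packet allocation paths above only have to index into heads[].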
