
Commit 3ab1db3

io_uring: get rid of remap_pfn_range() for mapping rings/sqes
Rather than use remap_pfn_range() for this and manually free later, switch to using vm_insert_pages() and have it Just Work.

If possible, allocate a single compound page that covers the range that is needed. If that works, then we can just use page_address() on that page. If we fail to get a compound page, allocate single pages and use vmap() to map them into the kernel virtual address space.

This just covers the rings/sqes; the other remaining user of remap_pfn_range() for mmap will be converted separately. Once that is done, we can kill the old alloc/free code.

Signed-off-by: Jens Axboe <[email protected]>
1 parent 62346c6 commit 3ab1db3
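For context, the mappings this commit changes are the ones userspace creates against the io_uring fd. Below is a minimal userspace sketch, not part of this commit, assuming a ring set up without IORING_SETUP_NO_MMAP and with the sizes derived from the offsets reported back in struct io_uring_params:

#include <linux/io_uring.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

static int map_ring(unsigned int entries)
{
        struct io_uring_params p;
        size_t sq_sz, cq_sz, sqes_sz;
        void *sq_ring, *cq_ring, *sqes;
        int fd;

        memset(&p, 0, sizeof(p));
        fd = syscall(__NR_io_uring_setup, entries, &p);
        if (fd < 0)
                return -1;

        /* Sizes follow the offsets the kernel reports back */
        sq_sz = p.sq_off.array + p.sq_entries * sizeof(__u32);
        cq_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
        sqes_sz = p.sq_entries * sizeof(struct io_uring_sqe);

        /* The mmap offset selects which kernel buffer gets mapped */
        sq_ring = mmap(NULL, sq_sz, PROT_READ | PROT_WRITE,
                       MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
        cq_ring = mmap(NULL, cq_sz, PROT_READ | PROT_WRITE,
                       MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
        sqes = mmap(NULL, sqes_sz, PROT_READ | PROT_WRITE,
                    MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
        if (sq_ring == MAP_FAILED || cq_ring == MAP_FAILED || sqes == MAP_FAILED) {
                close(fd);
                return -1;
        }
        /* ... ring setup and submission elided ... */
        return fd;
}

Nothing here changes for userspace; the commit only changes how the kernel allocates and maps the memory backing these three regions.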

2 files changed: +133, -8 lines
io_uring/io_uring.c

Lines changed: 131 additions & 8 deletions
@@ -2599,6 +2599,36 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
 }
 
+static void io_pages_unmap(void *ptr, struct page ***pages,
+			   unsigned short *npages)
+{
+	bool do_vunmap = false;
+
+	if (!ptr)
+		return;
+
+	if (*npages) {
+		struct page **to_free = *pages;
+		int i;
+
+		/*
+		 * Only did vmap for the non-compound multiple page case.
+		 * For the compound page, we just need to put the head.
+		 */
+		if (PageCompound(to_free[0]))
+			*npages = 1;
+		else if (*npages > 1)
+			do_vunmap = true;
+		for (i = 0; i < *npages; i++)
+			put_page(to_free[i]);
+	}
+	if (do_vunmap)
+		vunmap(ptr);
+	kvfree(*pages);
+	*pages = NULL;
+	*npages = 0;
+}
+
 void io_mem_free(void *ptr)
 {
 	if (!ptr)
@@ -2699,8 +2729,8 @@ static void *io_sqes_map(struct io_ring_ctx *ctx, unsigned long uaddr,
 static void io_rings_free(struct io_ring_ctx *ctx)
 {
 	if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
-		io_mem_free(ctx->rings);
-		io_mem_free(ctx->sq_sqes);
+		io_pages_unmap(ctx->rings, &ctx->ring_pages, &ctx->n_ring_pages);
+		io_pages_unmap(ctx->sq_sqes, &ctx->sqe_pages, &ctx->n_sqe_pages);
 	} else {
 		io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
 		ctx->n_ring_pages = 0;
@@ -2712,6 +2742,80 @@ static void io_rings_free(struct io_ring_ctx *ctx)
 	ctx->sq_sqes = NULL;
 }
 
+static void *io_mem_alloc_compound(struct page **pages, int nr_pages,
+				   size_t size, gfp_t gfp)
+{
+	struct page *page;
+	int i, order;
+
+	order = get_order(size);
+	if (order > MAX_PAGE_ORDER)
+		return ERR_PTR(-ENOMEM);
+	else if (order)
+		gfp |= __GFP_COMP;
+
+	page = alloc_pages(gfp, order);
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < nr_pages; i++)
+		pages[i] = page + i;
+
+	return page_address(page);
+}
+
+static void *io_mem_alloc_single(struct page **pages, int nr_pages, size_t size,
+				 gfp_t gfp)
+{
+	void *ret;
+	int i;
+
+	for (i = 0; i < nr_pages; i++) {
+		pages[i] = alloc_page(gfp);
+		if (!pages[i])
+			goto err;
+	}
+
+	ret = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+	if (ret)
+		return ret;
+err:
+	while (i--)
+		put_page(pages[i]);
+	return ERR_PTR(-ENOMEM);
+}
+
+static void *io_pages_map(struct page ***out_pages, unsigned short *npages,
+			  size_t size)
+{
+	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN;
+	struct page **pages;
+	int nr_pages;
+	void *ret;
+
+	nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	pages = kvmalloc_array(nr_pages, sizeof(struct page *), gfp);
+	if (!pages)
+		return ERR_PTR(-ENOMEM);
+
+	ret = io_mem_alloc_compound(pages, nr_pages, size, gfp);
+	if (!IS_ERR(ret))
+		goto done;
+
+	ret = io_mem_alloc_single(pages, nr_pages, size, gfp);
+	if (!IS_ERR(ret)) {
+done:
+		*out_pages = pages;
+		*npages = nr_pages;
+		return ret;
+	}
+
+	kvfree(pages);
+	*out_pages = NULL;
+	*npages = 0;
+	return ret;
+}
+
 void *io_mem_alloc(size_t size)
 {
 	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
@@ -3298,14 +3402,12 @@ static void *io_uring_validate_mmap_request(struct file *file,
 		/* Don't allow mmap if the ring was setup without it */
 		if (ctx->flags & IORING_SETUP_NO_MMAP)
 			return ERR_PTR(-EINVAL);
-		ptr = ctx->rings;
-		break;
+		return ctx->rings;
 	case IORING_OFF_SQES:
 		/* Don't allow mmap if the ring was setup without it */
 		if (ctx->flags & IORING_SETUP_NO_MMAP)
 			return ERR_PTR(-EINVAL);
-		ptr = ctx->sq_sqes;
-		break;
+		return ctx->sq_sqes;
 	case IORING_OFF_PBUF_RING: {
 		struct io_buffer_list *bl;
 		unsigned int bgid;
@@ -3329,18 +3431,39 @@ static void *io_uring_validate_mmap_request(struct file *file,
 	return ptr;
 }
 
+int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
+			struct page **pages, int npages)
+{
+	unsigned long nr_pages = npages;
+
+	vm_flags_set(vma, VM_DONTEXPAND);
+	return vm_insert_pages(vma, vma->vm_start, pages, &nr_pages);
+}
+
 #ifdef CONFIG_MMU
 
 static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
 {
+	struct io_ring_ctx *ctx = file->private_data;
 	size_t sz = vma->vm_end - vma->vm_start;
+	long offset = vma->vm_pgoff << PAGE_SHIFT;
 	unsigned long pfn;
 	void *ptr;
 
 	ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
 	if (IS_ERR(ptr))
 		return PTR_ERR(ptr);
 
+	switch (offset & IORING_OFF_MMAP_MASK) {
+	case IORING_OFF_SQ_RING:
+	case IORING_OFF_CQ_RING:
+		return io_uring_mmap_pages(ctx, vma, ctx->ring_pages,
+					   ctx->n_ring_pages);
+	case IORING_OFF_SQES:
+		return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages,
+					   ctx->n_sqe_pages);
+	}
+
 	pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
 	return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
 }
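The hunk above is the whole of the new mapping path: instead of translating a kernel virtual address to a PFN and calling remap_pfn_range(), the ring's page array is handed to vm_insert_pages(), which installs ordinary refcounted page mappings, so together with io_pages_unmap() teardown is just put_page() (plus vunmap() for the non-compound case) rather than a separate manual free. As a generic sketch with illustrative names, not from this commit or the kernel tree, the same pattern in a driver mmap handler looks like:

/*
 * Illustrative sketch: map a driver-owned page array into userspace with
 * vm_insert_pages(). Each inserted page holds a normal page reference,
 * so the allocation side can later release everything with put_page().
 */
static int example_mmap_page_array(struct vm_area_struct *vma,
				   struct page **pages, int npages)
{
	unsigned long nr_pages = npages;

	/* Refuse a mapping larger than the pages we have */
	if (vma->vm_end - vma->vm_start > (unsigned long)npages << PAGE_SHIFT)
		return -EINVAL;

	/* The mapping must not be grown later via mremap() */
	vm_flags_set(vma, VM_DONTEXPAND);

	/* Install PTEs for the pages starting at vma->vm_start */
	return vm_insert_pages(vma, vma->vm_start, pages, &nr_pages);
}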
@@ -3630,7 +3753,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 		return -EOVERFLOW;
 
 	if (!(ctx->flags & IORING_SETUP_NO_MMAP))
-		rings = io_mem_alloc(size);
+		rings = io_pages_map(&ctx->ring_pages, &ctx->n_ring_pages, size);
 	else
 		rings = io_rings_map(ctx, p->cq_off.user_addr, size);
 
@@ -3655,7 +3778,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 	}
 
 	if (!(ctx->flags & IORING_SETUP_NO_MMAP))
-		ptr = io_mem_alloc(size);
+		ptr = io_pages_map(&ctx->sqe_pages, &ctx->n_sqe_pages, size);
 	else
 		ptr = io_sqes_map(ctx, p->sq_off.user_addr, size);
 
io_uring/io_uring.h

Lines changed: 2 additions & 0 deletions
@@ -70,6 +70,8 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags);
 void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
 
 struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
+int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
+			struct page **pages, int npages);
 
 struct file *io_file_get_normal(struct io_kiocb *req, int fd);
 struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
