@@ -2599,6 +2599,36 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
 }
 
+static void io_pages_unmap(void *ptr, struct page ***pages,
+			   unsigned short *npages)
+{
+	bool do_vunmap = false;
+
+	if (!ptr)
+		return;
+
+	if (*npages) {
+		struct page **to_free = *pages;
+		int i;
+
+		/*
+		 * Only did vmap for the non-compound multiple page case.
+		 * For the compound page, we just need to put the head.
+		 */
+		if (PageCompound(to_free[0]))
+			*npages = 1;
+		else if (*npages > 1)
+			do_vunmap = true;
+		for (i = 0; i < *npages; i++)
+			put_page(to_free[i]);
+	}
+	if (do_vunmap)
+		vunmap(ptr);
+	kvfree(*pages);
+	*pages = NULL;
+	*npages = 0;
+}
+
 void io_mem_free(void *ptr)
 {
 	if (!ptr)
@@ -2699,8 +2729,8 @@ static void *io_sqes_map(struct io_ring_ctx *ctx, unsigned long uaddr,
 static void io_rings_free(struct io_ring_ctx *ctx)
 {
 	if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
-		io_mem_free(ctx->rings);
-		io_mem_free(ctx->sq_sqes);
+		io_pages_unmap(ctx->rings, &ctx->ring_pages, &ctx->n_ring_pages);
+		io_pages_unmap(ctx->sq_sqes, &ctx->sqe_pages, &ctx->n_sqe_pages);
 	} else {
 		io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
 		ctx->n_ring_pages = 0;
@@ -2712,6 +2742,80 @@ static void io_rings_free(struct io_ring_ctx *ctx)
 	ctx->sq_sqes = NULL;
 }
 
+static void *io_mem_alloc_compound(struct page **pages, int nr_pages,
+				   size_t size, gfp_t gfp)
+{
+	struct page *page;
+	int i, order;
+
+	order = get_order(size);
+	if (order > MAX_PAGE_ORDER)
+		return ERR_PTR(-ENOMEM);
+	else if (order)
+		gfp |= __GFP_COMP;
+
+	page = alloc_pages(gfp, order);
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < nr_pages; i++)
+		pages[i] = page + i;
+
+	return page_address(page);
+}
+
+static void *io_mem_alloc_single(struct page **pages, int nr_pages, size_t size,
+				 gfp_t gfp)
+{
+	void *ret;
+	int i;
+
+	for (i = 0; i < nr_pages; i++) {
+		pages[i] = alloc_page(gfp);
+		if (!pages[i])
+			goto err;
+	}
+
+	ret = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+	if (ret)
+		return ret;
+err:
+	while (i--)
+		put_page(pages[i]);
+	return ERR_PTR(-ENOMEM);
+}
+
+static void *io_pages_map(struct page ***out_pages, unsigned short *npages,
+			  size_t size)
+{
+	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN;
+	struct page **pages;
+	int nr_pages;
+	void *ret;
+
+	nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	pages = kvmalloc_array(nr_pages, sizeof(struct page *), gfp);
+	if (!pages)
+		return ERR_PTR(-ENOMEM);
+
+	ret = io_mem_alloc_compound(pages, nr_pages, size, gfp);
+	if (!IS_ERR(ret))
+		goto done;
+
+	ret = io_mem_alloc_single(pages, nr_pages, size, gfp);
+	if (!IS_ERR(ret)) {
+done:
+		*out_pages = pages;
+		*npages = nr_pages;
+		return ret;
+	}
+
+	kvfree(pages);
+	*out_pages = NULL;
+	*npages = 0;
+	return ret;
+}
+
 void *io_mem_alloc(size_t size)
 {
 	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
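
For context on the hunk above: io_pages_map() first tries io_mem_alloc_compound(), which grabs one physically contiguous block (virtually contiguous for free, so no vmap() is needed), and only falls back to io_mem_alloc_single(), which stitches order-0 pages together with vmap(). Below is a minimal userspace sketch of the sizing math involved, assuming 4 KiB pages; the get_order() here is a local re-implementation of the kernel helper for illustration, not the kernel symbol.

#include <stdio.h>
#include <unistd.h>

/* Local stand-in for the kernel's get_order(): the smallest
 * power-of-two order of pages that covers "size". */
static int get_order(size_t size, long page_size)
{
	size_t block = page_size;
	int order = 0;

	while (block < size) {
		block <<= 1;
		order++;
	}
	return order;
}

int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	size_t size = 64 * 1024;	/* e.g. a 64 KiB CQ ring */
	size_t nr_pages = (size + page_size - 1) / page_size;

	/* With 4 KiB pages this prints nr_pages=16, order=4: a single
	 * order-4 compound allocation backs the whole ring if it succeeds. */
	printf("size=%zu -> nr_pages=%zu, compound order=%d\n",
	       size, nr_pages, get_order(size, page_size));
	return 0;
}

If the compound attempt fails (say, under fragmentation), io_mem_alloc_single() still satisfies the request from order-0 pages, at the cost of a vmap'ed rather than physically contiguous mapping; io_pages_unmap() above handles both cases on teardown.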
@@ -3298,14 +3402,12 @@ static void *io_uring_validate_mmap_request(struct file *file,
 		/* Don't allow mmap if the ring was setup without it */
 		if (ctx->flags & IORING_SETUP_NO_MMAP)
 			return ERR_PTR(-EINVAL);
-		ptr = ctx->rings;
-		break;
+		return ctx->rings;
 	case IORING_OFF_SQES:
 		/* Don't allow mmap if the ring was setup without it */
 		if (ctx->flags & IORING_SETUP_NO_MMAP)
 			return ERR_PTR(-EINVAL);
-		ptr = ctx->sq_sqes;
-		break;
+		return ctx->sq_sqes;
 	case IORING_OFF_PBUF_RING: {
 		struct io_buffer_list *bl;
 		unsigned int bgid;
@@ -3329,18 +3431,39 @@ static void *io_uring_validate_mmap_request(struct file *file,
 	return ptr;
 }
 
+int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
+			struct page **pages, int npages)
+{
+	unsigned long nr_pages = npages;
+
+	vm_flags_set(vma, VM_DONTEXPAND);
+	return vm_insert_pages(vma, vma->vm_start, pages, &nr_pages);
+}
+
 #ifdef CONFIG_MMU
 
 static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
 {
+	struct io_ring_ctx *ctx = file->private_data;
 	size_t sz = vma->vm_end - vma->vm_start;
+	long offset = vma->vm_pgoff << PAGE_SHIFT;
 	unsigned long pfn;
 	void *ptr;
 
 	ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
 	if (IS_ERR(ptr))
 		return PTR_ERR(ptr);
 
+	switch (offset & IORING_OFF_MMAP_MASK) {
+	case IORING_OFF_SQ_RING:
+	case IORING_OFF_CQ_RING:
+		return io_uring_mmap_pages(ctx, vma, ctx->ring_pages,
+					   ctx->n_ring_pages);
+	case IORING_OFF_SQES:
+		return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages,
+					   ctx->n_sqe_pages);
+	}
+
 	pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
 	return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
 }
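
Nothing changes for userspace in this hunk: rings are still mapped with a plain mmap() at the fixed IORING_OFF_* offsets, and the kernel now services those mappings with vm_insert_pages() instead of remap_pfn_range(). A minimal sketch of that flow, using only the documented io_uring_setup(2)/mmap(2) interface (error handling trimmed for brevity):

#include <linux/io_uring.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct io_uring_params p;
	memset(&p, 0, sizeof(p));

	int fd = syscall(__NR_io_uring_setup, 8, &p);
	if (fd < 0) {
		perror("io_uring_setup");
		return 1;
	}

	/* SQ ring: this mmap() now hits the IORING_OFF_SQ_RING case in
	 * io_uring_mmap() and is backed by ctx->ring_pages. */
	size_t sq_sz = p.sq_off.array + p.sq_entries * sizeof(__u32);
	void *sq = mmap(NULL, sq_sz, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);

	/* SQE array: the IORING_OFF_SQES case, backed by ctx->sqe_pages. */
	size_t sqes_sz = p.sq_entries * sizeof(struct io_uring_sqe);
	void *sqes = mmap(NULL, sqes_sz, PROT_READ | PROT_WRITE,
			  MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);

	if (sq == MAP_FAILED || sqes == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("sq ring at %p, sqes at %p\n", sq, sqes);
	close(fd);
	return 0;
}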
@@ -3630,7 +3753,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 		return -EOVERFLOW;
 
 	if (!(ctx->flags & IORING_SETUP_NO_MMAP))
-		rings = io_mem_alloc(size);
+		rings = io_pages_map(&ctx->ring_pages, &ctx->n_ring_pages, size);
 	else
 		rings = io_rings_map(ctx, p->cq_off.user_addr, size);
 
@@ -3655,7 +3778,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 	}
 
 	if (!(ctx->flags & IORING_SETUP_NO_MMAP))
-		ptr = io_mem_alloc(size);
+		ptr = io_pages_map(&ctx->sqe_pages, &ctx->n_sqe_pages, size);
 	else
 		ptr = io_sqes_map(ctx, p->sq_off.user_addr, size);
 