 /**
  * DOC: page_pool allocator
  *
- * The page_pool allocator is optimized for the XDP mode that
- * uses one frame per-page, but it can fallback on the
- * regular page allocator APIs.
- *
- * Basic use involves replacing alloc_pages() calls with the
- * page_pool_alloc_pages() call. Drivers should use
- * page_pool_dev_alloc_pages() replacing dev_alloc_pages().
- *
- * The API keeps track of in-flight pages, in order to let API users know
- * when it is safe to free a page_pool object. Thus, API users
- * must call page_pool_put_page() to free the page, or attach
- * the page to a page_pool-aware object like skbs marked with
+ * The page_pool allocator is optimized for recycling pages or page fragments
+ * used by skb packets and xdp frames.
+ *
+ * Basic use involves replacing alloc_pages() calls with page_pool_alloc(),
+ * which allocates memory with or without page splitting depending on the
+ * requested memory size.
+ *
+ * If the driver knows that it always requires full pages or its allocations
+ * are always smaller than half a page, it can use one of the more specific
+ * API calls:
+ *
+ * 1. page_pool_alloc_pages(): allocate memory without page splitting when the
+ *    driver knows that the memory it needs is always bigger than half of the
+ *    page allocated from the page pool. There is no cache line dirtying for
+ *    'struct page' when a page is recycled back to the page pool.
+ *
+ * 2. page_pool_alloc_frag(): allocate memory with page splitting when the
+ *    driver knows that the memory it needs is always smaller than or equal to
+ *    half of the page allocated from the page pool. Page splitting enables
+ *    memory saving and thus avoids TLB/cache misses for data access, but there
+ *    is also some cost to implement page splitting, mainly some cache line
+ *    dirtying/bouncing for 'struct page' and atomic operations on
+ *    page->pp_frag_count.
+ *
+ * The API keeps track of in-flight pages, in order to let API users know when
+ * it is safe to free a page_pool object. Thus, API users must call
+ * page_pool_put_page() or page_pool_free_va() to free the page, or attach
+ * the page to a page_pool-aware object like skbs marked with
  * skb_mark_for_recycle().
  *
- * API users must call page_pool_put_page() once on a page, as it
- * will either recycle the page, or in case of refcnt > 1, it will
- * release the DMA mapping and in-flight state accounting.
+ * page_pool_put_page() may be called multiple times on the same page if a page
+ * is split into multiple fragments. For the last fragment, it will either
+ * recycle the page, or in case of page->_refcount > 1, it will release the DMA
+ * mapping and in-flight state accounting.
+ *
+ * dma_sync_single_range_for_device() is only called for the last fragment when
+ * page_pool is created with the PP_FLAG_DMA_SYNC_DEV flag, so it depends on the
+ * last freed fragment to do the sync_for_device operation for all fragments in
+ * the same page when a page is split. The API user must set up pool->p.max_len
+ * and pool->p.offset correctly and ensure that page_pool_put_page() is called
+ * with dma_sync_size being -1 for the fragment API.
  */
 #ifndef _NET_PAGE_POOL_HELPERS_H
 #define _NET_PAGE_POOL_HELPERS_H
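
To make the allocation flavours described in the DOC comment above concrete, here is a minimal usage sketch (editor's illustration, not part of the patch): struct my_rx_ring, my_rx_refill_one() and buf_len are hypothetical names; only the page_pool calls themselves come from this header.

```c
#include <linux/errno.h>
#include <net/page_pool/helpers.h>

/* Hypothetical driver state, invented purely for illustration. */
struct my_rx_ring {
        struct page_pool *pool;
};

static int my_rx_refill_one(struct my_rx_ring *ring, unsigned int buf_len)
{
        unsigned int offset, size = buf_len;
        struct page *page;

        /* Let the pool pick a full page or a page fragment based on the
         * requested size; 'size' comes back as the space actually reserved,
         * which keeps truesize accounting honest.
         */
        page = page_pool_dev_alloc(ring->pool, &offset, &size);
        if (unlikely(!page))
                return -ENOMEM;

        /* Program page_pool_get_dma_addr(page) + offset into the RX
         * descriptor here; 'size' is the usable length of this buffer.
         */
        return 0;
}
```

A driver that always needs whole pages would call page_pool_dev_alloc_pages() instead, and one whose buffers never exceed half a page would call page_pool_dev_alloc_frag(), as the DOC comment explains.
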
@@ -73,6 +96,17 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
         return page_pool_alloc_pages(pool, gfp);
 }
 
+/**
+ * page_pool_dev_alloc_frag() - allocate a page fragment.
+ * @pool: pool from which to allocate
+ * @offset: offset to the allocated page
+ * @size: requested size
+ *
+ * Get a page fragment from the page allocator or page_pool caches.
+ *
+ * Return:
+ * Return allocated page fragment, otherwise return NULL.
+ */
 static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
                                                     unsigned int *offset,
                                                     unsigned int size)
@@ -82,6 +116,91 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
         return page_pool_alloc_frag(pool, offset, size, gfp);
 }
 
+static inline struct page *page_pool_alloc(struct page_pool *pool,
+                                           unsigned int *offset,
+                                           unsigned int *size, gfp_t gfp)
+{
+        unsigned int max_size = PAGE_SIZE << pool->p.order;
+        struct page *page;
+
+        if ((*size << 1) > max_size) {
+                *size = max_size;
+                *offset = 0;
+                return page_pool_alloc_pages(pool, gfp);
+        }
+
+        page = page_pool_alloc_frag(pool, offset, *size, gfp);
+        if (unlikely(!page))
+                return NULL;
+
+        /* There is very likely not enough space for another fragment, so
+         * append the remaining size to the current fragment to avoid the
+         * truesize underestimate problem.
+         */
+        if (pool->frag_offset + *size > max_size) {
+                *size = max_size - *offset;
+                pool->frag_offset = max_size;
+        }
+
+        return page;
+}
+
+/**
+ * page_pool_dev_alloc() - allocate a page or a page fragment.
+ * @pool: pool from which to allocate
+ * @offset: offset to the allocated page
+ * @size: in as the requested size, out as the allocated size
+ *
+ * Get a page or a page fragment from the page allocator or page_pool caches
+ * depending on the requested size, in order to allocate memory with the least
+ * memory utilization and performance penalty.
+ *
+ * Return:
+ * Return allocated page or page fragment, otherwise return NULL.
+ */
+static inline struct page *page_pool_dev_alloc(struct page_pool *pool,
+                                               unsigned int *offset,
+                                               unsigned int *size)
+{
+        gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+        return page_pool_alloc(pool, offset, size, gfp);
+}
+
+static inline void *page_pool_alloc_va(struct page_pool *pool,
+                                       unsigned int *size, gfp_t gfp)
+{
+        unsigned int offset;
+        struct page *page;
+
+        /* Mask off __GFP_HIGHMEM to ensure we can use page_address() */
+        page = page_pool_alloc(pool, &offset, size, gfp & ~__GFP_HIGHMEM);
+        if (unlikely(!page))
+                return NULL;
+
+        return page_address(page) + offset;
+}
+
+/**
+ * page_pool_dev_alloc_va() - allocate a page or a page fragment and return its
+ *                            va.
+ * @pool: pool from which to allocate
+ * @size: in as the requested size, out as the allocated size
+ *
+ * This is just a thin wrapper around the page_pool_alloc() API, and it returns
+ * the va of the allocated page or page fragment.
+ *
+ * Return:
+ * Return the va for the allocated page or page fragment, otherwise return NULL.
+ */
+static inline void *page_pool_dev_alloc_va(struct page_pool *pool,
+                                           unsigned int *size)
+{
+        gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+        return page_pool_alloc_va(pool, size, gfp);
+}
+
 /**
  * page_pool_get_dma_dir() - Retrieve the stored DMA direction.
  * @pool: pool from which page was allocated
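
As a companion to the va-based helpers just added, here is a hedged sketch of how a driver might use page_pool_dev_alloc_va(); my_copy_header(), hdr and hdr_len are invented names for illustration only.

```c
#include <linux/string.h>
#include <net/page_pool/helpers.h>

/* Hypothetical helper: copy a packet header into pool-backed memory using
 * the va-based allocator, avoiding the page + offset bookkeeping.
 */
static void *my_copy_header(struct page_pool *pool, const void *hdr,
                            unsigned int hdr_len)
{
        unsigned int size = hdr_len;
        void *va;

        /* 'size' may come back larger than requested when the tail of the
         * underlying page is appended to this fragment.
         */
        va = page_pool_dev_alloc_va(pool, &size);
        if (unlikely(!va))
                return NULL;

        memcpy(va, hdr, hdr_len);
        return va;
}
```

The buffer would later be returned with page_pool_free_va(), which this patch adds further down.
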
@@ -115,28 +234,49 @@ static inline long page_pool_defrag_page(struct page *page, long nr)
         long ret;
 
         /* If nr == pp_frag_count then we have cleared all remaining
-         * references to the page. No need to actually overwrite it, instead
-         * we can leave this to be overwritten by the calling function.
+         * references to the page:
+         * 1. 'n == 1': no need to actually overwrite it.
+         * 2. 'n != 1': overwrite it with one, which is the rare case
+         *    for pp_frag_count draining.
          *
-         * The main advantage to doing this is that an atomic_read is
-         * generally a much cheaper operation than an atomic update,
-         * especially when dealing with a page that may be partitioned
-         * into only 2 or 3 pieces.
+         * The main advantage to doing this is that not only do we avoid an
+         * atomic update (an atomic_read is generally a much cheaper operation
+         * than an atomic update, especially when dealing with a page that may
+         * be partitioned into only 2 or 3 pieces), but we also unify the
+         * pp_frag_count handling by ensuring all pages are partitioned into
+         * only one piece initially, and only overwrite it when the page is
+         * partitioned into more than one piece.
          */
-        if (atomic_long_read(&page->pp_frag_count) == nr)
+        if (atomic_long_read(&page->pp_frag_count) == nr) {
+                /* As the BUILD_BUG_ON() ensures nr is always one for the
+                 * constant case, we only need to handle the non-constant case
+                 * here for pp_frag_count draining, which is rare.
+                 */
+                BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1);
+                if (!__builtin_constant_p(nr))
+                        atomic_long_set(&page->pp_frag_count, 1);
+
                 return 0;
+        }
 
         ret = atomic_long_sub_return(nr, &page->pp_frag_count);
         WARN_ON(ret < 0);
+
+        /* We are the last user here too, so reset pp_frag_count back to 1 to
+         * ensure all pages have been partitioned into one piece initially;
+         * this should be the rare case where the last two fragment users call
+         * page_pool_defrag_page() concurrently.
+         */
+        if (unlikely(!ret))
+                atomic_long_set(&page->pp_frag_count, 1);
+
         return ret;
 }
 
-static inline bool page_pool_is_last_frag(struct page_pool *pool,
-                                          struct page *page)
+static inline bool page_pool_is_last_frag(struct page *page)
 {
-        /* If fragments aren't enabled or count is 0 we were the last user */
-        return !(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
-               (page_pool_defrag_page(page, 1) == 0);
+        /* If page_pool_defrag_page() returns 0, we were the last user */
+        return page_pool_defrag_page(page, 1) == 0;
 }
 
 /**
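
The fragment counting above can be seen from the driver side with a small, hypothetical sequence (my_frag_demo() and the 1024-byte sizes are made up): two fragments carved from the same page keep it alive until the second put.

```c
#include <net/page_pool/helpers.h>

static void my_frag_demo(struct page_pool *pool)
{
        unsigned int off0, off1;
        struct page *p0, *p1;

        p0 = page_pool_dev_alloc_frag(pool, &off0, 1024);
        if (!p0)
                return;

        p1 = page_pool_dev_alloc_frag(pool, &off1, 1024);
        if (!p1) {
                page_pool_put_page(pool, p0, -1, false);
                return;
        }

        /* With order-0 pages the second request typically comes from the same
         * page, i.e. p0 == p1 and its pp_frag_count is 2 at this point.
         */
        page_pool_put_page(pool, p0, -1, false); /* not the last fragment */
        page_pool_put_page(pool, p1, -1, false); /* last fragment: recycle */
}
```

Note dma_sync_size is -1 on every put, so whichever fragment happens to be freed last can sync the whole page for the device, per the rule in the DOC comment.
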
@@ -161,7 +301,7 @@ static inline void page_pool_put_page(struct page_pool *pool,
          * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
          */
 #ifdef CONFIG_PAGE_POOL
-        if (!page_pool_is_last_frag(pool, page))
+        if (!page_pool_is_last_frag(page))
                 return;
 
         page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
@@ -200,6 +340,20 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
 #define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA \
         (sizeof(dma_addr_t) > sizeof(unsigned long))
 
+/**
+ * page_pool_free_va() - free a va into the page_pool
+ * @pool: pool from which va was allocated
+ * @va: va to be freed
+ * @allow_direct: freed by the consumer, allow lockless caching
+ *
+ * Free a va allocated from page_pool_alloc_va().
+ */
+static inline void page_pool_free_va(struct page_pool *pool, void *va,
+                                     bool allow_direct)
+{
+        page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct);
+}
+
 /**
  * page_pool_get_dma_addr() - Retrieve the stored DMA address.
  * @page: page allocated from a page pool
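
For completeness, a hypothetical release path matching the page_pool_dev_alloc_va() sketch earlier (my_hdr_buf_free() is an invented name):

```c
#include <net/page_pool/helpers.h>

static void my_hdr_buf_free(struct page_pool *pool, void *va)
{
        /* virt_to_head_page() is done inside page_pool_free_va(); pass
         * allow_direct = false when not running in the pool's NAPI context.
         */
        page_pool_free_va(pool, va, false);
}
```
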