@@ -220,31 +220,6 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
 	return subpool_inode(file_inode(vma->vm_file));
 }
 
-/*
- * Region tracking -- allows tracking of reservations and instantiated pages
- * across the pages in a mapping.
- *
- * The region data structures are embedded into a resv_map and protected
- * by a resv_map's lock. The set of regions within the resv_map represent
- * reservations for huge pages, or huge pages that have already been
- * instantiated within the map. The from and to elements are huge page
- * indicies into the associated mapping. from indicates the starting index
- * of the region. to represents the first index past the end of the region.
- *
- * For example, a file region structure with from == 0 and to == 4 represents
- * four huge pages in a mapping. It is important to note that the to element
- * represents the first element past the end of the region. This is used in
- * arithmetic as 4(to) - 0(from) = 4 huge pages in the region.
- *
- * Interval notation of the form [from, to) will be used to indicate that
- * the endpoint from is inclusive and to is exclusive.
- */
-struct file_region {
-	struct list_head link;
-	long from;
-	long to;
-};
-
 /* Helper that removes a struct file_region from the resv_map cache and returns
  * it for use.
  */
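
The struct is not dropped outright: struct file_region presumably moves to include/linux/hugetlb.h in this same patch (that file is not part of this excerpt), picking up the cgroup bookkeeping the helpers below rely on. A sketch, with the two new fields inferred from their uses in this diff:

	struct file_region {
		struct list_head link;
		long from;
		long to;
	#ifdef CONFIG_CGROUP_HUGETLB
		/*
		 * On shared mappings, each file_region carries what is
		 * needed to uncharge its reservation when it is removed.
		 */
		struct page_counter *reservation_counter;
		struct cgroup_subsys_state *css;
	#endif
	};
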
@@ -266,14 +241,51 @@ get_file_region_entry_from_cache(struct resv_map *resv, long from, long to)
 	return nrg;
 }
 
+static void copy_hugetlb_cgroup_uncharge_info(struct file_region *nrg,
+					      struct file_region *rg)
+{
+#ifdef CONFIG_CGROUP_HUGETLB
+	nrg->reservation_counter = rg->reservation_counter;
+	nrg->css = rg->css;
+	if (rg->css)
+		css_get(rg->css);
+#endif
+}
+
+/* Helper that records hugetlb_cgroup uncharge info. */
+static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
+						struct hstate *h,
+						struct resv_map *resv,
+						struct file_region *nrg)
+{
+#ifdef CONFIG_CGROUP_HUGETLB
+	if (h_cg) {
+		nrg->reservation_counter =
+			&h_cg->rsvd_hugepage[hstate_index(h)];
+		nrg->css = &h_cg->css;
+		if (!resv->pages_per_hpage)
+			resv->pages_per_hpage = pages_per_huge_page(h);
+		/* pages_per_hpage should be the same for all entries in
+		 * a resv_map.
+		 */
+		VM_BUG_ON(resv->pages_per_hpage != pages_per_huge_page(h));
+	} else {
+		nrg->reservation_counter = NULL;
+		nrg->css = NULL;
+	}
+#endif
+}
+
 /* Must be called with resv->lock held. Calling this with count_only == true
  * will count the number of pages to be added but will not modify the linked
  * list. If regions_needed != NULL and count_only == true, then regions_needed
  * will indicate the number of file_regions needed in the cache to carry out
  * the addition of regions for this range.
  */
 static long add_reservation_in_range(struct resv_map *resv, long f, long t,
-				     long *regions_needed, bool count_only)
+				     struct hugetlb_cgroup *h_cg,
+				     struct hstate *h, long *regions_needed,
+				     bool count_only)
 {
 	long add = 0;
 	struct list_head *head = &resv->regions;
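
copy_hugetlb_cgroup_uncharge_info() is for regions that inherit their accounting from an existing one (the tail produced when region_del() splits a region, below); note it takes an extra css reference so that each region can later drop its own. record_hugetlb_cgroup_uncharge_info() stamps a freshly created region with the charging cgroup's per-hstate rsvd counter. Both only store uncharge info; the uncharge itself is hugetlb_cgroup_uncharge_file_region(), added to mm/hugetlb_cgroup.c by the same series. Its body is roughly as follows (a paraphrased sketch, not part of this file's diff):

	void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
						 struct file_region *rg,
						 unsigned long nr_pages)
	{
		if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
			return;

		/* Private mappings uncharge through the resv_map instead. */
		if (rg->reservation_counter && resv->pages_per_hpage &&
		    !resv->reservation_counter) {
			page_counter_uncharge(rg->reservation_counter,
					      nr_pages * resv->pages_per_hpage);
			css_put(rg->css);
		}
	}
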
@@ -312,6 +324,8 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
 		if (!count_only) {
 			nrg = get_file_region_entry_from_cache(
 				resv, last_accounted_offset, rg->from);
+			record_hugetlb_cgroup_uncharge_info(h_cg, h,
+							    resv, nrg);
 			list_add(&nrg->link, rg->link.prev);
 		} else if (regions_needed)
 			*regions_needed += 1;
@@ -328,6 +342,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
 	if (!count_only) {
 		nrg = get_file_region_entry_from_cache(
 			resv, last_accounted_offset, t);
+		record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg);
 		list_add(&nrg->link, rg->link.prev);
 	} else if (regions_needed)
 		*regions_needed += 1;
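
Both call sites need the stamp because add_reservation_in_range() creates regions in two places: once per gap in front of an existing region, and once for whatever remains between the last region and t. An illustrative walk (indices made up):

	/* f = 0, t = 6, existing region [2, 4):
	 *
	 *   loop body:  gap [0, 2) -> first call site creates + stamps it
	 *   after loop: gap [4, 6) -> second call site creates + stamps it
	 *
	 * add = 4; both new regions now know how to uncharge themselves.
	 */
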
@@ -416,15 +431,17 @@ static int allocate_file_region_entries(struct resv_map *resv,
  * 1 page will only require at most 1 entry.
  */
 static long region_add(struct resv_map *resv, long f, long t,
-		       long in_regions_needed)
+		       long in_regions_needed, struct hstate *h,
+		       struct hugetlb_cgroup *h_cg)
 {
 	long add = 0, actual_regions_needed = 0;
 
 	spin_lock(&resv->lock);
 retry:
 
 	/* Count how many regions are actually needed to execute this add. */
-	add_reservation_in_range(resv, f, t, &actual_regions_needed, true);
+	add_reservation_in_range(resv, f, t, NULL, NULL, &actual_regions_needed,
+				 true);
 
 	/*
 	 * Check for sufficient descriptors in the cache to accommodate
@@ -452,7 +469,7 @@ static long region_add(struct resv_map *resv, long f, long t,
 		goto retry;
 	}
 
-	add = add_reservation_in_range(resv, f, t, NULL, false);
+	add = add_reservation_in_range(resv, f, t, h_cg, h, NULL, false);
 
 	resv->adds_in_progress -= in_regions_needed;
 
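
The counting pass runs with count_only == true and never touches the list, so it passes NULL for h_cg and h; only the commit pass stamps uncharge info onto new regions. From the caller's side the pairing now looks roughly like this (condensed from hugetlb_reserve_pages() below):

	chg = region_chg(resv_map, from, to, &regions_needed);	/* count  */
	/* ... charge chg * pages_per_huge_page(h) to h_cg ... */
	add = region_add(resv_map, from, to, regions_needed,
			 h, h_cg);				/* commit */
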
@@ -489,7 +506,8 @@ static long region_chg(struct resv_map *resv, long f, long t,
 	spin_lock(&resv->lock);
 
 	/* Count how many hugepages in this range are NOT represented. */
-	chg = add_reservation_in_range(resv, f, t, out_regions_needed, true);
+	chg = add_reservation_in_range(resv, f, t, NULL, NULL,
+				       out_regions_needed, true);
 
 	if (*out_regions_needed == 0)
 		*out_regions_needed = 1;
@@ -589,18 +607,26 @@ static long region_del(struct resv_map *resv, long f, long t)
 			/* New entry for end of split region */
 			nrg->from = t;
 			nrg->to = rg->to;
+
+			copy_hugetlb_cgroup_uncharge_info(nrg, rg);
+
 			INIT_LIST_HEAD(&nrg->link);
 
 			/* Original entry is trimmed */
 			rg->to = f;
 
+			hugetlb_cgroup_uncharge_file_region(
+				resv, rg, t - f);
+
 			list_add(&nrg->link, &rg->link);
 			nrg = NULL;
 			break;
 		}
 
 		if (f <= rg->from && t >= rg->to) { /* Remove entire region */
 			del += rg->to - rg->from;
+			hugetlb_cgroup_uncharge_file_region(resv, rg,
+							    rg->to - rg->from);
 			list_del(&rg->link);
 			kfree(rg);
 			continue;
@@ -609,9 +635,15 @@ static long region_del(struct resv_map *resv, long f, long t)
 		if (f <= rg->from) {	/* Trim beginning of region */
+			hugetlb_cgroup_uncharge_file_region(resv, rg,
+							    t - rg->from);
+
 			del += t - rg->from;
 			rg->from = t;
 		} else {		/* Trim end of region */
+			hugetlb_cgroup_uncharge_file_region(resv, rg,
+							    rg->to - f);
+
 			del += rg->to - f;
 			rg->to = f;
 		}
 	}
 
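
Each shape of deletion uncharges exactly the pages it removes, computed before the region is mutated or freed. A worked example against a charged region [0, 10):

	/* region_del(resv, 4, 6):  split case
	 *   nrg = [6, 10) inherits the counter via copy_...() + css_get()
	 *   rg  shrinks to [0, 4); uncharge t - f = 2 pages
	 *
	 * region_del(resv, 0, 10): remove case
	 *   uncharge rg->to - rg->from = 10 pages (css_put happens in
	 *   hugetlb_cgroup_uncharge_file_region()), then kfree(rg)
	 *
	 * region_del(resv, 0, 3):  trim-beginning case
	 *   uncharge t - rg->from = 3 pages, then rg->from = 3
	 */
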
@@ -2124,7 +2156,7 @@ static long __vma_reservation_common(struct hstate *h,
 		VM_BUG_ON(dummy_out_regions_needed != 1);
 		break;
 	case VMA_COMMIT_RESV:
-		ret = region_add(resv, idx, idx + 1, 1);
+		ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
 		/* region_add calls of range 1 should never fail. */
 		VM_BUG_ON(ret < 0);
 		break;
@@ -2134,7 +2166,7 @@ static long __vma_reservation_common(struct hstate *h,
 		break;
 	case VMA_ADD_RESV:
 		if (vma->vm_flags & VM_MAYSHARE) {
-			ret = region_add(resv, idx, idx + 1, 1);
+			ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
 			/* region_add calls of range 1 should never fail. */
 			VM_BUG_ON(ret < 0);
 		} else {
@@ -4830,7 +4862,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	struct hstate *h = hstate_inode(inode);
 	struct hugepage_subpool *spool = subpool_inode(inode);
 	struct resv_map *resv_map;
-	struct hugetlb_cgroup *h_cg;
+	struct hugetlb_cgroup *h_cg = NULL;
 	long gbl_reserve, regions_needed = 0;
 
 	/* This should never happen */
@@ -4871,19 +4903,6 @@ int hugetlb_reserve_pages(struct inode *inode,
 
 		chg = to - from;
 
-		if (hugetlb_cgroup_charge_cgroup_rsvd(
-			    hstate_index(h), chg * pages_per_huge_page(h),
-			    &h_cg)) {
-			kref_put(&resv_map->refs, resv_map_release);
-			return -ENOMEM;
-		}
-
-		/*
-		 * Since this branch handles private mappings, we attach the
-		 * counter to uncharge for this reservation off resv_map.
-		 */
-		resv_map_set_hugetlb_cgroup_uncharge_info(resv_map, h_cg, h);
-
 		set_vma_resv_map(vma, resv_map);
 		set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
 	}
@@ -4893,6 +4912,21 @@ int hugetlb_reserve_pages(struct inode *inode,
 		goto out_err;
 	}
 
+	ret = hugetlb_cgroup_charge_cgroup_rsvd(
+		hstate_index(h), chg * pages_per_huge_page(h), &h_cg);
+
+	if (ret < 0) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) {
+		/* For private mappings, the hugetlb_cgroup uncharge info hangs
+		 * off the resv_map.
+		 */
+		resv_map_set_hugetlb_cgroup_uncharge_info(resv_map, h_cg, h);
+	}
+
@@ -4901,7 +4935,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	gbl_reserve = hugepage_subpool_get_pages(spool, chg);
 	if (gbl_reserve < 0) {
 		ret = -ENOSPC;
-		goto out_err;
+		goto out_uncharge_cgroup;
 	}
49074941 /*
@@ -4910,9 +4944,7 @@ int hugetlb_reserve_pages(struct inode *inode,
49104944 */
49114945 ret = hugetlb_acct_memory (h , gbl_reserve );
49124946 if (ret < 0 ) {
4913- /* put back original number of pages, chg */
4914- (void )hugepage_subpool_put_pages (spool , chg );
4915- goto out_err ;
4947+ goto out_put_pages ;
49164948 }
49174949
49184950 /*
@@ -4927,13 +4959,11 @@ int hugetlb_reserve_pages(struct inode *inode,
49274959 * else has to be done for private mappings here
49284960 */
49294961 if (!vma || vma -> vm_flags & VM_MAYSHARE ) {
4930- add = region_add (resv_map , from , to , regions_needed );
4962+ add = region_add (resv_map , from , to , regions_needed , h , h_cg );
49314963
49324964 if (unlikely (add < 0 )) {
49334965 hugetlb_acct_memory (h , - gbl_reserve );
4934- /* put back original number of pages, chg */
4935- (void )hugepage_subpool_put_pages (spool , chg );
4936- goto out_err ;
4966+ goto out_put_pages ;
49374967 } else if (unlikely (chg > add )) {
49384968 /*
49394969 * pages in this range were added to the reserve
@@ -4944,12 +4974,22 @@ int hugetlb_reserve_pages(struct inode *inode,
 			 */
 			long rsv_adjust;
 
+			hugetlb_cgroup_uncharge_cgroup_rsvd(
+				hstate_index(h),
+				(chg - add) * pages_per_huge_page(h), h_cg);
+
 			rsv_adjust = hugepage_subpool_put_pages(spool,
 								chg - add);
 			hugetlb_acct_memory(h, -rsv_adjust);
 		}
 	}
 	return 0;
+out_put_pages:
+	/* put back original number of pages, chg */
+	(void)hugepage_subpool_put_pages(spool, chg);
+out_uncharge_cgroup:
+	hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
+					    chg * pages_per_huge_page(h), h_cg);
 out_err:
 	if (!vma || vma->vm_flags & VM_MAYSHARE)
 		/* Only call region_abort if the region_chg succeeded but the
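
With the new labels, error unwinding is a proper ladder that releases resources in reverse order of acquisition (a summary of the paths above):

	/*
	 *   out_put_pages:       undo hugepage_subpool_get_pages()
	 *   out_uncharge_cgroup: undo hugetlb_cgroup_charge_cgroup_rsvd()
	 *                        (a no-op when h_cg stayed NULL)
	 *   out_err:             region_abort()/kref_put() as before
	 */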