Skip to content

Commit d85aecf

Browse files
MiaoheLintorvalds
authored andcommitted
hugetlb_cgroup: fix imbalanced css_get and css_put pair for shared mappings
The current implementation of hugetlb_cgroup for shared mappings could have different behavior. Consider the following two scenarios: 1.Assume initial css reference count of hugetlb_cgroup is 1: 1.1 Call hugetlb_reserve_pages with from = 1, to = 2. So css reference count is 2 associated with 1 file_region. 1.2 Call hugetlb_reserve_pages with from = 2, to = 3. So css reference count is 3 associated with 2 file_region. 1.3 coalesce_file_region will coalesce these two file_regions into one. So css reference count is 3 associated with 1 file_region now. 2.Assume initial css reference count of hugetlb_cgroup is 1 again: 2.1 Call hugetlb_reserve_pages with from = 1, to = 3. So css reference count is 2 associated with 1 file_region. Therefore, we might have one file_region while holding one or more css reference counts. This inconsistency could lead to imbalanced css_get() and css_put() pair. If we do css_put one by one (i.g. hole punch case), scenario 2 would put one more css reference. If we do css_put all together (i.g. truncate case), scenario 1 will leak one css reference. The imbalanced css_get() and css_put() pair would result in a non-zero reference when we try to destroy the hugetlb cgroup. The hugetlb cgroup directory is removed __but__ associated resource is not freed. This might result in OOM or can not create a new hugetlb cgroup in a busy workload ultimately. In order to fix this, we have to make sure that one file_region must hold exactly one css reference. So in coalesce_file_region case, we should release one css reference before coalescence. Also only put css reference when the entire file_region is removed. The last thing to note is that the caller of region_add() will only hold one reference to h_cg->css for the whole contiguous reservation region. But this area might be scattered when there are already some file_regions reside in it. As a result, many file_regions may share only one h_cg->css reference. In order to ensure that one file_region must hold exactly one css reference, we should do css_get() for each file_region and release the reference held by caller when they are done. [[email protected]: fix imbalanced css_get and css_put pair for shared mappings] Link: https://lkml.kernel.org/r/[email protected] Link: https://lkml.kernel.org/r/[email protected] Fixes: 075a61d ("hugetlb_cgroup: add accounting for shared mappings") Reported-by: kernel test robot <[email protected]> (auto build test ERROR) Signed-off-by: Miaohe Lin <[email protected]> Reviewed-by: Mike Kravetz <[email protected]> Cc: Aneesh Kumar K.V <[email protected]> Cc: Wanpeng Li <[email protected]> Cc: Mina Almasry <[email protected]> Cc: <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 7acac4b commit d85aecf

File tree

3 files changed

+58
-8
lines changed

3 files changed

+58
-8
lines changed

include/linux/hugetlb_cgroup.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,11 @@ static inline bool hugetlb_cgroup_disabled(void)
113113
return !cgroup_subsys_enabled(hugetlb_cgrp_subsys);
114114
}
115115

116+
static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg)
117+
{
118+
css_put(&h_cg->css);
119+
}
120+
116121
extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
117122
struct hugetlb_cgroup **ptr);
118123
extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
@@ -138,7 +143,8 @@ extern void hugetlb_cgroup_uncharge_counter(struct resv_map *resv,
138143

139144
extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
140145
struct file_region *rg,
141-
unsigned long nr_pages);
146+
unsigned long nr_pages,
147+
bool region_del);
142148

143149
extern void hugetlb_cgroup_file_init(void) __init;
144150
extern void hugetlb_cgroup_migrate(struct page *oldhpage,
@@ -147,7 +153,8 @@ extern void hugetlb_cgroup_migrate(struct page *oldhpage,
147153
#else
148154
static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
149155
struct file_region *rg,
150-
unsigned long nr_pages)
156+
unsigned long nr_pages,
157+
bool region_del)
151158
{
152159
}
153160

@@ -185,6 +192,10 @@ static inline bool hugetlb_cgroup_disabled(void)
185192
return true;
186193
}
187194

195+
static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg)
196+
{
197+
}
198+
188199
static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
189200
struct hugetlb_cgroup **ptr)
190201
{

mm/hugetlb.c

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,17 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
280280
nrg->reservation_counter =
281281
&h_cg->rsvd_hugepage[hstate_index(h)];
282282
nrg->css = &h_cg->css;
283+
/*
284+
* The caller will hold exactly one h_cg->css reference for the
285+
* whole contiguous reservation region. But this area might be
286+
* scattered when there are already some file_regions reside in
287+
* it. As a result, many file_regions may share only one css
288+
* reference. In order to ensure that one file_region must hold
289+
* exactly one h_cg->css reference, we should do css_get for
290+
* each file_region and leave the reference held by caller
291+
* untouched.
292+
*/
293+
css_get(&h_cg->css);
283294
if (!resv->pages_per_hpage)
284295
resv->pages_per_hpage = pages_per_huge_page(h);
285296
/* pages_per_hpage should be the same for all entries in
@@ -293,6 +304,14 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
293304
#endif
294305
}
295306

307+
static void put_uncharge_info(struct file_region *rg)
308+
{
309+
#ifdef CONFIG_CGROUP_HUGETLB
310+
if (rg->css)
311+
css_put(rg->css);
312+
#endif
313+
}
314+
296315
static bool has_same_uncharge_info(struct file_region *rg,
297316
struct file_region *org)
298317
{
@@ -316,6 +335,7 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
316335
prg->to = rg->to;
317336

318337
list_del(&rg->link);
338+
put_uncharge_info(rg);
319339
kfree(rg);
320340

321341
rg = prg;
@@ -327,6 +347,7 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
327347
nrg->from = rg->from;
328348

329349
list_del(&rg->link);
350+
put_uncharge_info(rg);
330351
kfree(rg);
331352
}
332353
}
@@ -662,7 +683,7 @@ static long region_del(struct resv_map *resv, long f, long t)
662683

663684
del += t - f;
664685
hugetlb_cgroup_uncharge_file_region(
665-
resv, rg, t - f);
686+
resv, rg, t - f, false);
666687

667688
/* New entry for end of split region */
668689
nrg->from = t;
@@ -683,21 +704,21 @@ static long region_del(struct resv_map *resv, long f, long t)
683704
if (f <= rg->from && t >= rg->to) { /* Remove entire region */
684705
del += rg->to - rg->from;
685706
hugetlb_cgroup_uncharge_file_region(resv, rg,
686-
rg->to - rg->from);
707+
rg->to - rg->from, true);
687708
list_del(&rg->link);
688709
kfree(rg);
689710
continue;
690711
}
691712

692713
if (f <= rg->from) { /* Trim beginning of region */
693714
hugetlb_cgroup_uncharge_file_region(resv, rg,
694-
t - rg->from);
715+
t - rg->from, false);
695716

696717
del += t - rg->from;
697718
rg->from = t;
698719
} else { /* Trim end of region */
699720
hugetlb_cgroup_uncharge_file_region(resv, rg,
700-
rg->to - f);
721+
rg->to - f, false);
701722

702723
del += rg->to - f;
703724
rg->to = f;
@@ -5187,13 +5208,25 @@ bool hugetlb_reserve_pages(struct inode *inode,
51875208
*/
51885209
long rsv_adjust;
51895210

5211+
/*
5212+
* hugetlb_cgroup_uncharge_cgroup_rsvd() will put the
5213+
* reference to h_cg->css. See comment below for detail.
5214+
*/
51905215
hugetlb_cgroup_uncharge_cgroup_rsvd(
51915216
hstate_index(h),
51925217
(chg - add) * pages_per_huge_page(h), h_cg);
51935218

51945219
rsv_adjust = hugepage_subpool_put_pages(spool,
51955220
chg - add);
51965221
hugetlb_acct_memory(h, -rsv_adjust);
5222+
} else if (h_cg) {
5223+
/*
5224+
* The file_regions will hold their own reference to
5225+
* h_cg->css. So we should release the reference held
5226+
* via hugetlb_cgroup_charge_cgroup_rsvd() when we are
5227+
* done.
5228+
*/
5229+
hugetlb_cgroup_put_rsvd_cgroup(h_cg);
51975230
}
51985231
}
51995232
return true;

mm/hugetlb_cgroup.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,8 @@ void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
391391

392392
void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
393393
struct file_region *rg,
394-
unsigned long nr_pages)
394+
unsigned long nr_pages,
395+
bool region_del)
395396
{
396397
if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
397398
return;
@@ -400,7 +401,12 @@ void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
400401
!resv->reservation_counter) {
401402
page_counter_uncharge(rg->reservation_counter,
402403
nr_pages * resv->pages_per_hpage);
403-
css_put(rg->css);
404+
/*
405+
* Only do css_put(rg->css) when we delete the entire region
406+
* because one file_region must hold exactly one css reference.
407+
*/
408+
if (region_del)
409+
css_put(rg->css);
404410
}
405411
}
406412

0 commit comments

Comments
 (0)