
Commit 9a4e9f3

kvaneesh authored and torvalds committed
mm: update get_user_pages_longterm to migrate pages allocated from CMA region
This patch updates get_user_pages_longterm() to migrate pages allocated out of the CMA region. This makes sure that we don't keep non-movable pages (due to an elevated page reference count) in the CMA area.

This will be used by ppc64 in a later patch to avoid pinning pages in the CMA region. ppc64 uses the CMA region for allocating the hardware page table (hash page table), and the inability to migrate pages out of the CMA region results in page table allocation failures.

One easy way to hit this is a guest using a VFIO passthrough device. VFIO locks all of the guest's memory, and if the guest memory is backed by the CMA region it becomes unmovable, fragmenting the CMA area and possibly preventing other guests from allocating a large enough hash page table.

NOTE: we allocate the new page without using __GFP_THISNODE.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Aneesh Kumar K.V <[email protected]>
Cc: Alexey Kardashevskiy <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: David Gibson <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Mel Gorman <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
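For context, here is a minimal sketch of a VFIO-style caller (hypothetical, not part of this commit): a driver long-term pins a user buffer for DMA via get_user_pages_longterm(), and after this patch any of those pages that happen to sit in a CMA pageblock are migrated out before the pin is taken, so the pin can no longer fragment the CMA area. pin_user_buffer() is an illustrative name; the locking follows the usual rule that get_user_pages() variants run with mmap_sem held for read.

#include <linux/mm.h>
#include <linux/sched.h>

/* Hypothetical VFIO-style caller: long-term pin a user buffer. */
static long pin_user_buffer(unsigned long uaddr, unsigned long nr_pages,
                            struct page **pages)
{
        long pinned;

        down_read(&current->mm->mmap_sem);
        /* With this patch, CMA-resident pages are migrated out first. */
        pinned = get_user_pages_longterm(uaddr, nr_pages, FOLL_WRITE,
                                         pages, NULL);
        up_read(&current->mm->mmap_sem);

        return pinned;  /* number of pages pinned, or -errno */
}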
1 parent d7fefcc commit 9a4e9f3

4 files changed: +182, -27 lines

include/linux/hugetlb.h

Lines changed: 2 additions & 0 deletions

@@ -371,6 +371,8 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
                                 nodemask_t *nmask);
 struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
                                 unsigned long address);
+struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+                                     int nid, nodemask_t *nmask);
 int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
                         pgoff_t idx);

include/linux/mm.h

Lines changed: 2 additions & 1 deletion

@@ -1536,7 +1536,8 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
                     unsigned int gup_flags, struct page **pages, int *locked);
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
                     struct page **pages, unsigned int gup_flags);
-#ifdef CONFIG_FS_DAX
+
+#if defined(CONFIG_FS_DAX) || defined(CONFIG_CMA)
 long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
                             unsigned int gup_flags, struct page **pages,
                             struct vm_area_struct **vmas);

mm/gup.c

Lines changed: 176 additions & 24 deletions

@@ -13,6 +13,9 @@
 #include <linux/sched/signal.h>
 #include <linux/rwsem.h>
 #include <linux/hugetlb.h>
+#include <linux/migrate.h>
+#include <linux/mm_inline.h>
+#include <linux/sched/mm.h>
 
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
@@ -1126,7 +1129,167 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
 }
 EXPORT_SYMBOL(get_user_pages);
 
+#if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA)
+
 #ifdef CONFIG_FS_DAX
+static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
+{
+        long i;
+        struct vm_area_struct *vma_prev = NULL;
+
+        for (i = 0; i < nr_pages; i++) {
+                struct vm_area_struct *vma = vmas[i];
+
+                if (vma == vma_prev)
+                        continue;
+
+                vma_prev = vma;
+
+                if (vma_is_fsdax(vma))
+                        return true;
+        }
+        return false;
+}
+#else
+static inline bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
+{
+        return false;
+}
+#endif
+
+#ifdef CONFIG_CMA
+static struct page *new_non_cma_page(struct page *page, unsigned long private)
+{
+        /*
+         * We want to make sure we allocate the new page from the same node
+         * as the source page.
+         */
+        int nid = page_to_nid(page);
+        /*
+         * Trying to allocate a page for migration. Ignore allocation
+         * failure warnings. We don't force __GFP_THISNODE here because
+         * this node here is the node where we have CMA reservation and
+         * in some case these nodes will have really less non movable
+         * allocation memory.
+         */
+        gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;
+
+        if (PageHighMem(page))
+                gfp_mask |= __GFP_HIGHMEM;
+
+#ifdef CONFIG_HUGETLB_PAGE
+        if (PageHuge(page)) {
+                struct hstate *h = page_hstate(page);
+                /*
+                 * We don't want to dequeue from the pool because pool pages will
+                 * mostly be from the CMA region.
+                 */
+                return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+        }
+#endif
+        if (PageTransHuge(page)) {
+                struct page *thp;
+                /*
+                 * ignore allocation failure warnings
+                 */
+                gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;
+
+                /*
+                 * Remove the movable mask so that we don't allocate from
+                 * CMA area again.
+                 */
+                thp_gfpmask &= ~__GFP_MOVABLE;
+                thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
+                if (!thp)
+                        return NULL;
+                prep_transhuge_page(thp);
+                return thp;
+        }
+
+        return __alloc_pages_node(nid, gfp_mask, 0);
+}
+
+static long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
+                                        unsigned int gup_flags,
+                                        struct page **pages,
+                                        struct vm_area_struct **vmas)
+{
+        long i;
+        bool drain_allow = true;
+        bool migrate_allow = true;
+        LIST_HEAD(cma_page_list);
+
+check_again:
+        for (i = 0; i < nr_pages; i++) {
+                /*
+                 * If we get a page from the CMA zone, since we are going to
+                 * be pinning these entries, we might as well move them out
+                 * of the CMA zone if possible.
+                 */
+                if (is_migrate_cma_page(pages[i])) {
+
+                        struct page *head = compound_head(pages[i]);
+
+                        if (PageHuge(head)) {
+                                isolate_huge_page(head, &cma_page_list);
+                        } else {
+                                if (!PageLRU(head) && drain_allow) {
+                                        lru_add_drain_all();
+                                        drain_allow = false;
+                                }
+
+                                if (!isolate_lru_page(head)) {
+                                        list_add_tail(&head->lru, &cma_page_list);
+                                        mod_node_page_state(page_pgdat(head),
+                                                            NR_ISOLATED_ANON +
+                                                            page_is_file_cache(head),
+                                                            hpage_nr_pages(head));
+                                }
+                        }
+                }
+        }
+
+        if (!list_empty(&cma_page_list)) {
+                /*
+                 * drop the above get_user_pages reference.
+                 */
+                for (i = 0; i < nr_pages; i++)
+                        put_page(pages[i]);
+
+                if (migrate_pages(&cma_page_list, new_non_cma_page,
+                                  NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
+                        /*
+                         * some of the pages failed migration. Do get_user_pages
+                         * without migration.
+                         */
+                        migrate_allow = false;
+
+                        if (!list_empty(&cma_page_list))
+                                putback_movable_pages(&cma_page_list);
+                }
+                /*
+                 * We did migrate all the pages, Try to get the page references
+                 * again migrating any new CMA pages which we failed to isolate
+                 * earlier.
+                 */
+                nr_pages = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+                if ((nr_pages > 0) && migrate_allow) {
+                        drain_allow = true;
+                        goto check_again;
+                }
+        }
+
+        return nr_pages;
+}
+#else
+static inline long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
+                                               unsigned int gup_flags,
+                                               struct page **pages,
+                                               struct vm_area_struct **vmas)
+{
+        return nr_pages;
+}
+#endif
+
 /*
  * This is the same as get_user_pages() in that it assumes we are
  * operating on the current task's mm, but it goes further to validate
@@ -1140,11 +1303,11 @@ EXPORT_SYMBOL(get_user_pages);
  * Contrast this to iov_iter_get_pages() usages which are transient.
  */
 long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
-                unsigned int gup_flags, struct page **pages,
-                struct vm_area_struct **vmas_arg)
+                             unsigned int gup_flags, struct page **pages,
+                             struct vm_area_struct **vmas_arg)
 {
         struct vm_area_struct **vmas = vmas_arg;
-        struct vm_area_struct *vma_prev = NULL;
+        unsigned long flags;
         long rc, i;
 
         if (!pages)
@@ -1157,31 +1320,20 @@ long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
                 return -ENOMEM;
         }
 
+        flags = memalloc_nocma_save();
         rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+        memalloc_nocma_restore(flags);
+        if (rc < 0)
+                goto out;
 
-        for (i = 0; i < rc; i++) {
-                struct vm_area_struct *vma = vmas[i];
-
-                if (vma == vma_prev)
-                        continue;
-
-                vma_prev = vma;
-
-                if (vma_is_fsdax(vma))
-                        break;
-        }
-
-        /*
-         * Either get_user_pages() failed, or the vma validation
-         * succeeded, in either case we don't need to put_page() before
-         * returning.
-         */
-        if (i >= rc)
+        if (check_dax_vmas(vmas, rc)) {
+                for (i = 0; i < rc; i++)
+                        put_page(pages[i]);
+                rc = -EOPNOTSUPP;
                 goto out;
+        }
 
-        for (i = 0; i < rc; i++)
-                put_page(pages[i]);
-        rc = -EOPNOTSUPP;
+        rc = check_and_migrate_cma_pages(start, rc, gup_flags, pages, vmas);
 out:
         if (vmas != vmas_arg)
                 kfree(vmas);
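An aside on the migrate_pages() contract used above: its second argument is a new_page_t callback that receives each isolated page plus the opaque private value and returns a replacement page (or NULL on failure). A stripped-down sketch of new_non_cma_page() under the same no-CMA constraint, omitting the hugetlb and THP cases the real patch handles:

/* Simplified, illustrative version of the allocation callback. */
static struct page *new_non_cma_page_minimal(struct page *page,
                                             unsigned long private)
{
        /* Stay on the source page's node. */
        int nid = page_to_nid(page);
        /*
         * GFP_USER does not carry __GFP_MOVABLE, so the replacement
         * page cannot be taken from a CMA pageblock.
         */
        gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;

        if (PageHighMem(page))
                gfp_mask |= __GFP_HIGHMEM;

        return __alloc_pages_node(nid, gfp_mask, 0);
}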

mm/hugetlb.c

Lines changed: 2 additions & 2 deletions

@@ -1587,8 +1587,8 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
         return page;
 }
 
-static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
-                int nid, nodemask_t *nmask)
+struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+                                     int nid, nodemask_t *nmask)
 {
         struct page *page;