 #include <linux/sched/signal.h>
 #include <linux/rwsem.h>
 #include <linux/hugetlb.h>
+#include <linux/migrate.h>
+#include <linux/mm_inline.h>
+#include <linux/sched/mm.h>

 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
@@ -1126,7 +1129,167 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
 }
 EXPORT_SYMBOL(get_user_pages);

+#if defined(CONFIG_FS_DAX) || defined(CONFIG_CMA)
+
 #ifdef CONFIG_FS_DAX
+static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
+{
+        long i;
+        struct vm_area_struct *vma_prev = NULL;
+
+        for (i = 0; i < nr_pages; i++) {
+                struct vm_area_struct *vma = vmas[i];
+
+                if (vma == vma_prev)
+                        continue;
+
+                vma_prev = vma;
+
+                if (vma_is_fsdax(vma))
+                        return true;
+        }
+        return false;
+}
+#else
+static inline bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
+{
+        return false;
+}
+#endif
+
+#ifdef CONFIG_CMA
+static struct page *new_non_cma_page(struct page *page, unsigned long private)
+{
+        /*
+         * We want to make sure we allocate the new page from the same node
+         * as the source page.
+         */
+        int nid = page_to_nid(page);
+        /*
+         * Trying to allocate a page for migration. Ignore allocation
+         * failure warnings. We don't force __GFP_THISNODE here because
+         * this node is the one holding the CMA reservation, and in some
+         * cases such nodes have very little non-movable memory available
+         * for allocation.
+         */
+        gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;
+
+        if (PageHighMem(page))
+                gfp_mask |= __GFP_HIGHMEM;
+
+#ifdef CONFIG_HUGETLB_PAGE
+        if (PageHuge(page)) {
+                struct hstate *h = page_hstate(page);
+                /*
+                 * We don't want to dequeue from the pool because pool pages will
+                 * mostly be from the CMA region.
+                 */
+                return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+        }
+#endif
+        if (PageTransHuge(page)) {
+                struct page *thp;
+                /*
+                 * ignore allocation failure warnings
+                 */
+                gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;
+
+                /*
+                 * Remove the movable mask so that we don't allocate from
+                 * CMA area again.
+                 */
+                thp_gfpmask &= ~__GFP_MOVABLE;
+                thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
+                if (!thp)
+                        return NULL;
+                prep_transhuge_page(thp);
+                return thp;
+        }
+
+        return __alloc_pages_node(nid, gfp_mask, 0);
+}
+
+static long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
+                                        unsigned int gup_flags,
+                                        struct page **pages,
+                                        struct vm_area_struct **vmas)
+{
+        long i;
+        bool drain_allow = true;
+        bool migrate_allow = true;
+        LIST_HEAD(cma_page_list);
+
+check_again:
+        for (i = 0; i < nr_pages; i++) {
+                /*
+                 * If we get a page from the CMA zone, since we are going to
+                 * be pinning these entries, we might as well move them out
+                 * of the CMA zone if possible.
+                 */
+                if (is_migrate_cma_page(pages[i])) {
+
+                        struct page *head = compound_head(pages[i]);
+
+                        if (PageHuge(head)) {
+                                isolate_huge_page(head, &cma_page_list);
+                        } else {
+                                if (!PageLRU(head) && drain_allow) {
+                                        lru_add_drain_all();
+                                        drain_allow = false;
+                                }
+
+                                if (!isolate_lru_page(head)) {
+                                        list_add_tail(&head->lru, &cma_page_list);
+                                        mod_node_page_state(page_pgdat(head),
+                                                            NR_ISOLATED_ANON +
+                                                            page_is_file_cache(head),
+                                                            hpage_nr_pages(head));
+                                }
+                        }
+                }
+        }
+
+        if (!list_empty(&cma_page_list)) {
+                /*
+                 * Drop the get_user_pages() references taken above.
+                 */
+                for (i = 0; i < nr_pages; i++)
+                        put_page(pages[i]);
+
+                if (migrate_pages(&cma_page_list, new_non_cma_page,
+                                  NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
+                        /*
+                         * Some of the pages failed migration. Do get_user_pages()
+                         * without migration.
+                         */
+                        migrate_allow = false;
+
+                        if (!list_empty(&cma_page_list))
+                                putback_movable_pages(&cma_page_list);
+                }
+                /*
+                 * We did migrate all the pages; try to get the page references
+                 * again, migrating any new CMA pages which we failed to isolate
+                 * earlier.
+                 */
+                nr_pages = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+                if ((nr_pages > 0) && migrate_allow) {
+                        drain_allow = true;
+                        goto check_again;
+                }
+        }
+
+        return nr_pages;
+}
+#else
+static inline long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
+                                               unsigned int gup_flags,
+                                               struct page **pages,
+                                               struct vm_area_struct **vmas)
+{
+        return nr_pages;
+}
+#endif
+
 /*
  * This is the same as get_user_pages() in that it assumes we are
  * operating on the current task's mm, but it goes further to validate
@@ -1140,11 +1303,11 @@ EXPORT_SYMBOL(get_user_pages);
  * Contrast this to iov_iter_get_pages() usages which are transient.
  */
 long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
-                unsigned int gup_flags, struct page **pages,
-                struct vm_area_struct **vmas_arg)
+                             unsigned int gup_flags, struct page **pages,
+                             struct vm_area_struct **vmas_arg)
 {
         struct vm_area_struct **vmas = vmas_arg;
-        struct vm_area_struct *vma_prev = NULL;
+        unsigned long flags;
         long rc, i;

         if (!pages)
@@ -1157,31 +1320,20 @@ long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
                 return -ENOMEM;
         }

+        flags = memalloc_nocma_save();
         rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+        memalloc_nocma_restore(flags);
+        if (rc < 0)
+                goto out;

-        for (i = 0; i < rc; i++) {
-                struct vm_area_struct *vma = vmas[i];
-
-                if (vma == vma_prev)
-                        continue;
-
-                vma_prev = vma;
-
-                if (vma_is_fsdax(vma))
-                        break;
-        }
-
-        /*
-         * Either get_user_pages() failed, or the vma validation
-         * succeeded, in either case we don't need to put_page() before
-         * returning.
-         */
-        if (i >= rc)
+        if (check_dax_vmas(vmas, rc)) {
+                for (i = 0; i < rc; i++)
+                        put_page(pages[i]);
+                rc = -EOPNOTSUPP;
                 goto out;
+        }

-        for (i = 0; i < rc; i++)
-                put_page(pages[i]);
-        rc = -EOPNOTSUPP;
+        rc = check_and_migrate_cma_pages(start, rc, gup_flags, pages, vmas);
 out:
         if (vmas != vmas_arg)
                 kfree(vmas);
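
For readers unfamiliar with the call side, here is a minimal sketch of how a long-term pinning caller might use get_user_pages_longterm() after this change. The helper names, the FOLL_WRITE flag choice, and the error handling are illustrative assumptions, not part of this patch; the mmap_sem locking mirrors what get_user_pages() already requires of its callers.

/* Hypothetical driver helpers, for illustration only (not in this patch). */
#include <linux/mm.h>
#include <linux/rwsem.h>
#include <linux/sched.h>

static long example_pin_user_buffer(unsigned long uaddr, unsigned long nr_pages,
                                    struct page **pages)
{
        long pinned;

        /*
         * Like get_user_pages(), get_user_pages_longterm() operates on
         * current->mm and expects mmap_sem to be held for read.
         */
        down_read(&current->mm->mmap_sem);
        pinned = get_user_pages_longterm(uaddr, nr_pages, FOLL_WRITE,
                                         pages, NULL);
        up_read(&current->mm->mmap_sem);

        /* Returns the number of pages pinned, or a negative errno. */
        return pinned;
}

static void example_unpin_user_buffer(struct page **pages, long nr_pinned)
{
        long i;

        /* Drop the long-term references once the device is done with them. */
        for (i = 0; i < nr_pinned; i++)
                put_page(pages[i]);
}

With this patch, a caller like the sketch above still gets -EOPNOTSUPP for FS DAX mappings, and on CONFIG_CMA kernels it receives pages that have been migrated out of the CMA area before being pinned.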