@@ -599,6 +599,10 @@ extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
599
599
extern pgtable_t pgtable_trans_huge_withdraw (struct mm_struct * mm , pmd_t * pmdp );
600
600
#endif
601
601
602
+ #ifndef arch_needs_pgtable_deposit
603
+ #define arch_needs_pgtable_deposit () (false)
604
+ #endif
605
+
602
606
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
603
607
/*
604
608
* This is an implementation of pmdp_establish() that is only suitable for an
@@ -1300,9 +1304,10 @@ static inline int pud_trans_huge(pud_t pud)
1300
1304
}
1301
1305
#endif
1302
1306
1303
- /* See pmd_none_or_trans_huge_or_clear_bad for discussion. */
1304
- static inline int pud_none_or_trans_huge_or_dev_or_clear_bad (pud_t * pud )
1307
+ static inline int pud_trans_unstable (pud_t * pud )
1305
1308
{
1309
+ #if defined(CONFIG_TRANSPARENT_HUGEPAGE ) && \
1310
+ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD )
1306
1311
pud_t pudval = READ_ONCE (* pud );
1307
1312
1308
1313
if (pud_none (pudval ) || pud_trans_huge (pudval ) || pud_devmap (pudval ))
@@ -1311,104 +1316,10 @@ static inline int pud_none_or_trans_huge_or_dev_or_clear_bad(pud_t *pud)
1311
1316
pud_clear_bad (pud );
1312
1317
return 1 ;
1313
1318
}
1314
- return 0 ;
1315
- }
1316
-
1317
- /* See pmd_trans_unstable for discussion. */
1318
- static inline int pud_trans_unstable (pud_t * pud )
1319
- {
1320
- #if defined(CONFIG_TRANSPARENT_HUGEPAGE ) && \
1321
- defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD )
1322
- return pud_none_or_trans_huge_or_dev_or_clear_bad (pud );
1323
- #else
1324
- return 0 ;
1325
1319
#endif
1326
- }
1327
-
1328
- #ifndef arch_needs_pgtable_deposit
1329
- #define arch_needs_pgtable_deposit () (false)
1330
- #endif
1331
- /*
1332
- * This function is meant to be used by sites walking pagetables with
1333
- * the mmap_lock held in read mode to protect against MADV_DONTNEED and
1334
- * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
1335
- * into a null pmd and the transhuge page fault can convert a null pmd
1336
- * into an hugepmd or into a regular pmd (if the hugepage allocation
1337
- * fails). While holding the mmap_lock in read mode the pmd becomes
1338
- * stable and stops changing under us only if it's not null and not a
1339
- * transhuge pmd. When those races occurs and this function makes a
1340
- * difference vs the standard pmd_none_or_clear_bad, the result is
1341
- * undefined so behaving like if the pmd was none is safe (because it
1342
- * can return none anyway). The compiler level barrier() is critically
1343
- * important to compute the two checks atomically on the same pmdval.
1344
- *
1345
- * For 32bit kernels with a 64bit large pmd_t this automatically takes
1346
- * care of reading the pmd atomically to avoid SMP race conditions
1347
- * against pmd_populate() when the mmap_lock is hold for reading by the
1348
- * caller (a special atomic read not done by "gcc" as in the generic
1349
- * version above, is also needed when THP is disabled because the page
1350
- * fault can populate the pmd from under us).
1351
- */
1352
- static inline int pmd_none_or_trans_huge_or_clear_bad (pmd_t * pmd )
1353
- {
1354
- pmd_t pmdval = pmdp_get_lockless (pmd );
1355
- /*
1356
- * !pmd_present() checks for pmd migration entries
1357
- *
1358
- * The complete check uses is_pmd_migration_entry() in linux/swapops.h
1359
- * But using that requires moving current function and pmd_trans_unstable()
1360
- * to linux/swapops.h to resolve dependency, which is too much code move.
1361
- *
1362
- * !pmd_present() is equivalent to is_pmd_migration_entry() currently,
1363
- * because !pmd_present() pages can only be under migration not swapped
1364
- * out.
1365
- *
1366
- * pmd_none() is preserved for future condition checks on pmd migration
1367
- * entries and not confusing with this function name, although it is
1368
- * redundant with !pmd_present().
1369
- */
1370
- if (pmd_none (pmdval ) || pmd_trans_huge (pmdval ) ||
1371
- (IS_ENABLED (CONFIG_ARCH_ENABLE_THP_MIGRATION ) && !pmd_present (pmdval )))
1372
- return 1 ;
1373
- if (unlikely (pmd_bad (pmdval ))) {
1374
- pmd_clear_bad (pmd );
1375
- return 1 ;
1376
- }
1377
1320
return 0 ;
1378
1321
}
1379
1322
1380
- /*
1381
- * This is a noop if Transparent Hugepage Support is not built into
1382
- * the kernel. Otherwise it is equivalent to
1383
- * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
1384
- * places that already verified the pmd is not none and they want to
1385
- * walk ptes while holding the mmap sem in read mode (write mode don't
1386
- * need this). If THP is not enabled, the pmd can't go away under the
1387
- * code even if MADV_DONTNEED runs, but if THP is enabled we need to
1388
- * run a pmd_trans_unstable before walking the ptes after
1389
- * split_huge_pmd returns (because it may have run when the pmd become
1390
- * null, but then a page fault can map in a THP and not a regular page).
1391
- */
1392
- static inline int pmd_trans_unstable (pmd_t * pmd )
1393
- {
1394
- #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1395
- return pmd_none_or_trans_huge_or_clear_bad (pmd );
1396
- #else
1397
- return 0 ;
1398
- #endif
1399
- }
1400
-
1401
- /*
1402
- * the ordering of these checks is important for pmds with _page_devmap set.
1403
- * if we check pmd_trans_unstable() first we will trip the bad_pmd() check
1404
- * inside of pmd_none_or_trans_huge_or_clear_bad(). this will end up correctly
1405
- * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
1406
- */
1407
- static inline int pmd_devmap_trans_unstable (pmd_t * pmd )
1408
- {
1409
- return pmd_devmap (* pmd ) || pmd_trans_unstable (pmd );
1410
- }
1411
-
1412
1323
#ifndef CONFIG_NUMA_BALANCING
1413
1324
/*
1414
1325
* Technically a PTE can be PROTNONE even when not doing NUMA balancing but
0 commit comments