
Commit e95a985

mjkravetz authored and akpm00 committed
hugetlb: skip to end of PT page mapping when pte not present
Patch series "hugetlb: speed up linear address scanning", v2.

At unmap, fork and remap time hugetlb address ranges are linearly
scanned.  We can optimize these scans if the ranges are sparsely
populated.

Also, enable page table "Lazy copy" for hugetlb at fork.

NOTE: Architectures not defining CONFIG_ARCH_WANT_GENERAL_HUGETLB need to
add an arch specific version of hugetlb_mask_last_page() to take advantage
of sparse address scanning improvements.  Baolin Wang added the routine
for arm64.  Other architectures which could be optimized are: ia64, mips,
parisc, powerpc, s390, sh and sparc.

This patch (of 4):

HugeTLB address ranges are linearly scanned during fork, unmap and remap
operations.  If a non-present entry is encountered, the code currently
continues to the next huge page aligned address.  However, a non-present
entry implies that the page table page for that entry is not present.
Therefore, the linear scan can skip to the end of the range mapped by the
page table page.  This can speed operations on large sparsely populated
hugetlb mappings.

Create a new routine hugetlb_mask_last_page() that will return an address
mask.  When the mask is ORed with an address, the result will be the
address of the last huge page mapped by the associated page table page.
Use this mask to update addresses in routines which linearly scan hugetlb
address ranges when a non-present pte is encountered.

hugetlb_mask_last_page is related to the implementation of huge_pte_offset
as hugetlb_mask_last_page is called when huge_pte_offset returns NULL.
This patch only provides a complete hugetlb_mask_last_page implementation
when CONFIG_ARCH_WANT_GENERAL_HUGETLB is defined.  Architectures which
provide their own versions of huge_pte_offset can also provide their own
version of hugetlb_mask_last_page.

Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Mike Kravetz <[email protected]>
Tested-by: Baolin Wang <[email protected]>
Reviewed-by: Baolin Wang <[email protected]>
Acked-by: Muchun Song <[email protected]>
Reported-by: kernel test robot <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Peter Xu <[email protected]>
Cc: Naoya Horiguchi <[email protected]>
Cc: James Houghton <[email protected]>
Cc: Mina Almasry <[email protected]>
Cc: "Aneesh Kumar K.V" <[email protected]>
Cc: Anshuman Khandual <[email protected]>
Cc: Paul Walmsley <[email protected]>
Cc: Christian Borntraeger <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Rolf Eike Beer <[email protected]>
Cc: David Hildenbrand <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
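
To make the mask arithmetic concrete, here is a minimal userspace sketch (not part of the patch; PMD_SIZE and PUD_SIZE are hard-coded to the common x86_64 values of 2 MiB and 1 GiB) showing how ORing in the mask lets the scan loop's "addr += sz" step jump straight past the range covered by a non-present PMD page table page:

/*
 * Illustrative userspace sketch of the skip arithmetic, NOT kernel code.
 * The size constants below are assumptions (x86_64 defaults), defined
 * locally rather than taken from kernel headers.
 */
#include <stdio.h>

#define PMD_SIZE        (1UL << 21)     /* 2 MiB huge page */
#define PUD_SIZE        (1UL << 30)     /* 1 GiB: range mapped by one PMD page */

int main(void)
{
        unsigned long sz = PMD_SIZE;
        /* Same value the generic hugetlb_mask_last_page() returns for PMD_SIZE. */
        unsigned long last_addr_mask = PUD_SIZE - PMD_SIZE;
        /* Some address inside a 1 GiB region whose PMD page is not present. */
        unsigned long addr = 0x40000000UL + 3 * PMD_SIZE;

        /* OR in the mask: addr now points at the last huge page of that region ... */
        addr |= last_addr_mask;
        /* ... so the loop increment moves to the next PMD page's range. */
        addr += sz;

        printf("next address scanned: 0x%lx\n", addr);   /* prints 0x80000000 */
        return 0;
}

Without the mask, the loop would have probed every remaining 2 MiB step of the 1 GiB region even though none of them can be present.
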
1 parent 3de0de7 commit e95a985

2 files changed: +52 -5 lines changed

include/linux/hugetlb.h

Lines changed: 1 addition & 0 deletions
@@ -194,6 +194,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
                        unsigned long addr, unsigned long sz);
 pte_t *huge_pte_offset(struct mm_struct *mm,
                        unsigned long addr, unsigned long sz);
+unsigned long hugetlb_mask_last_page(struct hstate *h);
 int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
                                unsigned long *addr, pte_t *ptep);
 void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,

mm/hugetlb.c

Lines changed: 51 additions & 5 deletions
@@ -4727,6 +4727,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
        unsigned long npages = pages_per_huge_page(h);
        struct address_space *mapping = src_vma->vm_file->f_mapping;
        struct mmu_notifier_range range;
+       unsigned long last_addr_mask;
        int ret = 0;
 
        if (cow) {
@@ -4746,11 +4747,14 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                i_mmap_lock_read(mapping);
        }
 
+       last_addr_mask = hugetlb_mask_last_page(h);
        for (addr = src_vma->vm_start; addr < src_vma->vm_end; addr += sz) {
                spinlock_t *src_ptl, *dst_ptl;
                src_pte = huge_pte_offset(src, addr, sz);
-               if (!src_pte)
+               if (!src_pte) {
+                       addr |= last_addr_mask;
                        continue;
+               }
                dst_pte = huge_pte_alloc(dst, dst_vma, addr, sz);
                if (!dst_pte) {
                        ret = -ENOMEM;
@@ -4767,8 +4771,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                 * after taking the lock below.
                 */
                dst_entry = huge_ptep_get(dst_pte);
-               if ((dst_pte == src_pte) || !huge_pte_none(dst_entry))
+               if ((dst_pte == src_pte) || !huge_pte_none(dst_entry)) {
+                       addr |= last_addr_mask;
                        continue;
+               }
 
                dst_ptl = huge_pte_lock(h, dst, dst_pte);
                src_ptl = huge_pte_lockptr(h, src, src_pte);
@@ -4928,6 +4934,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
        unsigned long sz = huge_page_size(h);
        struct mm_struct *mm = vma->vm_mm;
        unsigned long old_end = old_addr + len;
+       unsigned long last_addr_mask;
        unsigned long old_addr_copy;
        pte_t *src_pte, *dst_pte;
        struct mmu_notifier_range range;
@@ -4943,12 +4950,16 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
        flush_cache_range(vma, range.start, range.end);
 
        mmu_notifier_invalidate_range_start(&range);
+       last_addr_mask = hugetlb_mask_last_page(h);
        /* Prevent race with file truncation */
        i_mmap_lock_write(mapping);
        for (; old_addr < old_end; old_addr += sz, new_addr += sz) {
                src_pte = huge_pte_offset(mm, old_addr, sz);
-               if (!src_pte)
+               if (!src_pte) {
+                       old_addr |= last_addr_mask;
+                       new_addr |= last_addr_mask;
                        continue;
+               }
                if (huge_pte_none(huge_ptep_get(src_pte)))
                        continue;
 
@@ -4993,6 +5004,7 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
        struct hstate *h = hstate_vma(vma);
        unsigned long sz = huge_page_size(h);
        struct mmu_notifier_range range;
+       unsigned long last_addr_mask;
        bool force_flush = false;
 
        WARN_ON(!is_vm_hugetlb_page(vma));
@@ -5013,11 +5025,14 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                                end);
        adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
        mmu_notifier_invalidate_range_start(&range);
+       last_addr_mask = hugetlb_mask_last_page(h);
        address = start;
        for (; address < end; address += sz) {
                ptep = huge_pte_offset(mm, address, sz);
-               if (!ptep)
+               if (!ptep) {
+                       address |= last_addr_mask;
                        continue;
+               }
 
                ptl = huge_pte_lock(h, mm, ptep);
                if (huge_pmd_unshare(mm, vma, &address, ptep)) {
@@ -6285,6 +6300,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
        unsigned long pages = 0, psize = huge_page_size(h);
        bool shared_pmd = false;
        struct mmu_notifier_range range;
+       unsigned long last_addr_mask;
        bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
        bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
 
@@ -6301,12 +6317,15 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
        flush_cache_range(vma, range.start, range.end);
 
        mmu_notifier_invalidate_range_start(&range);
+       last_addr_mask = hugetlb_mask_last_page(h);
        i_mmap_lock_write(vma->vm_file->f_mapping);
        for (; address < end; address += psize) {
                spinlock_t *ptl;
                ptep = huge_pte_offset(mm, address, psize);
-               if (!ptep)
+               if (!ptep) {
+                       address |= last_addr_mask;
                        continue;
+               }
                ptl = huge_pte_lock(h, mm, ptep);
                if (huge_pmd_unshare(mm, vma, &address, ptep)) {
                        /*
@@ -6856,6 +6875,33 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
        return (pte_t *)pmd;
 }
 
+/*
+ * Return a mask that can be used to update an address to the last huge
+ * page in a page table page mapping size.  Used to skip non-present
+ * page table entries when linearly scanning address ranges.  Architectures
+ * with unique huge page to page table relationships can define their own
+ * version of this routine.
+ */
+unsigned long hugetlb_mask_last_page(struct hstate *h)
+{
+       unsigned long hp_size = huge_page_size(h);
+
+       if (hp_size == PUD_SIZE)
+               return P4D_SIZE - PUD_SIZE;
+       else if (hp_size == PMD_SIZE)
+               return PUD_SIZE - PMD_SIZE;
+       else
+               return 0UL;
+}
+
+#else
+
+/* See description above.  Architectures can provide their own version. */
+__weak unsigned long hugetlb_mask_last_page(struct hstate *h)
+{
+       return 0UL;
+}
+
 #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
 
 /*
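
The generic implementation above only knows about PMD- and PUD-sized huge pages. As a hedged illustration of the arch override mentioned in its comment (a hypothetical sketch, not the arm64 patch referenced in the series cover letter; CONT_PMD_SIZE and CONT_PTE_SIZE are assumed arch-provided constants), an architecture with contiguous-entry huge pages might return "range covered by the page table page minus one huge page" for each size it supports:

/*
 * Hypothetical arch-specific override -- illustrative only.
 * For each supported huge page size, return the mask that moves an
 * address to the last huge page mapped by the corresponding page
 * table page, so non-present scans skip that whole range.
 */
unsigned long hugetlb_mask_last_page(struct hstate *h)
{
        unsigned long hp_size = huge_page_size(h);

        switch (hp_size) {
        case PUD_SIZE:
                return PGDIR_SIZE - PUD_SIZE;
        case CONT_PMD_SIZE:
                return PUD_SIZE - CONT_PMD_SIZE;
        case PMD_SIZE:
                return PUD_SIZE - PMD_SIZE;
        case CONT_PTE_SIZE:
                return PMD_SIZE - CONT_PTE_SIZE;
        default:
                return 0UL;
        }
}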
