
Commit 8d40091

osalvadorvilardaga authored and torvalds committed
x86/vmemmap: handle unpopulated sub-pmd ranges
When sizeof(struct page) is not a power of 2, sections do not span a PMD
anymore and so when populating them some parts of the PMD will remain
unused.

Because of this, PMDs will be left behind when depopulating sections since
remove_pmd_table() thinks that those unused parts are still in use.

Fix this by marking the unused parts with PAGE_UNUSED, so memchr_inv() will
do the right thing and will let us free the PMD when the last user of it
is gone.

This patch is based on a similar patch by David Hildenbrand:
https://lore.kernel.org/linux-mm/[email protected]/

[[email protected]: go back to the ifdef version]

Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Oscar Salvador <[email protected]>
Reviewed-by: David Hildenbrand <[email protected]>
Acked-by: Dave Hansen <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Zi Yan <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 69ccfe7 commit 8d40091
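
To make the size arithmetic in the commit message concrete, here is a minimal userspace sketch (not part of the patch). The 128 MiB section, 4 KiB base page and 2 MiB PMD values are the usual x86-64 ones; 56 bytes is just a hypothetical non-power-of-2 sizeof(struct page). With a 64-byte struct page a section's memmap fills one PMD exactly; with 56 bytes a tail of the PMD stays unpopulated:

#include <stdio.h>

/* Illustrative values only: the section, base-page and PMD sizes follow the
 * usual x86-64 configuration; 56 bytes stands in for a non-power-of-2
 * sizeof(struct page). */
#define SECTION_SIZE    (128UL << 20)   /* one memory section: 128 MiB */
#define BASE_PAGE_SIZE  (4UL << 10)     /* 4 KiB base pages */
#define PMD_SIZE        (2UL << 20)     /* one PMD mapping: 2 MiB */

int main(void)
{
        unsigned long pages_per_section = SECTION_SIZE / BASE_PAGE_SIZE; /* 32768 */
        unsigned long struct_page_sizes[] = { 64, 56 };

        for (int i = 0; i < 2; i++) {
                unsigned long memmap = pages_per_section * struct_page_sizes[i];
                unsigned long unused = (PMD_SIZE - memmap % PMD_SIZE) % PMD_SIZE;

                printf("sizeof(struct page) = %2lu: memmap = %4lu KiB per section, "
                       "unused PMD tail = %3lu KiB\n",
                       struct_page_sizes[i], memmap >> 10, unused >> 10);
        }
        return 0;
}

In the 56-byte case the section's memmap only covers 1792 KiB of the 2 MiB vmemmap range, so depopulating the section never touches the remaining 256 KiB, which is why remove_pmd_table() used to consider the PMD still in use and never freed it.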

File tree

1 file changed: 55 additions, 13 deletions

arch/x86/mm/init_64.c

Lines changed: 55 additions & 13 deletions
@@ -826,6 +826,51 @@ void __init paging_init(void)
         zone_sizes_init();
 }
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#define PAGE_UNUSED 0xFD
+
+/* Returns true if the PMD is completely unused and thus it can be freed */
+static bool __meminit vmemmap_pmd_is_unused(unsigned long addr, unsigned long end)
+{
+        unsigned long start = ALIGN_DOWN(addr, PMD_SIZE);
+
+        memset((void *)addr, PAGE_UNUSED, end - addr);
+
+        return !memchr_inv((void *)start, PAGE_UNUSED, PMD_SIZE);
+}
+
+static void __meminit vmemmap_use_sub_pmd(unsigned long start)
+{
+        /*
+         * As we expect to add in the same granularity as we remove, it's
+         * sufficient to mark only some piece used to block the memmap page from
+         * getting removed when removing some other adjacent memmap (just in
+         * case the first memmap never gets initialized e.g., because the memory
+         * block never gets onlined).
+         */
+        memset((void *)start, 0, sizeof(struct page));
+}
+
+static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
+{
+        /*
+         * Could be our memmap page is filled with PAGE_UNUSED already from a
+         * previous remove. Make sure to reset it.
+         */
+        vmemmap_use_sub_pmd(start);
+
+        /*
+         * Mark with PAGE_UNUSED the unused parts of the new memmap range
+         */
+        if (!IS_ALIGNED(start, PMD_SIZE))
+                memset((void *)start, PAGE_UNUSED,
+                       start - ALIGN_DOWN(start, PMD_SIZE));
+        if (!IS_ALIGNED(end, PMD_SIZE))
+                memset((void *)end, PAGE_UNUSED,
+                       ALIGN(end, PMD_SIZE) - end);
+}
+#endif
+
 /*
  * Memory hotplug specific functions
  */
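
As a side note, the marking scheme these new helpers implement can be mimicked in plain userspace C. In the sketch below (illustrative only, not from the patch), a 2 MiB buffer stands in for one PMD-mapped vmemmap range, 1792 KiB is the hypothetical memmap size from the earlier example, and memchr_inv() is a local re-implementation of the kernel helper of the same name:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_UNUSED     0xFD
#define PMD_SIZE        (2UL << 20)

/* Local stand-in for the kernel's memchr_inv(): returns a pointer to the first
 * byte in the range that differs from 'c', or NULL if all bytes match. */
static void *memchr_inv(const void *start, int c, size_t len)
{
        const unsigned char *p = start;

        for (size_t i = 0; i < len; i++)
                if (p[i] != (unsigned char)c)
                        return (unsigned char *)p + i;
        return NULL;
}

int main(void)
{
        unsigned char *pmd = calloc(1, PMD_SIZE);       /* one 2 MiB vmemmap PMD */
        size_t used = 1792UL << 10;     /* hypothetical memmap size of a section */

        /* Populate: the memmap occupies 'used' bytes; the never-populated tail
         * gets marked PAGE_UNUSED (roughly what vmemmap_use_new_sub_pmd() does). */
        memset(pmd + used, PAGE_UNUSED, PMD_SIZE - used);
        printf("after populate, PMD fully unused? %s\n",
               memchr_inv(pmd, PAGE_UNUSED, PMD_SIZE) ? "no" : "yes");

        /* Depopulate: mark the section's memmap range unused too, then scan the
         * whole PMD (what vmemmap_pmd_is_unused() does). */
        memset(pmd, PAGE_UNUSED, used);
        printf("after remove, PMD fully unused?   %s\n",
               memchr_inv(pmd, PAGE_UNUSED, PMD_SIZE) ? "no" : "yes");

        free(pmd);
        return 0;
}

The patch performs the same two steps: vmemmap_use_new_sub_pmd() marks the never-populated parts of the PMD with PAGE_UNUSED at populate time, and vmemmap_pmd_is_unused() marks the removed range and scans the whole PMD at remove time.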
@@ -871,8 +916,6 @@ int arch_add_memory(int nid, u64 start, u64 size,
         return add_pages(nid, start_pfn, nr_pages, params);
 }
 
-#define PAGE_INUSE 0xFD
-
 static void __meminit free_pagetable(struct page *page, int order)
 {
         unsigned long magic;
@@ -1006,7 +1049,6 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
         unsigned long next, pages = 0;
         pte_t *pte_base;
         pmd_t *pmd;
-        void *page_addr;
 
         pmd = pmd_start + pmd_index(addr);
         for (; addr < end; addr = next, pmd++) {
@@ -1026,22 +1068,16 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
                                 pmd_clear(pmd);
                                 spin_unlock(&init_mm.page_table_lock);
                                 pages++;
-                        } else {
-                                /* If here, we are freeing vmemmap pages. */
-                                memset((void *)addr, PAGE_INUSE, next - addr);
-
-                                page_addr = page_address(pmd_page(*pmd));
-                                if (!memchr_inv(page_addr, PAGE_INUSE,
-                                                PMD_SIZE)) {
+                        }
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+                        else if (vmemmap_pmd_is_unused(addr, next)) {
                                         free_hugepage_table(pmd_page(*pmd),
                                                             altmap);
-
                                         spin_lock(&init_mm.page_table_lock);
                                         pmd_clear(pmd);
                                         spin_unlock(&init_mm.page_table_lock);
-                                }
                         }
-
+#endif
                         continue;
                 }
 

@@ -1492,11 +1528,17 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
 
                                 addr_end = addr + PMD_SIZE;
                                 p_end = p + PMD_SIZE;
+
+                                if (!IS_ALIGNED(addr, PMD_SIZE) ||
+                                    !IS_ALIGNED(next, PMD_SIZE))
+                                        vmemmap_use_new_sub_pmd(addr, next);
+
                                 continue;
                         } else if (altmap)
                                 return -ENOMEM; /* no fallback */
                 } else if (pmd_large(*pmd)) {
                         vmemmap_verify((pte_t *)pmd, node, addr, next);
+                        vmemmap_use_sub_pmd(addr);
                         continue;
                 }
                 if (vmemmap_populate_basepages(addr, next, node, NULL))
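
For the populate-side hook above, a short sketch of the alignment arithmetic (illustrative only: the addresses are invented, and ALIGN_DOWN()/ALIGN()/IS_ALIGNED() are userspace stand-ins for the kernel macros) shows which head and tail bytes of the covering PMD vmemmap_use_new_sub_pmd() is meant to mark as unused when a new sub-PMD range is not PMD-aligned:

#include <stdio.h>

#define PMD_SIZE        (2UL << 20)
/* Userspace stand-ins for the kernel's alignment helpers. */
#define ALIGN_DOWN(x, a)        ((x) & ~((a) - 1))
#define ALIGN(x, a)             (((x) + (a) - 1) & ~((a) - 1))
#define IS_ALIGNED(x, a)        (((x) & ((a) - 1)) == 0)

int main(void)
{
        /* Invented vmemmap range that neither starts nor ends on a PMD
         * boundary (256 KiB into the PMD, ending 256 KiB before its end). */
        unsigned long start = 0xffffea0000400000UL + (256UL << 10);
        unsigned long end   = start + (1536UL << 10);

        if (!IS_ALIGNED(start, PMD_SIZE))
                printf("unused head: %lu KiB (PMD start up to 'start')\n",
                       (start - ALIGN_DOWN(start, PMD_SIZE)) >> 10);
        if (!IS_ALIGNED(end, PMD_SIZE))
                printf("unused tail: %lu KiB ('end' up to PMD end)\n",
                       (ALIGN(end, PMD_SIZE) - end) >> 10);
        return 0;
}

These head and tail regions are exactly the PAGE_UNUSED bytes that later let memchr_inv() in vmemmap_pmd_is_unused() report the PMD as completely unused once the populated middle is depopulated.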
