
Commit fd919a8

hnaz authored and akpm00 committed
mm: page_isolation: prepare for hygienic freelists
Page isolation currently sets MIGRATE_ISOLATE on a block, then drops
zone->lock and scans the block for straddling buddies to split up.
Because this happens non-atomically wrt the page allocator, it's
possible for allocations to get a buddy whose first block is a regular
pcp migratetype but whose tail is isolated. This means that in certain
cases memory can still be allocated after isolation. It will also
trigger the freelist type hygiene warnings in subsequent patches.

  start_isolate_page_range()
    isolate_single_pageblock()
      set_migratetype_isolate(tail)
        lock zone->lock
        move_freepages_block(tail) // nop
        set_pageblock_migratetype(tail)
        unlock zone->lock
                                        __rmqueue_smallest()
                                          del_page_from_freelist(head)
                                          expand(head, head_mt)
                                            WARN(head_mt != tail_mt)
      start_pfn = ALIGN_DOWN(MAX_ORDER_NR_PAGES)
      for (pfn = start_pfn, pfn < end_pfn)
        if (PageBuddy())
          split_free_page(head)

Introduce a variant of move_freepages_block() provided by the allocator
specifically for page isolation; it moves free pages, converts the
block, and handles the splitting of straddling buddies while holding
zone->lock.

The allocator knows that pageblocks and buddies are always naturally
aligned, which means that buddies can only straddle blocks if they're
actually >pageblock_order. This means the search-and-split part can be
simplified compared to what page isolation used to do.

Also tighten up the page isolation code around the expectations of
which pages can be large, and how they are freed.

Based on extensive discussions with and invaluable input from Zi Yan.

[[email protected]: work around older gcc warning]
Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Johannes Weiner <[email protected]>
Reviewed-by: Vlastimil Babka <[email protected]>
Tested-by: Baolin Wang <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: "Huang, Ying" <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Zi Yan <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
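To make the natural-alignment argument above concrete, here is a short
worked example with assumed values (pageblock_order = 9, i.e. 512-page
blocks, and MAX_PAGE_ORDER = 10; the specific numbers are illustrative,
not taken from the commit). A free buddy of order n always starts at a
pfn aligned to 2^n, so:

  order-9 buddy:  aligned to 512  -> e.g. pfns 512..1023, exactly one pageblock
  order-10 buddy: aligned to 1024 -> e.g. pfns 0..1023, spanning the two
                  blocks 0..511 and 512..1023

Only buddies of order > pageblock_order can therefore cross a block
boundary, which is what lets the new helper check for a single large
buddy instead of scanning the whole range.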
1 parent f37c0f6 commit fd919a8

File tree

4 files changed: +155 -163 lines changed


include/linux/page-isolation.h

Lines changed: 3 additions & 1 deletion

@@ -34,7 +34,9 @@ static inline bool is_migrate_isolate(int migratetype)
 #define REPORT_FAILURE	0x2
 
 void set_pageblock_migratetype(struct page *page, int migratetype);
-int move_freepages_block(struct zone *zone, struct page *page, int migratetype);
+
+bool move_freepages_block_isolate(struct zone *zone, struct page *page,
+				  int migratetype);
 
 int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 			 int migratetype, int flags, gfp_t gfp_flags);
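For orientation, a minimal caller sketch for the new declaration. This
is hypothetical code, not part of this diff: the real caller changes
live in mm/page_isolation.c, the fourth file of this commit, which is
not shown on this page. It only assumes the usual zone locking pattern
described in the commit message.

  /*
   * Hypothetical sketch: isolate one pageblock. The helper moves the
   * free pages, converts the block to the target migratetype, and
   * splits any straddling buddy, all in one step under zone->lock.
   */
  static int isolate_block_sketch(struct page *page)
  {
          struct zone *zone = page_zone(page);
          unsigned long flags;
          int ret = -EBUSY;

          spin_lock_irqsave(&zone->lock, flags);
          if (move_freepages_block_isolate(zone, page, MIGRATE_ISOLATE))
                  ret = 0;
          spin_unlock_irqrestore(&zone->lock, flags);
          return ret;
  }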

mm/internal.h

Lines changed: 0 additions & 4 deletions

@@ -562,10 +562,6 @@ extern void *memmap_alloc(phys_addr_t size, phys_addr_t align,
 void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
 		unsigned long, enum meminit_context, struct vmem_altmap *, int);
 
-
-int split_free_page(struct page *free_page,
-			unsigned int order, unsigned long split_pfn_offset);
-
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
 /*

mm/page_alloc.c

Lines changed: 120 additions & 84 deletions

@@ -833,64 +833,6 @@ static inline void __free_one_page(struct page *page,
 	page_reporting_notify_free(order);
 }
 
-/**
- * split_free_page() -- split a free page at split_pfn_offset
- * @free_page:		the original free page
- * @order:		the order of the page
- * @split_pfn_offset:	split offset within the page
- *
- * Return -ENOENT if the free page is changed, otherwise 0
- *
- * It is used when the free page crosses two pageblocks with different migratetypes
- * at split_pfn_offset within the page. The split free page will be put into
- * separate migratetype lists afterwards. Otherwise, the function achieves
- * nothing.
- */
-int split_free_page(struct page *free_page,
-			unsigned int order, unsigned long split_pfn_offset)
-{
-	struct zone *zone = page_zone(free_page);
-	unsigned long free_page_pfn = page_to_pfn(free_page);
-	unsigned long pfn;
-	unsigned long flags;
-	int free_page_order;
-	int mt;
-	int ret = 0;
-
-	if (split_pfn_offset == 0)
-		return ret;
-
-	spin_lock_irqsave(&zone->lock, flags);
-
-	if (!PageBuddy(free_page) || buddy_order(free_page) != order) {
-		ret = -ENOENT;
-		goto out;
-	}
-
-	mt = get_pfnblock_migratetype(free_page, free_page_pfn);
-	if (likely(!is_migrate_isolate(mt)))
-		__mod_zone_freepage_state(zone, -(1UL << order), mt);
-
-	del_page_from_free_list(free_page, zone, order);
-	for (pfn = free_page_pfn;
-	     pfn < free_page_pfn + (1UL << order);) {
-		int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
-
-		free_page_order = min_t(unsigned int,
-					pfn ? __ffs(pfn) : order,
-					__fls(split_pfn_offset));
-		__free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order,
-				mt, FPI_NONE);
-		pfn += 1UL << free_page_order;
-		split_pfn_offset -= (1UL << free_page_order);
-		/* we have done the first part, now switch to second part */
-		if (split_pfn_offset == 0)
-			split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
-	}
-out:
-	spin_unlock_irqrestore(&zone->lock, flags);
-	return ret;
-}
 /*
  * A bad page could be due to a number of fields. Instead of multiple branches,
  * try and check multiple fields with one check. The caller must do a detailed
@@ -1674,8 +1616,8 @@ static bool prep_move_freepages_block(struct zone *zone, struct page *page,
 	return true;
 }
 
-int move_freepages_block(struct zone *zone, struct page *page,
-			 int migratetype)
+static int move_freepages_block(struct zone *zone, struct page *page,
+				int migratetype)
 {
 	unsigned long start_pfn, end_pfn;
 
@@ -1686,6 +1628,123 @@ int move_freepages_block(struct zone *zone, struct page *page,
 	return move_freepages(zone, start_pfn, end_pfn, migratetype);
 }
 
+#ifdef CONFIG_MEMORY_ISOLATION
+/* Look for a buddy that straddles start_pfn */
+static unsigned long find_large_buddy(unsigned long start_pfn)
+{
+	int order = 0;
+	struct page *page;
+	unsigned long pfn = start_pfn;
+
+	while (!PageBuddy(page = pfn_to_page(pfn))) {
+		/* Nothing found */
+		if (++order > MAX_PAGE_ORDER)
+			return start_pfn;
+		pfn &= ~0UL << order;
+	}
+
+	/*
+	 * Found a preceding buddy, but does it straddle?
+	 */
+	if (pfn + (1 << buddy_order(page)) > start_pfn)
+		return pfn;
+
+	/* Nothing found */
+	return start_pfn;
+}
+
+/* Split a multi-block free page into its individual pageblocks */
+static void split_large_buddy(struct zone *zone, struct page *page,
+			      unsigned long pfn, int order)
+{
+	unsigned long end_pfn = pfn + (1 << order);
+
+	VM_WARN_ON_ONCE(order <= pageblock_order);
+	VM_WARN_ON_ONCE(pfn & (pageblock_nr_pages - 1));
+
+	/* Caller removed page from freelist, buddy info cleared! */
+	VM_WARN_ON_ONCE(PageBuddy(page));
+
+	while (pfn != end_pfn) {
+		int mt = get_pfnblock_migratetype(page, pfn);
+
+		__free_one_page(page, pfn, zone, pageblock_order, mt, FPI_NONE);
+		pfn += pageblock_nr_pages;
+		page = pfn_to_page(pfn);
+	}
+}
+
+/**
+ * move_freepages_block_isolate - move free pages in block for page isolation
+ * @zone: the zone
+ * @page: the pageblock page
+ * @migratetype: migratetype to set on the pageblock
+ *
+ * This is similar to move_freepages_block(), but handles the special
+ * case encountered in page isolation, where the block of interest
+ * might be part of a larger buddy spanning multiple pageblocks.
+ *
+ * Unlike the regular page allocator path, which moves pages while
+ * stealing buddies off the freelist, page isolation is interested in
+ * arbitrary pfn ranges that may have overlapping buddies on both ends.
+ *
+ * This function handles that. Straddling buddies are split into
+ * individual pageblocks. Only the block of interest is moved.
+ *
+ * Returns %true if pages could be moved, %false otherwise.
+ */
+bool move_freepages_block_isolate(struct zone *zone, struct page *page,
+				  int migratetype)
+{
+	unsigned long start_pfn, end_pfn, pfn;
+	int nr_moved, mt;
+
+	if (!prep_move_freepages_block(zone, page, &start_pfn, &end_pfn,
+				       NULL, NULL))
+		return false;
+
+	/* No splits needed if buddies can't span multiple blocks */
+	if (pageblock_order == MAX_PAGE_ORDER)
+		goto move;
+
+	/* We're a tail block in a larger buddy */
+	pfn = find_large_buddy(start_pfn);
+	if (pfn != start_pfn) {
+		struct page *buddy = pfn_to_page(pfn);
+		int order = buddy_order(buddy);
+		int mt = get_pfnblock_migratetype(buddy, pfn);
+
+		if (!is_migrate_isolate(mt))
+			__mod_zone_freepage_state(zone, -(1UL << order), mt);
+		del_page_from_free_list(buddy, zone, order);
+		set_pageblock_migratetype(page, migratetype);
+		split_large_buddy(zone, buddy, pfn, order);
+		return true;
+	}
+
+	/* We're the starting block of a larger buddy */
+	if (PageBuddy(page) && buddy_order(page) > pageblock_order) {
+		int mt = get_pfnblock_migratetype(page, pfn);
+		int order = buddy_order(page);
+
+		if (!is_migrate_isolate(mt))
+			__mod_zone_freepage_state(zone, -(1UL << order), mt);
+		del_page_from_free_list(page, zone, order);
+		set_pageblock_migratetype(page, migratetype);
+		split_large_buddy(zone, page, pfn, order);
+		return true;
+	}
+move:
+	mt = get_pfnblock_migratetype(page, start_pfn);
+	nr_moved = move_freepages(zone, start_pfn, end_pfn, migratetype);
+	if (!is_migrate_isolate(mt))
+		__mod_zone_freepage_state(zone, -nr_moved, mt);
+	else if (!is_migrate_isolate(migratetype))
+		__mod_zone_freepage_state(zone, nr_moved, migratetype);
+	return true;
+}
+#endif /* CONFIG_MEMORY_ISOLATION */
+
 static void change_pageblock_range(struct page *pageblock_page,
 				   int start_order, int migratetype)
 {
@@ -6365,7 +6424,6 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
 			unsigned migratetype, gfp_t gfp_mask)
 {
 	unsigned long outer_start, outer_end;
-	int order;
 	int ret = 0;
 
 	struct compact_control cc = {
@@ -6438,29 +6496,7 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
 	 * We don't have to hold zone->lock here because the pages are
 	 * isolated thus they won't get removed from buddy.
 	 */
-
-	order = 0;
-	outer_start = start;
-	while (!PageBuddy(pfn_to_page(outer_start))) {
-		if (++order > MAX_PAGE_ORDER) {
-			outer_start = start;
-			break;
-		}
-		outer_start &= ~0UL << order;
-	}
-
-	if (outer_start != start) {
-		order = buddy_order(pfn_to_page(outer_start));
-
-		/*
-		 * outer_start page could be small order buddy page and
-		 * it doesn't include start page. Adjust outer_start
-		 * in this case to report failed page properly
-		 * on tracepoint in test_pages_isolated()
-		 */
-		if (outer_start + (1UL << order) <= start)
-			outer_start = start;
-	}
+	outer_start = find_large_buddy(start);
 
 	/* Make sure the range is really isolated. */
 	if (test_pages_isolated(outer_start, end, 0)) {
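To illustrate the walk that find_large_buddy() performs (and that
alloc_contig_range_noprof() now reuses via the new
outer_start = find_large_buddy(start) call above), here is a worked
trace under assumed conditions: MAX_PAGE_ORDER = 10, an order-10 buddy
free at pfn 0, and start_pfn = 768 (all values illustrative):

  orders 0..8: pfn stays 768 (already aligned to 256), PageBuddy(768) false
  order 9:     pfn &= ~0UL << 9  -> 512,  PageBuddy(512) false (tail page)
  order 10:    pfn &= ~0UL << 10 -> 0,    PageBuddy(0) true, buddy_order = 10
  straddle:    0 + (1 << 10) = 1024 > 768, the buddy covers start_pfn -> return 0

If no buddy at any alignment level covers start_pfn, the function
returns start_pfn unchanged, which both callers treat as "nothing to
split".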
