
Commit 5e1f0f0

gormanm authored and torvalds committed
mm, compaction: capture a page under direct compaction
Compaction is inherently race-prone as a suitable page freed during
compaction can be allocated by any parallel task. This patch uses a
capture_control structure to isolate a page immediately when it is freed
by a direct compactor in the slow path of the page allocator. The intent
is to avoid redundant scanning.

                                    5.0.0-rc1              5.0.0-rc1
                              selective-v3r17          capture-v3r19
Amean     fault-both-1         0.00 (   0.00%)        0.00 *   0.00%*
Amean     fault-both-3      2582.11 (   0.00%)     2563.68 (   0.71%)
Amean     fault-both-5      4500.26 (   0.00%)     4233.52 (   5.93%)
Amean     fault-both-7      5819.53 (   0.00%)     6333.65 (  -8.83%)
Amean     fault-both-12     9321.18 (   0.00%)     9759.38 (  -4.70%)
Amean     fault-both-18     9782.76 (   0.00%)    10338.76 (  -5.68%)
Amean     fault-both-24    15272.81 (   0.00%)    13379.55 *  12.40%*
Amean     fault-both-30    15121.34 (   0.00%)    16158.25 (  -6.86%)
Amean     fault-both-32    18466.67 (   0.00%)    18971.21 (  -2.73%)

Latency is only moderately affected, but the devil is in the details. A
closer examination indicates that base page fault latency is reduced but
latency of huge pages is increased as it takes greater care to succeed.
Part of the "problem" is that allocation success rates are close to 100%
even when under pressure, which makes compaction harder:

                               5.0.0-rc1              5.0.0-rc1
                         selective-v3r17          capture-v3r19
Percentage huge-3      96.70 (   0.00%)      98.23 (   1.58%)
Percentage huge-5      96.99 (   0.00%)      95.30 (  -1.75%)
Percentage huge-7      94.19 (   0.00%)      97.24 (   3.24%)
Percentage huge-12     94.95 (   0.00%)      97.35 (   2.53%)
Percentage huge-18     96.74 (   0.00%)      97.30 (   0.58%)
Percentage huge-24     97.07 (   0.00%)      97.55 (   0.50%)
Percentage huge-30     95.69 (   0.00%)      98.50 (   2.95%)
Percentage huge-32     96.70 (   0.00%)      99.27 (   2.65%)

Scan rates are reduced as expected: by 6% for the migration scanner and
by 29% for the free scanner, indicating that there is less redundant work:

Compaction migrate scanned    20815362    19573286
Compaction free scanned       16352612    11510663

[[email protected]: remove redundant check]
Link: http://lkml.kernel.org/r/[email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Mel Gorman <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Dan Carpenter <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: YueHaibing <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
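The mechanism is easiest to see as a handoff between the page free path and the direct compactor: the compactor publishes a capture_control through its task_struct, and the free path offers a page of the matching order to that structure before the page can reach the freelists and be raced away. Below is a minimal userspace model of the handoff, purely illustrative: the names mirror the patch, but the freelist, zone, and migratetype handling of the real code are omitted, and current_capc is a stand-in for the kernel's current->capture_control.

#include <stdbool.h>
#include <stdio.h>

struct page { int order; };

struct capture_control {
        int order;              /* order the direct compactor wants */
        struct page *page;      /* set once a page has been captured */
};

/* Stand-in for the kernel's per-task current->capture_control */
static struct capture_control *current_capc;

/* Free path: offer the page to a registered compactor first */
static bool compaction_capture(struct page *page, int order)
{
        struct capture_control *capc = current_capc;

        if (!capc || capc->page || order != capc->order)
                return false;

        capc->page = page;      /* the page never touches the freelist */
        return true;
}

static void free_one_page(struct page *page)
{
        if (compaction_capture(page, page->order))
                return;
        /* ...otherwise merge buddies and place on a freelist... */
}

int main(void)
{
        struct page p = { .order = 2 };
        struct capture_control capc = { .order = 2, .page = NULL };

        current_capc = &capc;   /* the direct compactor registers itself */
        free_one_page(&p);      /* migration frees a suitably sized page */
        current_capc = NULL;

        printf("captured: %s\n", capc.page ? "yes" : "no");
        return 0;
}

Run standalone, this prints "captured: yes"; in the kernel the equivalent check happens inside __free_one_page() before any buddy merging, as the mm/page_alloc.c hunks below show.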
1 parent e332f74 commit 5e1f0f0

6 files changed: +111 −12 lines changed

include/linux/compaction.h

Lines changed: 2 additions & 1 deletion
@@ -93,7 +93,8 @@ extern int sysctl_compact_unevictable_allowed;
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
                unsigned int order, unsigned int alloc_flags,
-               const struct alloc_context *ac, enum compact_priority prio);
+               const struct alloc_context *ac, enum compact_priority prio,
+               struct page **page);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern enum compact_result compaction_suitable(struct zone *zone, int order,
                unsigned int alloc_flags, int classzone_idx);

include/linux/sched.h

Lines changed: 4 additions & 0 deletions
@@ -47,6 +47,7 @@ struct pid_namespace;
 struct pipe_inode_info;
 struct rcu_node;
 struct reclaim_state;
+struct capture_control;
 struct robust_list_head;
 struct sched_attr;
 struct sched_param;
@@ -958,6 +959,9 @@ struct task_struct {
 
        struct io_context               *io_context;
 
+#ifdef CONFIG_COMPACTION
+       struct capture_control          *capture_control;
+#endif
        /* Ptrace state: */
        unsigned long                   ptrace_message;
        kernel_siginfo_t                *last_siginfo;

kernel/sched/core.c

Lines changed: 3 additions & 0 deletions
@@ -2190,6 +2190,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
        INIT_HLIST_HEAD(&p->preempt_notifiers);
 #endif
 
+#ifdef CONFIG_COMPACTION
+       p->capture_control = NULL;
+#endif
        init_numa_balancing(clone_flags, p);
 }
 

mm/compaction.c

Lines changed: 24 additions & 7 deletions
@@ -2056,7 +2056,8 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
        return false;
 }
 
-static enum compact_result compact_zone(struct compact_control *cc)
+static enum compact_result
+compact_zone(struct compact_control *cc, struct capture_control *capc)
 {
        enum compact_result ret;
        unsigned long start_pfn = cc->zone->zone_start_pfn;
@@ -2225,6 +2226,11 @@ static enum compact_result compact_zone(struct compact_control *cc)
                        }
                }
 
+               /* Stop if a page has been captured */
+               if (capc && capc->page) {
+                       ret = COMPACT_SUCCESS;
+                       break;
+               }
        }
 
 out:
@@ -2258,7 +2264,8 @@ static enum compact_result compact_zone(struct compact_control *cc)
 
 static enum compact_result compact_zone_order(struct zone *zone, int order,
                gfp_t gfp_mask, enum compact_priority prio,
-               unsigned int alloc_flags, int classzone_idx)
+               unsigned int alloc_flags, int classzone_idx,
+               struct page **capture)
 {
        enum compact_result ret;
        struct compact_control cc = {
@@ -2279,14 +2286,24 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
                .ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
                .ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
        };
+       struct capture_control capc = {
+               .cc = &cc,
+               .page = NULL,
+       };
+
+       if (capture)
+               current->capture_control = &capc;
        INIT_LIST_HEAD(&cc.freepages);
        INIT_LIST_HEAD(&cc.migratepages);
 
-       ret = compact_zone(&cc);
+       ret = compact_zone(&cc, &capc);
 
        VM_BUG_ON(!list_empty(&cc.freepages));
        VM_BUG_ON(!list_empty(&cc.migratepages));
 
+       *capture = capc.page;
+       current->capture_control = NULL;
+
        return ret;
 }
 
@@ -2304,7 +2321,7 @@ int sysctl_extfrag_threshold = 500;
  */
 enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
                unsigned int alloc_flags, const struct alloc_context *ac,
-               enum compact_priority prio)
+               enum compact_priority prio, struct page **capture)
 {
        int may_perform_io = gfp_mask & __GFP_IO;
        struct zoneref *z;
@@ -2332,7 +2349,7 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
                }
 
                status = compact_zone_order(zone, order, gfp_mask, prio,
-                               alloc_flags, ac_classzone_idx(ac));
+                               alloc_flags, ac_classzone_idx(ac), capture);
                rc = max(status, rc);
 
                /* The allocation should succeed, stop compacting */
@@ -2400,7 +2417,7 @@ static void compact_node(int nid)
                INIT_LIST_HEAD(&cc.freepages);
                INIT_LIST_HEAD(&cc.migratepages);
 
-               compact_zone(&cc);
+               compact_zone(&cc, NULL);
 
                VM_BUG_ON(!list_empty(&cc.freepages));
                VM_BUG_ON(!list_empty(&cc.migratepages));
@@ -2535,7 +2552,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
 
                if (kthread_should_stop())
                        return;
-               status = compact_zone(&cc);
+               status = compact_zone(&cc, NULL);
 
                if (status == COMPACT_SUCCESS) {
                        compaction_defer_reset(zone, cc.order, false);

mm/internal.h

Lines changed: 9 additions & 0 deletions
@@ -209,6 +209,15 @@ struct compact_control {
        bool rescan;                    /* Rescanning the same pageblock */
 };
 
+/*
+ * Used in direct compaction when a page should be taken from the freelists
+ * immediately when one is created during the free path.
+ */
+struct capture_control {
+       struct compact_control *cc;
+       struct page *page;
+};
+
 unsigned long
 isolate_freepages_range(struct compact_control *cc,
                        unsigned long start_pfn, unsigned long end_pfn);

mm/page_alloc.c

Lines changed: 69 additions & 4 deletions
@@ -789,6 +789,57 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
        return 0;
 }
 
+#ifdef CONFIG_COMPACTION
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+       struct capture_control *capc = current->capture_control;
+
+       return capc &&
+               !(current->flags & PF_KTHREAD) &&
+               !capc->page &&
+               capc->cc->zone == zone &&
+               capc->cc->direct_compaction ? capc : NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page,
+               int order, int migratetype)
+{
+       if (!capc || order != capc->cc->order)
+               return false;
+
+       /* Do not accidentally pollute CMA or isolated regions */
+       if (is_migrate_cma(migratetype) ||
+           is_migrate_isolate(migratetype))
+               return false;
+
+       /*
+        * Do not let lower order allocations pollute a movable pageblock.
+        * This might let an unmovable request use a reclaimable pageblock
+        * and vice-versa but no more than normal fallback logic which can
+        * have trouble finding a high-order free page.
+        */
+       if (order < pageblock_order && migratetype == MIGRATE_MOVABLE)
+               return false;
+
+       capc->page = page;
+       return true;
+}
+
+#else
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+       return NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page,
+               int order, int migratetype)
+{
+       return false;
+}
+#endif /* CONFIG_COMPACTION */
+
 /*
  * Freeing function for a buddy system allocator.
  *
@@ -822,6 +873,7 @@ static inline void __free_one_page(struct page *page,
        unsigned long uninitialized_var(buddy_pfn);
        struct page *buddy;
        unsigned int max_order;
+       struct capture_control *capc = task_capc(zone);
 
        max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
 
@@ -837,6 +889,11 @@ static inline void __free_one_page(struct page *page,
 
 continue_merging:
        while (order < max_order - 1) {
+               if (compaction_capture(capc, page, order, migratetype)) {
+                       __mod_zone_freepage_state(zone, -(1 << order),
+                                                               migratetype);
+                       return;
+               }
                buddy_pfn = __find_buddy_pfn(pfn, order);
                buddy = page + (buddy_pfn - pfn);
 
@@ -3710,7 +3767,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
                unsigned int alloc_flags, const struct alloc_context *ac,
                enum compact_priority prio, enum compact_result *compact_result)
 {
-       struct page *page;
+       struct page *page = NULL;
        unsigned long pflags;
        unsigned int noreclaim_flag;
 
@@ -3721,21 +3778,29 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        noreclaim_flag = memalloc_noreclaim_save();
 
        *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
-                                                               prio);
+                                                               prio, &page);
 
        memalloc_noreclaim_restore(noreclaim_flag);
        psi_memstall_leave(&pflags);
 
-       if (*compact_result <= COMPACT_INACTIVE)
+       if (*compact_result <= COMPACT_INACTIVE) {
+               WARN_ON_ONCE(page);
                return NULL;
+       }
 
        /*
        * At least in one zone compaction wasn't deferred or skipped, so let's
        * count a compaction stall
        */
        count_vm_event(COMPACTSTALL);
 
-       page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
+       /* Prep a captured page if available */
+       if (page)
+               prep_new_page(page, order, gfp_mask, alloc_flags);
+
+       /* Try to get a page from the freelist if available */
+       if (!page)
+               page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
 
        if (page) {
                struct zone *zone = page_zone(page);
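A usage note on the compaction_capture() filters in the mm/page_alloc.c hunk above: a page is captured only when its order matches the compactor's request, it does not sit in a CMA or isolated region, and taking it would not let a sub-pageblock allocation pollute a movable pageblock. A standalone sketch of just that predicate, under simplified assumptions (the MIGRATE_* enum and PAGEBLOCK_ORDER below are illustrative constants, not the kernel's config-dependent definitions):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel's migratetypes and pageblock order */
enum migratetype { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE,
                   MIGRATE_CMA, MIGRATE_ISOLATE };
#define PAGEBLOCK_ORDER 9

/* Mirrors the filter logic of compaction_capture() in the hunk above */
static bool may_capture(int order, int wanted_order, enum migratetype mt)
{
        if (order != wanted_order)
                return false;
        /* Never steal from CMA or isolated regions */
        if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE)
                return false;
        /* Don't let small allocations pollute a movable pageblock */
        if (order < PAGEBLOCK_ORDER && mt == MIGRATE_MOVABLE)
                return false;
        return true;
}

int main(void)
{
        /* An order-4 movable page is skipped; a pageblock-sized one is taken */
        printf("%d %d\n", may_capture(4, 4, MIGRATE_MOVABLE),
                          may_capture(9, 9, MIGRATE_MOVABLE));
        return 0;
}

This prints "0 1": the small movable page is left to merge on the freelist, while the pageblock-sized page is handed straight to the compactor.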
