@@ -84,6 +84,9 @@ struct scan_control {
 	/* Scan (total_size >> priority) pages at once */
 	int priority;
 
+	/* The highest zone to isolate pages for reclaim from */
+	enum zone_type reclaim_idx;
+
 	unsigned int may_writepage:1;
 
 	/* Can mapped pages be reclaimed? */
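
The new reclaim_idx field is the pivot of this patch: it records the highest zone that page isolation may take pages from. As a rough sketch of how a direct-reclaim caller seeds it (illustrative only; the real initialisations are the scan_control hunks later in this diff), gfp_zone() maps the allocation's gfp flags to the highest zone that allocation may use:

	/*
	 * Sketch: seed reclaim_idx from the allocation context. For
	 * example, gfp_zone(GFP_KERNEL) is ZONE_NORMAL, so reclaim on
	 * behalf of a GFP_KERNEL allocation must not isolate highmem
	 * or movable pages the allocation could never use.
	 */
	struct scan_control sc = {
		.gfp_mask	= gfp_mask,
		.reclaim_idx	= gfp_zone(gfp_mask),
	};
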
@@ -1392,6 +1395,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	unsigned long nr_taken = 0;
 	unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
 	unsigned long scan, nr_pages;
+	LIST_HEAD(pages_skipped);
 
 	for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan &&
 					!list_empty(src); scan++) {
@@ -1402,6 +1406,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 		VM_BUG_ON_PAGE(!PageLRU(page), page);
 
+		if (page_zonenum(page) > sc->reclaim_idx) {
+			list_move(&page->lru, &pages_skipped);
+			continue;
+		}
+
 		switch (__isolate_lru_page(page, mode)) {
 		case 0:
 			nr_pages = hpage_nr_pages(page);
@@ -1420,6 +1429,15 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		}
 	}
 
+	/*
+	 * Splice any skipped pages to the start of the LRU list. Note that
+	 * this disrupts the LRU order when reclaiming for lower zones, but
+	 * we cannot splice to the tail. If we did, then the SWAP_CLUSTER_MAX
+	 * scanning would soon rescan the same pages to skip and would put
+	 * the system at risk of premature OOM.
+	 */
+	if (!list_empty(&pages_skipped))
+		list_splice(&pages_skipped, src);
 	*nr_scanned = scan;
 	trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan,
 				    nr_taken, mode, is_file_lru(lru));
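
Read in isolation, the two isolate_lru_pages() hunks above implement a skip-and-splice pattern: pages from ineligible zones are parked on a private list in O(1) per page while the scan continues, then returned to the head of the source list with a single splice. A minimal sketch of the same pattern outside its vmscan context (the function name and the bare zone check are illustrative, not part of the patch):

	static void isolate_with_skip(struct list_head *src,
				      enum zone_type reclaim_idx)
	{
		LIST_HEAD(pages_skipped);
		struct page *page, *next;

		list_for_each_entry_safe(page, next, src, lru) {
			/* Park pages from zones above the ceiling. */
			if (page_zonenum(page) > reclaim_idx) {
				list_move(&page->lru, &pages_skipped);
				continue;
			}
			/* ... attempt isolation of eligible pages ... */
		}

		/* One O(1) splice restores the parked pages to the head. */
		if (!list_empty(&pages_skipped))
			list_splice(&pages_skipped, src);
	}

Splicing to the head rather than the tail trades LRU ordering for forward progress, exactly as the comment in the hunk explains.
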
@@ -1589,7 +1607,7 @@ static int current_may_throttle(void)
 }
 
 /*
- * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
+ * shrink_inactive_list() is a helper for shrink_node().  It returns the number
  * of reclaimed pages
  */
 static noinline_for_stack unsigned long
@@ -2401,12 +2419,13 @@ static inline bool should_continue_reclaim(struct zone *zone,
 	}
 }
 
-static bool shrink_zone(struct zone *zone, struct scan_control *sc,
-			bool is_classzone)
+static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
+			enum zone_type classzone_idx)
 {
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long nr_reclaimed, nr_scanned;
 	bool reclaimable = false;
+	struct zone *zone = &pgdat->node_zones[classzone_idx];
 
 	do {
 		struct mem_cgroup *root = sc->target_mem_cgroup;
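
The new signature trades a zone for a (node, classzone index) pair and derives a representative zone internally. It leans on the usual identities between a zone and its owning node, roughly (a sketch, not part of the patch):

	struct zone *zone = &pgdat->node_zones[classzone_idx];

	/* These hold by construction for any zone in node_zones[]: */
	VM_BUG_ON(zone_idx(zone) != classzone_idx);
	VM_BUG_ON(zone->zone_pgdat != pgdat);
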
@@ -2438,7 +2457,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 			shrink_zone_memcg(zone, memcg, sc, &lru_pages);
 			zone_lru_pages += lru_pages;
 
-			if (memcg && is_classzone)
+			if (!global_reclaim(sc))
 				shrink_slab(sc->gfp_mask, zone_to_nid(zone),
 					    memcg, sc->nr_scanned - scanned,
 					    lru_pages);
@@ -2469,7 +2488,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 		 * Shrink the slab caches in the same proportion that
 		 * the eligible LRU pages were scanned.
 		 */
-		if (global_reclaim(sc) && is_classzone)
+		if (global_reclaim(sc))
 			shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL,
 				    sc->nr_scanned - nr_scanned,
 				    zone_lru_pages);
@@ -2553,25 +2572,31 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 	unsigned long nr_soft_reclaimed;
 	unsigned long nr_soft_scanned;
 	gfp_t orig_mask;
-	enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
+	enum zone_type classzone_idx;
 
 	/*
 	 * If the number of buffer_heads in the machine exceeds the maximum
 	 * allowed level, force direct reclaim to scan the highmem zone as
 	 * highmem pages could be pinning lowmem pages storing buffer_heads
 	 */
 	orig_mask = sc->gfp_mask;
-	if (buffer_heads_over_limit)
+	if (buffer_heads_over_limit) {
 		sc->gfp_mask |= __GFP_HIGHMEM;
+		sc->reclaim_idx = classzone_idx = gfp_zone(sc->gfp_mask);
+	}
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
-					gfp_zone(sc->gfp_mask), sc->nodemask) {
-		enum zone_type classzone_idx;
-
+					sc->reclaim_idx, sc->nodemask) {
 		if (!populated_zone(zone))
 			continue;
 
-		classzone_idx = requested_highidx;
+		/*
+		 * Note that reclaim_idx does not change, as it is the highest
+		 * zone reclaimed from (reclaiming from an empty zone is a
+		 * no-op), while classzone_idx is used by shrink_node() to
+		 * decide whether the slabs should be shrunk on a given node.
+		 */
+		classzone_idx = sc->reclaim_idx;
 		while (!populated_zone(zone->zone_pgdat->node_zones +
 					classzone_idx))
 			classzone_idx--;
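
The while loop above only steps classzone_idx down past unpopulated zones, and it terminates because the zone being visited is itself populated and sits at or below reclaim_idx. Pulled out as a hypothetical helper (not in the patch), the logic reads:

	/* Hypothetical helper: highest populated zone at or below idx. */
	static enum zone_type highest_populated_idx(pg_data_t *pgdat,
						    enum zone_type idx)
	{
		while (!populated_zone(pgdat->node_zones + idx))
			idx--;	/* terminates: caller visits a populated zone */
		return idx;
	}
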
@@ -2600,8 +2625,8 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 		 */
 		if (IS_ENABLED(CONFIG_COMPACTION) &&
 		    sc->order > PAGE_ALLOC_COSTLY_ORDER &&
-		    zonelist_zone_idx(z) <= requested_highidx &&
-		    compaction_ready(zone, sc->order, requested_highidx)) {
+		    zonelist_zone_idx(z) <= classzone_idx &&
+		    compaction_ready(zone, sc->order, classzone_idx)) {
 			sc->compaction_ready = true;
 			continue;
 		}
@@ -2621,7 +2646,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 			/* need some check for avoid more shrink_zone() */
 		}
 
-		shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);
+		shrink_node(zone->zone_pgdat, sc, classzone_idx);
 	}
 
 	/*
@@ -2847,6 +2872,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 	struct scan_control sc = {
 		.nr_to_reclaim = SWAP_CLUSTER_MAX,
 		.gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
+		.reclaim_idx = gfp_zone(gfp_mask),
 		.order = order,
 		.nodemask = nodemask,
 		.priority = DEF_PRIORITY,
@@ -2886,6 +2912,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
 		.target_mem_cgroup = memcg,
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
+		.reclaim_idx = MAX_NR_ZONES - 1,
 		.may_swap = !noswap,
 	};
 	unsigned long lru_pages;
@@ -2924,6 +2951,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 		.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
 		.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 				(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
+		.reclaim_idx = MAX_NR_ZONES - 1,
 		.target_mem_cgroup = memcg,
 		.priority = DEF_PRIORITY,
 		.may_writepage = !laptop_mode,
@@ -3118,7 +3146,7 @@ static bool kswapd_shrink_zone(struct zone *zone,
 						balance_gap, classzone_idx))
 		return true;
 
-	shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);
+	shrink_node(zone->zone_pgdat, sc, classzone_idx);
 
 	/* TODO: ANOMALY */
 	clear_bit(PGDAT_WRITEBACK, &pgdat->flags);
@@ -3167,6 +3195,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 	unsigned long nr_soft_scanned;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
+		.reclaim_idx = MAX_NR_ZONES - 1,
 		.order = order,
 		.priority = DEF_PRIORITY,
 		.may_writepage = !laptop_mode,
@@ -3237,15 +3266,14 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 			sc.may_writepage = 1;
 
 		/*
-		 * Now scan the zone in the dma->highmem direction, stopping
-		 * at the last zone which needs scanning.
-		 *
-		 * We do this because the page allocator works in the opposite
-		 * direction. This prevents the page allocator from allocating
-		 * pages behind kswapd's direction of progress, which would
-		 * cause too much scanning of the lower zones.
+		 * Continue scanning in the highmem->dma direction, stopping at
+		 * the last zone which needs scanning. This may reclaim lowmem
+		 * pages that are not necessary for zone balancing, but it
+		 * preserves LRU ordering. It is assumed that the bulk of
+		 * allocation requests can use arbitrary zones, with the
+		 * possible exception of big highmem:lowmem configurations.
 		 */
-		for (i = 0; i <= end_zone; i++) {
+		for (i = end_zone; i >= 0; i--) {
 			struct zone *zone = pgdat->node_zones + i;
 
 			if (!populated_zone(zone))
@@ -3256,6 +3284,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 				continue;
 
 			sc.nr_scanned = 0;
+			sc.reclaim_idx = i;
 
 			nr_soft_scanned = 0;
 			/*
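
Taken together, the two balance_pgdat() hunks above make kswapd walk zones from highest to lowest while lowering the isolation ceiling as it goes, so each zone is rebalanced without touching pages from the zones above it. In skeleton form (a condensation of the hunks, not new code):

	for (i = end_zone; i >= 0; i--) {	/* highmem -> dma */
		struct zone *zone = pgdat->node_zones + i;

		if (!populated_zone(zone))
			continue;

		sc.nr_scanned = 0;
		sc.reclaim_idx = i;	/* never isolate pages above zone i */
		/* ... kswapd_shrink_zone() -> shrink_node() ... */
	}
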
@@ -3513,6 +3542,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 	struct scan_control sc = {
 		.nr_to_reclaim = nr_to_reclaim,
 		.gfp_mask = GFP_HIGHUSER_MOVABLE,
+		.reclaim_idx = MAX_NR_ZONES - 1,
 		.priority = DEF_PRIORITY,
 		.may_writepage = 1,
 		.may_unmap = 1,
@@ -3704,6 +3734,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
 		.may_unmap = !!(zone_reclaim_mode & RECLAIM_UNMAP),
 		.may_swap = 1,
+		.reclaim_idx = zone_idx(zone),
 	};
 
 	cond_resched();
@@ -3723,7 +3754,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 * priorities until we have enough memory freed.
 	 */
 	do {
-		shrink_zone(zone, &sc, true);
+		shrink_node(zone->zone_pgdat, &sc, zone_idx(zone));
 	} while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
 }
 
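
The do/while above retries at ever lower priority values; since the per-pass scan budget is lru_size >> priority (see the scan_control comment in the first hunk), each retry doubles the number of pages considered. A tiny sketch of that budget, assuming DEF_PRIORITY is 12 as in this kernel:

	/* Sketch: pages scanned per pass at a given priority. */
	static unsigned long scan_budget(unsigned long lru_size, int priority)
	{
		/* DEF_PRIORITY (12) -> lru_size/4096; priority 0 -> whole list */
		return lru_size >> priority;
	}
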