Skip to content

Commit dcf6b7d

Browse files
aquini authored and
torvalds committed
swap: discard while swapping only if SWAP_FLAG_DISCARD_PAGES
Considering the use cases where the swap device supports discard: a) and can do it quickly; b) but it's slow to do in small granularities (or concurrent with other I/O); c) but the implementation is so horrendous that you don't even want to send one down; and assuming that the sysadmin considers it useful to send the discards down at all, we would (probably) want the following solutions: i. do the fine-grained discards for freed swap pages, if device is capable of doing so optimally; ii. do single-time (batched) swap area discards, either at swapon or via something like fstrim (not implemented yet); iii. allow doing both single-time and fine-grained discards; or iv. turn it off completely (default behavior). As implemented today, one can only enable/disable discards for swap, but one cannot select, for instance, solution (ii) on a swap device like (b) even though the single-time discard is regarded to be interesting, or necessary to the workload because it would imply (i), and the device is not capable of performing it optimally. This patch addresses the scenario depicted above by introducing a way to ensure the (probably) wanted solutions (i, ii, iii and iv) can be flexibly flagged through swapon(8) to allow a sysadmin to select the best suitable swap discard policy according to system constraints. This patch introduces SWAP_FLAG_DISCARD_PAGES and SWAP_FLAG_DISCARD_ONCE new flags to allow more flexible swap discard policies to be flagged through swapon(8). The default behavior is to keep both single-time, or batched, area discards (SWAP_FLAG_DISCARD_ONCE) and fine-grained discards for page-clusters (SWAP_FLAG_DISCARD_PAGES) enabled, in order to keep consistency with older kernel behavior, as well as maintain compatibility with older swapon(8). However, through the newly introduced flags the best suitable discard policy can be selected according to any given swap device constraint.
[[email protected]: tweak comments] Signed-off-by: Rafael Aquini <[email protected]> Acked-by: KOSAKI Motohiro <[email protected]> Cc: Hugh Dickins <[email protected]> Cc: Shaohua Li <[email protected]> Cc: Karel Zak <[email protected]> Cc: Jeff Moyer <[email protected]> Cc: Rik van Riel <[email protected]> Cc: Larry Woodman <[email protected]> Cc: Mel Gorman <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 917d929 commit dcf6b7d

File tree

2 files changed

+59
-9
lines changed

2 files changed

+59
-9
lines changed

include/linux/swap.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,13 @@ struct bio;
2020
#define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */
2121
#define SWAP_FLAG_PRIO_MASK 0x7fff
2222
#define SWAP_FLAG_PRIO_SHIFT 0
23-
#define SWAP_FLAG_DISCARD 0x10000 /* discard swap cluster after use */
23+
#define SWAP_FLAG_DISCARD 0x10000 /* enable discard for swap */
24+
#define SWAP_FLAG_DISCARD_ONCE 0x20000 /* discard swap area at swapon-time */
25+
#define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */
2426

2527
#define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \
26-
SWAP_FLAG_DISCARD)
28+
SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
29+
SWAP_FLAG_DISCARD_PAGES)
2730

2831
static inline int current_is_kswapd(void)
2932
{
@@ -147,14 +150,16 @@ struct swap_extent {
147150
enum {
148151
SWP_USED = (1 << 0), /* is slot in swap_info[] used? */
149152
SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */
150-
SWP_DISCARDABLE = (1 << 2), /* swapon+blkdev support discard */
153+
SWP_DISCARDABLE = (1 << 2), /* blkdev support discard */
151154
SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */
152155
SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */
153156
SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */
154157
SWP_BLKDEV = (1 << 6), /* its a block device */
155158
SWP_FILE = (1 << 7), /* set after swap_activate success */
159+
SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */
160+
SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */
156161
/* add others here before... */
157-
SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */
162+
SWP_SCANNING = (1 << 10), /* refcount in scan_swap_map */
158163
};
159164

160165
#define SWAP_CLUSTER_MAX 32UL

mm/swapfile.c

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
212212
si->cluster_nr = SWAPFILE_CLUSTER - 1;
213213
goto checks;
214214
}
215-
if (si->flags & SWP_DISCARDABLE) {
215+
if (si->flags & SWP_PAGE_DISCARD) {
216216
/*
217217
* Start range check on racing allocations, in case
218218
* they overlap the cluster we eventually decide on
@@ -322,7 +322,7 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
322322

323323
if (si->lowest_alloc) {
324324
/*
325-
* Only set when SWP_DISCARDABLE, and there's a scan
325+
* Only set when SWP_PAGE_DISCARD, and there's a scan
326326
* for a free cluster in progress or just completed.
327327
*/
328328
if (found_free_cluster) {
@@ -2016,6 +2016,20 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
20162016
return nr_extents;
20172017
}
20182018

2019+
/*
2020+
* Helper to sys_swapon determining if a given swap
2021+
* backing device queue supports DISCARD operations.
2022+
*/
2023+
static bool swap_discardable(struct swap_info_struct *si)
2024+
{
2025+
struct request_queue *q = bdev_get_queue(si->bdev);
2026+
2027+
if (!q || !blk_queue_discard(q))
2028+
return false;
2029+
2030+
return true;
2031+
}
2032+
20192033
SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
20202034
{
20212035
struct swap_info_struct *p;
@@ -2123,8 +2137,37 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
21232137
p->flags |= SWP_SOLIDSTATE;
21242138
p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
21252139
}
2126-
if ((swap_flags & SWAP_FLAG_DISCARD) && discard_swap(p) == 0)
2127-
p->flags |= SWP_DISCARDABLE;
2140+
2141+
if ((swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
2142+
/*
2143+
* When discard is enabled for swap with no particular
2144+
* policy flagged, we set all swap discard flags here in
2145+
* order to sustain backward compatibility with older
2146+
* swapon(8) releases.
2147+
*/
2148+
p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD |
2149+
SWP_PAGE_DISCARD);
2150+
2151+
/*
2152+
* By flagging sys_swapon, a sysadmin can tell us to
2153+
* either do single-time area discards only, or to just
2154+
* perform discards for released swap page-clusters.
2155+
* Now it's time to adjust the p->flags accordingly.
2156+
*/
2157+
if (swap_flags & SWAP_FLAG_DISCARD_ONCE)
2158+
p->flags &= ~SWP_PAGE_DISCARD;
2159+
else if (swap_flags & SWAP_FLAG_DISCARD_PAGES)
2160+
p->flags &= ~SWP_AREA_DISCARD;
2161+
2162+
/* issue a swapon-time discard if it's still required */
2163+
if (p->flags & SWP_AREA_DISCARD) {
2164+
int err = discard_swap(p);
2165+
if (unlikely(err))
2166+
printk(KERN_ERR
2167+
"swapon: discard_swap(%p): %d\n",
2168+
p, err);
2169+
}
2170+
}
21282171
}
21292172

21302173
mutex_lock(&swapon_mutex);
@@ -2135,11 +2178,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
21352178
enable_swap_info(p, prio, swap_map, frontswap_map);
21362179

21372180
printk(KERN_INFO "Adding %uk swap on %s. "
2138-
"Priority:%d extents:%d across:%lluk %s%s%s\n",
2181+
"Priority:%d extents:%d across:%lluk %s%s%s%s%s\n",
21392182
p->pages<<(PAGE_SHIFT-10), name->name, p->prio,
21402183
nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
21412184
(p->flags & SWP_SOLIDSTATE) ? "SS" : "",
21422185
(p->flags & SWP_DISCARDABLE) ? "D" : "",
2186+
(p->flags & SWP_AREA_DISCARD) ? "s" : "",
2187+
(p->flags & SWP_PAGE_DISCARD) ? "c" : "",
21432188
(frontswap_map) ? "FS" : "");
21442189

21452190
mutex_unlock(&swapon_mutex);

0 commit comments

Comments
 (0)