Skip to content

Commit 72c2706

Browse files
author
Kent Overstreet
committed
bcache: Write out full stripes
Now that we're tracking dirty data per stripe, we can add two optimizations for raid5/6:

* If a stripe is already dirty, force writes to that stripe to writeback mode - to help build up full stripes of dirty data

* When flushing dirty data, preferentially write out full stripes first if there are any.

Signed-off-by: Kent Overstreet <[email protected]>
1 parent 279afba commit 72c2706

File tree

9 files changed

+121
-37
lines changed

9 files changed

+121
-37
lines changed

drivers/md/bcache/bcache.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -387,8 +387,6 @@ struct keybuf_key {
387387
typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
388388

389389
struct keybuf {
390-
keybuf_pred_fn *key_predicate;
391-
392390
struct bkey last_scanned;
393391
spinlock_t lock;
394392

@@ -532,6 +530,7 @@ struct cached_dev {
532530
unsigned sequential_merge:1;
533531
unsigned verify:1;
534532

533+
unsigned partial_stripes_expensive:1;
535534
unsigned writeback_metadata:1;
536535
unsigned writeback_running:1;
537536
unsigned char writeback_percent;

drivers/md/bcache/btree.c

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2252,7 +2252,8 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l,
22522252
}
22532253

22542254
static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
2255-
struct keybuf *buf, struct bkey *end)
2255+
struct keybuf *buf, struct bkey *end,
2256+
keybuf_pred_fn *pred)
22562257
{
22572258
struct btree_iter iter;
22582259
bch_btree_iter_init(b, &iter, &buf->last_scanned);
@@ -2271,7 +2272,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
22712272
if (bkey_cmp(&buf->last_scanned, end) >= 0)
22722273
break;
22732274

2274-
if (buf->key_predicate(buf, k)) {
2275+
if (pred(buf, k)) {
22752276
struct keybuf_key *w;
22762277

22772278
spin_lock(&buf->lock);
@@ -2290,7 +2291,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
22902291
if (!k)
22912292
break;
22922293

2293-
btree(refill_keybuf, k, b, op, buf, end);
2294+
btree(refill_keybuf, k, b, op, buf, end, pred);
22942295
/*
22952296
* Might get an error here, but can't really do anything
22962297
* and it'll get logged elsewhere. Just read what we
@@ -2308,15 +2309,15 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
23082309
}
23092310

23102311
void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
2311-
struct bkey *end)
2312+
struct bkey *end, keybuf_pred_fn *pred)
23122313
{
23132314
struct bkey start = buf->last_scanned;
23142315
struct btree_op op;
23152316
bch_btree_op_init_stack(&op);
23162317

23172318
cond_resched();
23182319

2319-
btree_root(refill_keybuf, c, &op, buf, end);
2320+
btree_root(refill_keybuf, c, &op, buf, end, pred);
23202321
closure_sync(&op.cl);
23212322

23222323
pr_debug("found %s keys from %llu:%llu to %llu:%llu",
@@ -2402,7 +2403,8 @@ struct keybuf_key *bch_keybuf_next(struct keybuf *buf)
24022403

24032404
struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
24042405
struct keybuf *buf,
2405-
struct bkey *end)
2406+
struct bkey *end,
2407+
keybuf_pred_fn *pred)
24062408
{
24072409
struct keybuf_key *ret;
24082410

@@ -2416,15 +2418,14 @@ struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
24162418
break;
24172419
}
24182420

2419-
bch_refill_keybuf(c, buf, end);
2421+
bch_refill_keybuf(c, buf, end, pred);
24202422
}
24212423

24222424
return ret;
24232425
}
24242426

2425-
void bch_keybuf_init(struct keybuf *buf, keybuf_pred_fn *fn)
2427+
void bch_keybuf_init(struct keybuf *buf)
24262428
{
2427-
buf->key_predicate = fn;
24282429
buf->last_scanned = MAX_KEY;
24292430
buf->keys = RB_ROOT;
24302431

drivers/md/bcache/btree.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -391,13 +391,14 @@ void bch_moving_gc(struct closure *);
391391
int bch_btree_check(struct cache_set *, struct btree_op *);
392392
uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *);
393393

394-
void bch_keybuf_init(struct keybuf *, keybuf_pred_fn *);
395-
void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *);
394+
void bch_keybuf_init(struct keybuf *);
395+
void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *,
396+
keybuf_pred_fn *);
396397
bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *,
397398
struct bkey *);
398399
void bch_keybuf_del(struct keybuf *, struct keybuf_key *);
399400
struct keybuf_key *bch_keybuf_next(struct keybuf *);
400-
struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *,
401-
struct keybuf *, struct bkey *);
401+
struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *,
402+
struct bkey *, keybuf_pred_fn *);
402403

403404
#endif

drivers/md/bcache/debug.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
357357
if (i->bytes)
358358
break;
359359

360-
w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY);
360+
w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY, dump_pred);
361361
if (!w)
362362
break;
363363

@@ -380,7 +380,7 @@ static int bch_dump_open(struct inode *inode, struct file *file)
380380

381381
file->private_data = i;
382382
i->c = c;
383-
bch_keybuf_init(&i->keys, dump_pred);
383+
bch_keybuf_init(&i->keys);
384384
i->keys.last_scanned = KEY(0, 0, 0);
385385

386386
return 0;

drivers/md/bcache/movinggc.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,8 @@ static void read_moving(struct closure *cl)
136136
/* XXX: if we error, background writeback could stall indefinitely */
137137

138138
while (!test_bit(CACHE_SET_STOPPING, &c->flags)) {
139-
w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, &MAX_KEY);
139+
w = bch_keybuf_next_rescan(c, &c->moving_gc_keys,
140+
&MAX_KEY, moving_pred);
140141
if (!w)
141142
break;
142143

@@ -248,5 +249,5 @@ void bch_moving_gc(struct closure *cl)
248249

249250
void bch_moving_init_cache_set(struct cache_set *c)
250251
{
251-
bch_keybuf_init(&c->moving_gc_keys, moving_pred);
252+
bch_keybuf_init(&c->moving_gc_keys);
252253
}

drivers/md/bcache/request.c

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@
2222

2323
#define CUTOFF_CACHE_ADD 95
2424
#define CUTOFF_CACHE_READA 90
25-
#define CUTOFF_WRITEBACK 50
26-
#define CUTOFF_WRITEBACK_SYNC 75
2725

2826
struct kmem_cache *bch_search_cache;
2927

@@ -998,17 +996,6 @@ static void cached_dev_write_complete(struct closure *cl)
998996
cached_dev_bio_complete(cl);
999997
}
1000998

1001-
static bool should_writeback(struct cached_dev *dc, struct bio *bio)
1002-
{
1003-
unsigned threshold = (bio->bi_rw & REQ_SYNC)
1004-
? CUTOFF_WRITEBACK_SYNC
1005-
: CUTOFF_WRITEBACK;
1006-
1007-
return !atomic_read(&dc->disk.detaching) &&
1008-
cache_mode(dc, bio) == CACHE_MODE_WRITEBACK &&
1009-
dc->disk.c->gc_stats.in_use < threshold;
1010-
}
1011-
1012999
static void request_write(struct cached_dev *dc, struct search *s)
10131000
{
10141001
struct closure *cl = &s->cl;
@@ -1030,12 +1017,16 @@ static void request_write(struct cached_dev *dc, struct search *s)
10301017
if (bio->bi_rw & REQ_DISCARD)
10311018
goto skip;
10321019

1020+
if (should_writeback(dc, s->orig_bio,
1021+
cache_mode(dc, bio),
1022+
s->op.skip)) {
1023+
s->op.skip = false;
1024+
s->writeback = true;
1025+
}
1026+
10331027
if (s->op.skip)
10341028
goto skip;
10351029

1036-
if (should_writeback(dc, s->orig_bio))
1037-
s->writeback = true;
1038-
10391030
trace_bcache_write(s->orig_bio, s->writeback, s->op.skip);
10401031

10411032
if (!s->writeback) {

drivers/md/bcache/sysfs.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ rw_attribute(writeback_rate_p_term_inverse);
8181
rw_attribute(writeback_rate_d_smooth);
8282
read_attribute(writeback_rate_debug);
8383

84+
read_attribute(stripe_size);
85+
read_attribute(partial_stripes_expensive);
86+
8487
rw_attribute(synchronous);
8588
rw_attribute(journal_delay_ms);
8689
rw_attribute(discard);
@@ -147,6 +150,9 @@ SHOW(__bch_cached_dev)
147150
sysfs_hprint(dirty_data,
148151
bcache_dev_sectors_dirty(&dc->disk) << 9);
149152

153+
sysfs_hprint(stripe_size, (1 << dc->disk.stripe_size_bits) << 9);
154+
var_printf(partial_stripes_expensive, "%u");
155+
150156
var_printf(sequential_merge, "%i");
151157
var_hprint(sequential_cutoff);
152158
var_hprint(readahead);
@@ -286,6 +292,8 @@ static struct attribute *bch_cached_dev_files[] = {
286292
&sysfs_writeback_rate_d_smooth,
287293
&sysfs_writeback_rate_debug,
288294
&sysfs_dirty_data,
295+
&sysfs_stripe_size,
296+
&sysfs_partial_stripes_expensive,
289297
&sysfs_sequential_cutoff,
290298
&sysfs_sequential_merge,
291299
&sysfs_clear_stats,

drivers/md/bcache/writeback.c

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,31 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k)
108108
return KEY_DIRTY(k);
109109
}
110110

111+
static bool dirty_full_stripe_pred(struct keybuf *buf, struct bkey *k)
112+
{
113+
uint64_t stripe;
114+
unsigned nr_sectors = KEY_SIZE(k);
115+
struct cached_dev *dc = container_of(buf, struct cached_dev,
116+
writeback_keys);
117+
unsigned stripe_size = 1 << dc->disk.stripe_size_bits;
118+
119+
if (!KEY_DIRTY(k))
120+
return false;
121+
122+
stripe = KEY_START(k) >> dc->disk.stripe_size_bits;
123+
while (1) {
124+
if (atomic_read(dc->disk.stripe_sectors_dirty + stripe) !=
125+
stripe_size)
126+
return false;
127+
128+
if (nr_sectors <= stripe_size)
129+
return true;
130+
131+
nr_sectors -= stripe_size;
132+
stripe++;
133+
}
134+
}
135+
111136
static void dirty_init(struct keybuf_key *w)
112137
{
113138
struct dirty_io *io = w->private;
@@ -152,7 +177,22 @@ static void refill_dirty(struct closure *cl)
152177
searched_from_start = true;
153178
}
154179

155-
bch_refill_keybuf(dc->disk.c, buf, &end);
180+
if (dc->partial_stripes_expensive) {
181+
uint64_t i;
182+
183+
for (i = 0; i < dc->disk.nr_stripes; i++)
184+
if (atomic_read(dc->disk.stripe_sectors_dirty + i) ==
185+
1 << dc->disk.stripe_size_bits)
186+
goto full_stripes;
187+
188+
goto normal_refill;
189+
full_stripes:
190+
bch_refill_keybuf(dc->disk.c, buf, &end,
191+
dirty_full_stripe_pred);
192+
} else {
193+
normal_refill:
194+
bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);
195+
}
156196

157197
if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) {
158198
/* Searched the entire btree - delay awhile */
@@ -446,7 +486,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
446486
closure_init_unlocked(&dc->writeback);
447487
init_rwsem(&dc->writeback_lock);
448488

449-
bch_keybuf_init(&dc->writeback_keys, dirty_pred);
489+
bch_keybuf_init(&dc->writeback_keys);
450490

451491
dc->writeback_metadata = true;
452492
dc->writeback_running = true;

drivers/md/bcache/writeback.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
#ifndef _BCACHE_WRITEBACK_H
22
#define _BCACHE_WRITEBACK_H
33

4+
#define CUTOFF_WRITEBACK 40
5+
#define CUTOFF_WRITEBACK_SYNC 70
6+
47
static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
58
{
69
uint64_t i, ret = 0;
@@ -11,6 +14,46 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
1114
return ret;
1215
}
1316

17+
static inline bool bcache_dev_stripe_dirty(struct bcache_device *d,
18+
uint64_t offset,
19+
unsigned nr_sectors)
20+
{
21+
uint64_t stripe = offset >> d->stripe_size_bits;
22+
23+
while (1) {
24+
if (atomic_read(d->stripe_sectors_dirty + stripe))
25+
return true;
26+
27+
if (nr_sectors <= 1 << d->stripe_size_bits)
28+
return false;
29+
30+
nr_sectors -= 1 << d->stripe_size_bits;
31+
stripe++;
32+
}
33+
}
34+
35+
static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
36+
unsigned cache_mode, bool would_skip)
37+
{
38+
unsigned in_use = dc->disk.c->gc_stats.in_use;
39+
40+
if (cache_mode != CACHE_MODE_WRITEBACK ||
41+
atomic_read(&dc->disk.detaching) ||
42+
in_use > CUTOFF_WRITEBACK_SYNC)
43+
return false;
44+
45+
if (dc->partial_stripes_expensive &&
46+
bcache_dev_stripe_dirty(&dc->disk, bio->bi_sector,
47+
bio_sectors(bio)))
48+
return true;
49+
50+
if (would_skip)
51+
return false;
52+
53+
return bio->bi_rw & REQ_SYNC ||
54+
in_use <= CUTOFF_WRITEBACK;
55+
}
56+
1457
void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
1558
void bch_writeback_queue(struct cached_dev *);
1659
void bch_writeback_add(struct cached_dev *);

0 commit comments

Comments
 (0)