Skip to content

Commit cc27b0c

Browse files
NeilBrownshligit
authored andcommitted
md: fix deadlock between mddev_suspend() and md_write_start()
If mddev_suspend() races with md_write_start() we can deadlock with mddev_suspend() waiting for the request that is currently in md_write_start() to complete the ->make_request() call, and md_write_start() waiting for the metadata to be updated to mark the array as 'dirty'. As metadata updates done by md_check_recovery() only happen then the mddev_lock() can be claimed, and as mddev_suspend() is often called with the lock held, these threads wait indefinitely for each other. We fix this by having md_write_start() abort if mddev_suspend() is happening, and ->make_request() aborts if md_write_start() aborted. md_make_request() can detect this abort, decrease the ->active_io count, and wait for mddev_suspend(). Reported-by: Nix <[email protected]> Fix: 68866e4(MD: no sync IO while suspended) Cc: [email protected] Signed-off-by: NeilBrown <[email protected]> Signed-off-by: Shaohua Li <[email protected]>
1 parent 63f700a commit cc27b0c

File tree

9 files changed

+56
-35
lines changed

9 files changed

+56
-35
lines changed

drivers/md/faulty.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ static void add_sector(struct faulty_conf *conf, sector_t start, int mode)
170170
conf->nfaults = n+1;
171171
}
172172

173-
static void faulty_make_request(struct mddev *mddev, struct bio *bio)
173+
static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
174174
{
175175
struct faulty_conf *conf = mddev->private;
176176
int failit = 0;
@@ -182,7 +182,7 @@ static void faulty_make_request(struct mddev *mddev, struct bio *bio)
182182
* just fail immediately
183183
*/
184184
bio_io_error(bio);
185-
return;
185+
return true;
186186
}
187187

188188
if (check_sector(conf, bio->bi_iter.bi_sector,
@@ -224,6 +224,7 @@ static void faulty_make_request(struct mddev *mddev, struct bio *bio)
224224
bio->bi_bdev = conf->rdev->bdev;
225225

226226
generic_make_request(bio);
227+
return true;
227228
}
228229

229230
static void faulty_status(struct seq_file *seq, struct mddev *mddev)

drivers/md/linear.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ static void linear_free(struct mddev *mddev, void *priv)
245245
kfree(conf);
246246
}
247247

248-
static void linear_make_request(struct mddev *mddev, struct bio *bio)
248+
static bool linear_make_request(struct mddev *mddev, struct bio *bio)
249249
{
250250
char b[BDEVNAME_SIZE];
251251
struct dev_info *tmp_dev;
@@ -254,7 +254,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
254254

255255
if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
256256
md_flush_request(mddev, bio);
257-
return;
257+
return true;
258258
}
259259

260260
tmp_dev = which_dev(mddev, bio_sector);
@@ -292,7 +292,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
292292
mddev_check_write_zeroes(mddev, bio);
293293
generic_make_request(bio);
294294
}
295-
return;
295+
return true;
296296

297297
out_of_bounds:
298298
pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %s: %llu sectors, offset %llu\n",
@@ -302,6 +302,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
302302
(unsigned long long)tmp_dev->rdev->sectors,
303303
(unsigned long long)start_sector);
304304
bio_io_error(bio);
305+
return true;
305306
}
306307

307308
static void linear_status (struct seq_file *seq, struct mddev *mddev)

drivers/md/md.c

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
277277
bio_endio(bio);
278278
return BLK_QC_T_NONE;
279279
}
280-
smp_rmb(); /* Ensure implications of 'active' are visible */
280+
check_suspended:
281281
rcu_read_lock();
282282
if (mddev->suspended) {
283283
DEFINE_WAIT(__wait);
@@ -302,7 +302,11 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
302302
sectors = bio_sectors(bio);
303303
/* bio could be mergeable after passing to underlayer */
304304
bio->bi_opf &= ~REQ_NOMERGE;
305-
mddev->pers->make_request(mddev, bio);
305+
if (!mddev->pers->make_request(mddev, bio)) {
306+
atomic_dec(&mddev->active_io);
307+
wake_up(&mddev->sb_wait);
308+
goto check_suspended;
309+
}
306310

307311
cpu = part_stat_lock();
308312
part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
@@ -327,6 +331,7 @@ void mddev_suspend(struct mddev *mddev)
327331
if (mddev->suspended++)
328332
return;
329333
synchronize_rcu();
334+
wake_up(&mddev->sb_wait);
330335
wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
331336
mddev->pers->quiesce(mddev, 1);
332337

@@ -7950,12 +7955,14 @@ EXPORT_SYMBOL(md_done_sync);
79507955
* If we need to update some array metadata (e.g. 'active' flag
79517956
* in superblock) before writing, schedule a superblock update
79527957
* and wait for it to complete.
7958+
* A return value of 'false' means that the write wasn't recorded
7959+
* and cannot proceed as the array is being suspend.
79537960
*/
7954-
void md_write_start(struct mddev *mddev, struct bio *bi)
7961+
bool md_write_start(struct mddev *mddev, struct bio *bi)
79557962
{
79567963
int did_change = 0;
79577964
if (bio_data_dir(bi) != WRITE)
7958-
return;
7965+
return true;
79597966

79607967
BUG_ON(mddev->ro == 1);
79617968
if (mddev->ro == 2) {
@@ -7987,7 +7994,12 @@ void md_write_start(struct mddev *mddev, struct bio *bi)
79877994
if (did_change)
79887995
sysfs_notify_dirent_safe(mddev->sysfs_state);
79897996
wait_event(mddev->sb_wait,
7990-
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7997+
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) && !mddev->suspended);
7998+
if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
7999+
percpu_ref_put(&mddev->writes_pending);
8000+
return false;
8001+
}
8002+
return true;
79918003
}
79928004
EXPORT_SYMBOL(md_write_start);
79938005

drivers/md/md.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@ struct md_personality
510510
int level;
511511
struct list_head list;
512512
struct module *owner;
513-
void (*make_request)(struct mddev *mddev, struct bio *bio);
513+
bool (*make_request)(struct mddev *mddev, struct bio *bio);
514514
int (*run)(struct mddev *mddev);
515515
void (*free)(struct mddev *mddev, void *priv);
516516
void (*status)(struct seq_file *seq, struct mddev *mddev);
@@ -649,7 +649,7 @@ extern void md_wakeup_thread(struct md_thread *thread);
649649
extern void md_check_recovery(struct mddev *mddev);
650650
extern void md_reap_sync_thread(struct mddev *mddev);
651651
extern int mddev_init_writes_pending(struct mddev *mddev);
652-
extern void md_write_start(struct mddev *mddev, struct bio *bi);
652+
extern bool md_write_start(struct mddev *mddev, struct bio *bi);
653653
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
654654
extern void md_write_end(struct mddev *mddev);
655655
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);

drivers/md/multipath.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,15 +106,15 @@ static void multipath_end_request(struct bio *bio)
106106
rdev_dec_pending(rdev, conf->mddev);
107107
}
108108

109-
static void multipath_make_request(struct mddev *mddev, struct bio * bio)
109+
static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
110110
{
111111
struct mpconf *conf = mddev->private;
112112
struct multipath_bh * mp_bh;
113113
struct multipath_info *multipath;
114114

115115
if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
116116
md_flush_request(mddev, bio);
117-
return;
117+
return true;
118118
}
119119

120120
mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
@@ -126,7 +126,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
126126
if (mp_bh->path < 0) {
127127
bio_io_error(bio);
128128
mempool_free(mp_bh, conf->pool);
129-
return;
129+
return true;
130130
}
131131
multipath = conf->multipaths + mp_bh->path;
132132

@@ -141,7 +141,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
141141
mddev_check_writesame(mddev, &mp_bh->bio);
142142
mddev_check_write_zeroes(mddev, &mp_bh->bio);
143143
generic_make_request(&mp_bh->bio);
144-
return;
144+
return true;
145145
}
146146

147147
static void multipath_status(struct seq_file *seq, struct mddev *mddev)

drivers/md/raid0.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
548548
bio_endio(bio);
549549
}
550550

551-
static void raid0_make_request(struct mddev *mddev, struct bio *bio)
551+
static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
552552
{
553553
struct strip_zone *zone;
554554
struct md_rdev *tmp_dev;
@@ -559,12 +559,12 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
559559

560560
if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
561561
md_flush_request(mddev, bio);
562-
return;
562+
return true;
563563
}
564564

565565
if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) {
566566
raid0_handle_discard(mddev, bio);
567-
return;
567+
return true;
568568
}
569569

570570
bio_sector = bio->bi_iter.bi_sector;
@@ -599,6 +599,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
599599
mddev_check_writesame(mddev, bio);
600600
mddev_check_write_zeroes(mddev, bio);
601601
generic_make_request(bio);
602+
return true;
602603
}
603604

604605
static void raid0_status(struct seq_file *seq, struct mddev *mddev)

drivers/md/raid1.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1321,7 +1321,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
13211321
* Continue immediately if no resync is active currently.
13221322
*/
13231323

1324-
md_write_start(mddev, bio); /* wait on superblock update early */
13251324

13261325
if ((bio_end_sector(bio) > mddev->suspend_lo &&
13271326
bio->bi_iter.bi_sector < mddev->suspend_hi) ||
@@ -1550,13 +1549,13 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
15501549
wake_up(&conf->wait_barrier);
15511550
}
15521551

1553-
static void raid1_make_request(struct mddev *mddev, struct bio *bio)
1552+
static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
15541553
{
15551554
sector_t sectors;
15561555

15571556
if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
15581557
md_flush_request(mddev, bio);
1559-
return;
1558+
return true;
15601559
}
15611560

15621561
/*
@@ -1571,8 +1570,12 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio)
15711570

15721571
if (bio_data_dir(bio) == READ)
15731572
raid1_read_request(mddev, bio, sectors, NULL);
1574-
else
1573+
else {
1574+
if (!md_write_start(mddev,bio))
1575+
return false;
15751576
raid1_write_request(mddev, bio, sectors);
1577+
}
1578+
return true;
15761579
}
15771580

15781581
static void raid1_status(struct seq_file *seq, struct mddev *mddev)

drivers/md/raid10.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,8 +1303,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
13031303
sector_t sectors;
13041304
int max_sectors;
13051305

1306-
md_write_start(mddev, bio);
1307-
13081306
/*
13091307
* Register the new request and wait if the reconstruction
13101308
* thread has put up a bar for new requests.
@@ -1525,7 +1523,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
15251523
raid10_write_request(mddev, bio, r10_bio);
15261524
}
15271525

1528-
static void raid10_make_request(struct mddev *mddev, struct bio *bio)
1526+
static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
15291527
{
15301528
struct r10conf *conf = mddev->private;
15311529
sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
@@ -1534,9 +1532,12 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio)
15341532

15351533
if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
15361534
md_flush_request(mddev, bio);
1537-
return;
1535+
return true;
15381536
}
15391537

1538+
if (!md_write_start(mddev, bio))
1539+
return false;
1540+
15401541
/*
15411542
* If this request crosses a chunk boundary, we need to split
15421543
* it.
@@ -1553,6 +1554,7 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio)
15531554

15541555
/* In case raid10d snuck in to freeze_array */
15551556
wake_up(&conf->wait_barrier);
1557+
return true;
15561558
}
15571559

15581560
static void raid10_status(struct seq_file *seq, struct mddev *mddev)

drivers/md/raid5.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5479,7 +5479,6 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
54795479
last_sector = bi->bi_iter.bi_sector + (bi->bi_iter.bi_size>>9);
54805480

54815481
bi->bi_next = NULL;
5482-
md_write_start(mddev, bi);
54835482

54845483
stripe_sectors = conf->chunk_sectors *
54855484
(conf->raid_disks - conf->max_degraded);
@@ -5549,11 +5548,10 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
55495548
release_stripe_plug(mddev, sh);
55505549
}
55515550

5552-
md_write_end(mddev);
55535551
bio_endio(bi);
55545552
}
55555553

5556-
static void raid5_make_request(struct mddev *mddev, struct bio * bi)
5554+
static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
55575555
{
55585556
struct r5conf *conf = mddev->private;
55595557
int dd_idx;
@@ -5569,10 +5567,10 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
55695567
int ret = r5l_handle_flush_request(conf->log, bi);
55705568

55715569
if (ret == 0)
5572-
return;
5570+
return true;
55735571
if (ret == -ENODEV) {
55745572
md_flush_request(mddev, bi);
5575-
return;
5573+
return true;
55765574
}
55775575
/* ret == -EAGAIN, fallback */
55785576
/*
@@ -5582,6 +5580,8 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
55825580
do_flush = bi->bi_opf & REQ_PREFLUSH;
55835581
}
55845582

5583+
if (!md_write_start(mddev, bi))
5584+
return false;
55855585
/*
55865586
* If array is degraded, better not do chunk aligned read because
55875587
* later we might have to read it again in order to reconstruct
@@ -5591,18 +5591,18 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
55915591
mddev->reshape_position == MaxSector) {
55925592
bi = chunk_aligned_read(mddev, bi);
55935593
if (!bi)
5594-
return;
5594+
return true;
55955595
}
55965596

55975597
if (unlikely(bio_op(bi) == REQ_OP_DISCARD)) {
55985598
make_discard_request(mddev, bi);
5599-
return;
5599+
md_write_end(mddev);
5600+
return true;
56005601
}
56015602

56025603
logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
56035604
last_sector = bio_end_sector(bi);
56045605
bi->bi_next = NULL;
5605-
md_write_start(mddev, bi);
56065606

56075607
prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
56085608
for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
@@ -5740,6 +5740,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
57405740
if (rw == WRITE)
57415741
md_write_end(mddev);
57425742
bio_endio(bi);
5743+
return true;
57435744
}
57445745

57455746
static sector_t raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks);

0 commit comments

Comments
 (0)