Skip to content

Commit 3312e6c

Browse files
lsgunth authored and axboe committed
md/raid5: Keep a reference to last stripe_head for batch
When batching, every stripe head has to find the previous stripe head to add to the batch list. This involves taking the hash lock which is highly contended during IO.

Instead of finding the previous stripe_head each time, store a reference to the previous stripe_head in a pointer so that it doesn't require taking the contended lock another time.

The reference to the previous stripe must be released before scheduling and waiting for work to get done. Otherwise, it can hold up raid5_activate_delayed() and deadlock.

Signed-off-by: Logan Gunthorpe <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Acked-by: Guoqing Jiang <[email protected]>
Signed-off-by: Song Liu <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
1 parent 0a2d169 commit 3312e6c

File tree

1 file changed

+40
-12
lines changed

1 file changed

+40
-12
lines changed

drivers/md/raid5.c

Lines changed: 40 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -843,7 +843,8 @@ static bool stripe_can_batch(struct stripe_head *sh)
843843
}
844844

845845
/* we only do back search */
846-
static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh)
846+
static void stripe_add_to_batch_list(struct r5conf *conf,
847+
struct stripe_head *sh, struct stripe_head *last_sh)
847848
{
848849
struct stripe_head *head;
849850
sector_t head_sector, tmp_sec;
@@ -856,15 +857,20 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
856857
return;
857858
head_sector = sh->sector - RAID5_STRIPE_SECTORS(conf);
858859

859-
hash = stripe_hash_locks_hash(conf, head_sector);
860-
spin_lock_irq(conf->hash_locks + hash);
861-
head = find_get_stripe(conf, head_sector, conf->generation, hash);
862-
spin_unlock_irq(conf->hash_locks + hash);
863-
864-
if (!head)
865-
return;
866-
if (!stripe_can_batch(head))
867-
goto out;
860+
if (last_sh && head_sector == last_sh->sector) {
861+
head = last_sh;
862+
atomic_inc(&head->count);
863+
} else {
864+
hash = stripe_hash_locks_hash(conf, head_sector);
865+
spin_lock_irq(conf->hash_locks + hash);
866+
head = find_get_stripe(conf, head_sector, conf->generation,
867+
hash);
868+
spin_unlock_irq(conf->hash_locks + hash);
869+
if (!head)
870+
return;
871+
if (!stripe_can_batch(head))
872+
goto out;
873+
}
868874

869875
lock_two_stripes(head, sh);
870876
/* clear_batch_ready clear the flag */
@@ -5795,6 +5801,8 @@ enum stripe_result {
57955801
};
57965802

57975803
struct stripe_request_ctx {
5804+
/* a reference to the last stripe_head for batching */
5805+
struct stripe_head *batch_last;
57985806
/* the request had REQ_PREFLUSH, cleared after the first stripe_head */
57995807
bool do_flush;
58005808
};
@@ -5889,8 +5897,13 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
58895897
goto out_release;
58905898
}
58915899

5892-
if (stripe_can_batch(sh))
5893-
stripe_add_to_batch_list(conf, sh);
5900+
if (stripe_can_batch(sh)) {
5901+
stripe_add_to_batch_list(conf, sh, ctx->batch_last);
5902+
if (ctx->batch_last)
5903+
raid5_release_stripe(ctx->batch_last);
5904+
atomic_inc(&sh->count);
5905+
ctx->batch_last = sh;
5906+
}
58945907

58955908
if (ctx->do_flush) {
58965909
set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
@@ -5985,6 +5998,18 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
59855998
continue;
59865999

59876000
if (res == STRIPE_SCHEDULE_AND_RETRY) {
6001+
/*
6002+
* Must release the reference to batch_last before
6003+
* scheduling and waiting for work to be done,
6004+
* otherwise the batch_last stripe head could prevent
6005+
* raid5_activate_delayed() from making progress
6006+
* and thus deadlocking.
6007+
*/
6008+
if (ctx.batch_last) {
6009+
raid5_release_stripe(ctx.batch_last);
6010+
ctx.batch_last = NULL;
6011+
}
6012+
59886013
schedule();
59896014
prepare_to_wait(&conf->wait_for_overlap, &w,
59906015
TASK_UNINTERRUPTIBLE);
@@ -5996,6 +6021,9 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
59966021

59976022
finish_wait(&conf->wait_for_overlap, &w);
59986023

6024+
if (ctx.batch_last)
6025+
raid5_release_stripe(ctx.batch_last);
6026+
59996027
if (rw == WRITE)
60006028
md_write_end(mddev);
60016029
bio_endio(bi);

0 commit comments

Comments
 (0)