Skip to content

Commit a2826aa

Browse files
committed
md: support barrier requests on all personalities.
Previously barriers were only supported on RAID1. This is because other levels require synchronisation across all devices and so needed a different approach. Here is that approach. When a barrier arrives, we send a zero-length barrier to every active device. When that completes - and if the original request was not empty - we submit the barrier request itself (with the barrier flag cleared) and then submit a fresh load of zero-length barriers. The barrier request itself is asynchronous, but any subsequent request will block until the barrier completes. The reason for clearing the barrier flag is that a barrier request is allowed to fail. If we pass a non-empty barrier through a striping raid level it is conceivable that part of it could succeed and part could fail. That would be way too hard to deal with. So if the first run of zero-length barriers succeeds, we assume all is sufficiently well that we send the request and ignore errors in the second run of barriers. RAID5 needs extra care as write requests may not have been submitted to the underlying devices yet. So we flush the stripe cache before proceeding with the barrier. Note that the second set of zero-length barriers is submitted immediately after the original request is submitted. Thus when a personality finds mddev->barrier to be set during make_request, it should not return from make_request until the corresponding per-device request(s) have been queued. That will be done in later patches. Signed-off-by: NeilBrown <[email protected]> Reviewed-by: Andre Noll <[email protected]>
1 parent efa5933 commit a2826aa

File tree

7 files changed

+126
-7
lines changed

7 files changed

+126
-7
lines changed

drivers/md/linear.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
292292
int cpu;
293293

294294
if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
295-
bio_endio(bio, -EOPNOTSUPP);
295+
md_barrier_request(mddev, bio);
296296
return 0;
297297
}
298298

drivers/md/md.c

Lines changed: 103 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,12 +213,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
213213
return 0;
214214
}
215215
rcu_read_lock();
216-
if (mddev->suspended) {
216+
if (mddev->suspended || mddev->barrier) {
217217
DEFINE_WAIT(__wait);
218218
for (;;) {
219219
prepare_to_wait(&mddev->sb_wait, &__wait,
220220
TASK_UNINTERRUPTIBLE);
221-
if (!mddev->suspended)
221+
if (!mddev->suspended && !mddev->barrier)
222222
break;
223223
rcu_read_unlock();
224224
schedule();
@@ -260,10 +260,110 @@ static void mddev_resume(mddev_t *mddev)
260260

261261
/*
 * Report whether the array should be treated as congested.
 * Returns 1 while a barrier is pending (mddev->barrier is non-NULL);
 * otherwise returns mddev->suspended. The 'bits' argument is not
 * consulted in this function.
 */
int mddev_congested(mddev_t *mddev, int bits)
262262
{
263+
if (mddev->barrier)
264+
return 1;
263265
return mddev->suspended;
264266
}
265267
EXPORT_SYMBOL(mddev_congested);
266268

269+
/*
270+
* Generic barrier handling for md
271+
*/
272+
273+
#define POST_REQUEST_BARRIER ((void*)1)
274+
275+
/*
 * Completion callback for the zero-length barrier bios issued by
 * submit_barriers(). bio->bi_private carries the rdev the bio was
 * sent to.
 *
 * An -EOPNOTSUPP result is recorded on the original barrier bio, but
 * only while mddev->barrier still points at that bio; once it has
 * been replaced by POST_REQUEST_BARRIER the error is ignored.
 *
 * Whoever drops mddev->flush_pending to zero finishes the phase:
 * a post-request phase clears mddev->barrier and wakes sb_wait,
 * a pre-request phase schedules mddev->barrier_work.
 */
static void md_end_barrier(struct bio *bio, int err)
276+
{
277+
mdk_rdev_t *rdev = bio->bi_private;
278+
mddev_t *mddev = rdev->mddev;
279+
if (err == -EOPNOTSUPP && mddev->barrier != POST_REQUEST_BARRIER)
280+
set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags);
281+
282+
/* Drop the nr_pending reference that was held for this request */
rdev_dec_pending(rdev, mddev);
283+
284+
if (atomic_dec_and_test(&mddev->flush_pending)) {
285+
if (mddev->barrier == POST_REQUEST_BARRIER) {
286+
/* This was a post-request barrier */
287+
mddev->barrier = NULL;
288+
wake_up(&mddev->sb_wait);
289+
} else
290+
/* The pre-request barrier has finished */
291+
schedule_work(&mddev->barrier_work);
292+
}
293+
/* Release the zero-length bio allocated in submit_barriers() */
bio_put(bio);
294+
}
295+
296+
/*
 * Send a zero-length WRITE_BARRIER bio to every rdev on mddev->disks
 * that has raid_disk >= 0 and is not marked Faulty.
 *
 * mddev->flush_pending is incremented once per bio submitted; each
 * completion runs md_end_barrier(), which decrements it again.
 *
 * The list is walked under rcu_read_lock(), but the lock is dropped
 * around the allocation and submission of each bio.
 *
 * NOTE(review): the result of bio_alloc() is used without a NULL
 * check - confirm that this allocation cannot fail with GFP_KERNEL.
 */
static void submit_barriers(mddev_t *mddev)
297+
{
298+
mdk_rdev_t *rdev;
299+
300+
rcu_read_lock();
301+
list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
302+
if (rdev->raid_disk >= 0 &&
303+
!test_bit(Faulty, &rdev->flags)) {
304+
/* Take two references, one is dropped
305+
* when request finishes, one after
306+
* we reclaim rcu_read_lock
307+
*/
308+
struct bio *bi;
309+
atomic_inc(&rdev->nr_pending);
310+
atomic_inc(&rdev->nr_pending);
311+
rcu_read_unlock();
312+
bi = bio_alloc(GFP_KERNEL, 0);
313+
bi->bi_end_io = md_end_barrier;
314+
bi->bi_private = rdev;
315+
bi->bi_bdev = rdev->bdev;
316+
/* Count this bio before submitting it, so its completion has something to decrement */
atomic_inc(&mddev->flush_pending);
317+
submit_bio(WRITE_BARRIER, bi);
318+
rcu_read_lock();
319+
rdev_dec_pending(rdev, mddev);
320+
}
321+
rcu_read_unlock();
322+
}
323+
324+
/*
 * Work handler for mddev->barrier_work; scheduled once the
 * pre-request round of zero-length barriers has completed.
 *
 * - If BIO_EOPNOTSUPP was recorded on the barrier bio, the bio is
 *   ended with -EOPNOTSUPP.
 * - If the barrier bio is empty (bi_size == 0), it is ended with 0.
 * - Otherwise the barrier flag is cleared, the bio is handed to the
 *   personality's make_request, mddev->barrier is switched to
 *   POST_REQUEST_BARRIER and a second round of zero-length barriers
 *   is submitted.
 *
 * flush_pending is set to 1 on entry and decremented at the end; if
 * that final decrement reaches zero, mddev->barrier is cleared and
 * waiters on sb_wait are woken here.
 */
static void md_submit_barrier(struct work_struct *ws)
325+
{
326+
mddev_t *mddev = container_of(ws, mddev_t, barrier_work);
327+
struct bio *bio = mddev->barrier;
328+
329+
atomic_set(&mddev->flush_pending, 1);
330+
331+
if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
332+
bio_endio(bio, -EOPNOTSUPP);
333+
else if (bio->bi_size == 0)
334+
/* an empty barrier - all done */
335+
bio_endio(bio, 0);
336+
else {
337+
/* Submit the request itself as an ordinary (non-barrier) request */
bio->bi_rw &= ~(1<<BIO_RW_BARRIER);
338+
if (mddev->pers->make_request(mddev->queue, bio))
339+
generic_make_request(bio);
340+
mddev->barrier = POST_REQUEST_BARRIER;
341+
submit_barriers(mddev);
342+
}
343+
if (atomic_dec_and_test(&mddev->flush_pending)) {
344+
mddev->barrier = NULL;
345+
wake_up(&mddev->sb_wait);
346+
}
347+
}
348+
349+
/*
 * Entry point for handling a barrier bio on behalf of a personality.
 *
 * Waits on sb_wait (under mddev->write_lock) until no other barrier
 * is recorded in mddev->barrier, then records 'bio' there.
 * flush_pending is set to 1 before the zero-length barriers are
 * submitted and decremented afterwards; if that decrement reaches
 * zero, barrier_work is scheduled from here, otherwise it is
 * scheduled by md_end_barrier() when the last completion arrives.
 *
 * Returns nothing; the bio is completed later from
 * md_submit_barrier() or by the personality's make_request path.
 */
void md_barrier_request(mddev_t *mddev, struct bio *bio)
350+
{
351+
spin_lock_irq(&mddev->write_lock);
352+
wait_event_lock_irq(mddev->sb_wait,
353+
!mddev->barrier,
354+
mddev->write_lock, /*nothing*/);
355+
mddev->barrier = bio;
356+
spin_unlock_irq(&mddev->write_lock);
357+
358+
atomic_set(&mddev->flush_pending, 1);
359+
INIT_WORK(&mddev->barrier_work, md_submit_barrier);
360+
361+
submit_barriers(mddev);
362+
363+
if (atomic_dec_and_test(&mddev->flush_pending))
364+
schedule_work(&mddev->barrier_work);
365+
}
366+
EXPORT_SYMBOL(md_barrier_request);
267367

268368
static inline mddev_t *mddev_get(mddev_t *mddev)
269369
{
@@ -371,6 +471,7 @@ static mddev_t * mddev_find(dev_t unit)
371471
atomic_set(&new->openers, 0);
372472
atomic_set(&new->active_io, 0);
373473
spin_lock_init(&new->write_lock);
474+
atomic_set(&new->flush_pending, 0);
374475
init_waitqueue_head(&new->sb_wait);
375476
init_waitqueue_head(&new->recovery_wait);
376477
new->reshape_position = MaxSector;

drivers/md/md.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,17 @@ struct mddev_s
292292
struct mutex bitmap_mutex;
293293

294294
struct list_head all_mddevs;
295+
296+
/* Generic barrier handling.
297+
* If there is a pending barrier request, all other
298+
* writes are blocked while the devices are flushed.
299+
* The last to finish a flush schedules a worker to
300+
* submit the barrier request (without the barrier flag),
301+
* then submit more flush requests.
302+
*/
303+
struct bio *barrier;
304+
atomic_t flush_pending;
305+
struct work_struct barrier_work;
295306
};
296307

297308

@@ -432,6 +443,7 @@ extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
432443
extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
433444

434445
extern int mddev_congested(mddev_t *mddev, int bits);
446+
extern void md_barrier_request(mddev_t *mddev, struct bio *bio);
435447
extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
436448
sector_t sector, int size, struct page *page);
437449
extern void md_super_wait(mddev_t *mddev);

drivers/md/multipath.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
145145
int cpu;
146146

147147
if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
148-
bio_endio(bio, -EOPNOTSUPP);
148+
md_barrier_request(mddev, bio);
149149
return 0;
150150
}
151151

drivers/md/raid0.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio)
453453
int cpu;
454454

455455
if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
456-
bio_endio(bio, -EOPNOTSUPP);
456+
md_barrier_request(mddev, bio);
457457
return 0;
458458
}
459459

drivers/md/raid10.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -804,7 +804,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
804804
mdk_rdev_t *blocked_rdev;
805805

806806
if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
807-
bio_endio(bio, -EOPNOTSUPP);
807+
md_barrier_request(mddev, bio);
808808
return 0;
809809
}
810810

drivers/md/raid5.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3865,7 +3865,13 @@ static int make_request(struct request_queue *q, struct bio * bi)
38653865
int cpu, remaining;
38663866

38673867
if (unlikely(bio_rw_flagged(bi, BIO_RW_BARRIER))) {
3868-
bio_endio(bi, -EOPNOTSUPP);
3868+
/* Drain all pending writes. We only really need
3869+
* to ensure they have been submitted, but this is
3870+
* easier.
3871+
*/
3872+
mddev->pers->quiesce(mddev, 1);
3873+
mddev->pers->quiesce(mddev, 0);
3874+
md_barrier_request(mddev, bi);
38693875
return 0;
38703876
}
38713877

0 commit comments

Comments
 (0)