Skip to content

Commit 0fbb4d9

Browse files
committed
dm: add dm_submit_bio_remap interface
Where possible, switch from early bio-based IO accounting (at the time DM clones each incoming bio) to late IO accounting just before each remapped bio is issued to underlying device via submit_bio_noacct(). Allows more precise bio-based IO accounting for DM targets that use their own workqueues to perform additional processing of each bio in conjunction with their DM_MAPIO_SUBMITTED return from their map function. When a target is updated to use dm_submit_bio_remap() they must also set ti->accounts_remapped_io to true. Use xchg() in start_io_acct(), as suggested by Mikulas, to ensure each IO is only started once. The xchg race only happens if __send_duplicate_bios() sends multiple bios -- that case is reflected via tio->is_duplicate_bio. Given the niche nature of this race, it is best to avoid any xchg performance penalty for normal IO. For IO that was never submitted with dm_bio_submit_remap(), but the target completes the clone with bio_endio, accounting is started then ended and pending_io counter decremented. Reviewed-by: Mikulas Patocka <[email protected]> Signed-off-by: Mike Snitzer <[email protected]>
1 parent e6fc9f6 commit 0fbb4d9

File tree

4 files changed

+118
-22
lines changed

4 files changed

+118
-22
lines changed

drivers/md/dm-core.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,8 @@ struct dm_io {
232232
struct mapped_device *md;
233233
struct bio *orig_bio;
234234
blk_status_t status;
235+
bool start_io_acct:1;
236+
int was_accounted;
235237
unsigned long start_time;
236238
spinlock_t endio_lock;
237239
struct dm_stats_aux stats_aux;

drivers/md/dm.c

Lines changed: 107 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -518,14 +518,33 @@ static void dm_io_acct(bool end, struct mapped_device *md, struct bio *bio,
518518
bio->bi_iter.bi_size = bi_size;
519519
}
520520

521-
static void dm_start_io_acct(struct dm_io *io)
521+
static void __dm_start_io_acct(struct dm_io *io, struct bio *bio)
522522
{
523-
dm_io_acct(false, io->md, io->orig_bio, io->start_time, &io->stats_aux);
523+
dm_io_acct(false, io->md, bio, io->start_time, &io->stats_aux);
524524
}
525525

526-
static void dm_end_io_acct(struct dm_io *io)
526+
static void dm_start_io_acct(struct dm_io *io, struct bio *clone)
527527
{
528-
dm_io_acct(true, io->md, io->orig_bio, io->start_time, &io->stats_aux);
528+
/* Must account IO to DM device in terms of orig_bio */
529+
struct bio *bio = io->orig_bio;
530+
531+
/*
532+
* Ensure IO accounting is only ever started once.
533+
* Expect no possibility for race unless is_duplicate_bio.
534+
*/
535+
if (!clone || likely(!clone_to_tio(clone)->is_duplicate_bio)) {
536+
if (WARN_ON(io->was_accounted))
537+
return;
538+
io->was_accounted = 1;
539+
} else if (xchg(&io->was_accounted, 1) == 1)
540+
return;
541+
542+
__dm_start_io_acct(io, bio);
543+
}
544+
545+
static void dm_end_io_acct(struct dm_io *io, struct bio *bio)
546+
{
547+
dm_io_acct(true, io->md, bio, io->start_time, &io->stats_aux);
529548
}
530549

531550
static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
@@ -545,11 +564,13 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
545564
io->status = 0;
546565
atomic_set(&io->io_count, 1);
547566
this_cpu_inc(*md->pending_io);
548-
io->orig_bio = bio;
567+
io->orig_bio = NULL;
549568
io->md = md;
550569
spin_lock_init(&io->endio_lock);
551570

552571
io->start_time = jiffies;
572+
io->start_io_acct = false;
573+
io->was_accounted = 0;
553574

554575
dm_stats_record_start(&md->stats, &io->stats_aux);
555576

@@ -849,7 +870,16 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error)
849870
}
850871

851872
io_error = io->status;
852-
dm_end_io_acct(io);
873+
if (io->was_accounted)
874+
dm_end_io_acct(io, bio);
875+
else if (!io_error) {
876+
/*
877+
* Must handle target that DM_MAPIO_SUBMITTED only to
878+
* then bio_endio() rather than dm_submit_bio_remap()
879+
*/
880+
__dm_start_io_acct(io, bio);
881+
dm_end_io_acct(io, bio);
882+
}
853883
free_io(io);
854884
smp_wmb();
855885
this_cpu_dec(*md->pending_io);
@@ -1131,6 +1161,56 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
11311161
}
11321162
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
11331163

1164+
static inline void __dm_submit_bio_remap(struct bio *clone,
1165+
dev_t dev, sector_t old_sector)
1166+
{
1167+
trace_block_bio_remap(clone, dev, old_sector);
1168+
submit_bio_noacct(clone);
1169+
}
1170+
1171+
/*
1172+
* @clone: clone bio that DM core passed to target's .map function
1173+
* @tgt_clone: clone of @clone bio that target needs submitted
1174+
* @from_wq: caller is a workqueue thread managed by DM target
1175+
*
1176+
* Targets should use this interface to submit bios they take
1177+
* ownership of when returning DM_MAPIO_SUBMITTED.
1178+
*
1179+
* Target should also enable ti->accounts_remapped_io
1180+
*/
1181+
void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone,
1182+
bool from_wq)
1183+
{
1184+
struct dm_target_io *tio = clone_to_tio(clone);
1185+
struct dm_io *io = tio->io;
1186+
1187+
/* establish bio that will get submitted */
1188+
if (!tgt_clone)
1189+
tgt_clone = clone;
1190+
1191+
/*
1192+
* Account io->origin_bio to DM dev on behalf of target
1193+
* that took ownership of IO with DM_MAPIO_SUBMITTED.
1194+
*/
1195+
if (!from_wq) {
1196+
/* Still in target's map function */
1197+
io->start_io_acct = true;
1198+
} else {
1199+
/*
1200+
* Called by another thread, managed by DM target,
1201+
* wait for dm_split_and_process_bio() to store
1202+
* io->orig_bio
1203+
*/
1204+
while (unlikely(!smp_load_acquire(&io->orig_bio)))
1205+
msleep(1);
1206+
dm_start_io_acct(io, clone);
1207+
}
1208+
1209+
__dm_submit_bio_remap(tgt_clone, disk_devt(io->md->disk),
1210+
tio->old_sector);
1211+
}
1212+
EXPORT_SYMBOL_GPL(dm_submit_bio_remap);
1213+
11341214
static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch)
11351215
{
11361216
mutex_lock(&md->swap_bios_lock);
@@ -1157,9 +1237,7 @@ static void __map_bio(struct bio *clone)
11571237
clone->bi_end_io = clone_endio;
11581238

11591239
/*
1160-
* Map the clone. If r == 0 we don't need to do
1161-
* anything, the target has assumed ownership of
1162-
* this io.
1240+
* Map the clone.
11631241
*/
11641242
dm_io_inc_pending(io);
11651243
tio->old_sector = clone->bi_iter.bi_sector;
@@ -1184,12 +1262,18 @@ static void __map_bio(struct bio *clone)
11841262

11851263
switch (r) {
11861264
case DM_MAPIO_SUBMITTED:
1265+
/* target has assumed ownership of this io */
1266+
if (!ti->accounts_remapped_io)
1267+
io->start_io_acct = true;
11871268
break;
11881269
case DM_MAPIO_REMAPPED:
1189-
/* the bio has been remapped so dispatch it */
1190-
trace_block_bio_remap(clone, bio_dev(io->orig_bio),
1270+
/*
1271+
* the bio has been remapped so dispatch it, but defer
1272+
* dm_start_io_acct() until after possible bio_split().
1273+
*/
1274+
__dm_submit_bio_remap(clone, disk_devt(io->md->disk),
11911275
tio->old_sector);
1192-
submit_bio_noacct(clone);
1276+
io->start_io_acct = true;
11931277
break;
11941278
case DM_MAPIO_KILL:
11951279
case DM_MAPIO_REQUEUE:
@@ -1404,7 +1488,7 @@ static void dm_split_and_process_bio(struct mapped_device *md,
14041488
struct dm_table *map, struct bio *bio)
14051489
{
14061490
struct clone_info ci;
1407-
struct bio *b;
1491+
struct bio *orig_bio = NULL;
14081492
int error = 0;
14091493

14101494
init_clone_info(&ci, md, map, bio);
@@ -1426,15 +1510,18 @@ static void dm_split_and_process_bio(struct mapped_device *md,
14261510
* used by dm_end_io_acct() and for dm_io_dec_pending() to use for
14271511
* completion handling.
14281512
*/
1429-
b = bio_split(bio, bio_sectors(bio) - ci.sector_count,
1430-
GFP_NOIO, &md->queue->bio_split);
1431-
ci.io->orig_bio = b;
1432-
1433-
bio_chain(b, bio);
1434-
trace_block_split(b, bio->bi_iter.bi_sector);
1513+
orig_bio = bio_split(bio, bio_sectors(bio) - ci.sector_count,
1514+
GFP_NOIO, &md->queue->bio_split);
1515+
bio_chain(orig_bio, bio);
1516+
trace_block_split(orig_bio, bio->bi_iter.bi_sector);
14351517
submit_bio_noacct(bio);
14361518
out:
1437-
dm_start_io_acct(ci.io);
1519+
if (!orig_bio)
1520+
orig_bio = bio;
1521+
smp_store_release(&ci.io->orig_bio, orig_bio);
1522+
if (ci.io->start_io_acct)
1523+
dm_start_io_acct(ci.io, NULL);
1524+
14381525
/* drop the extra reference count */
14391526
dm_io_dec_pending(ci.io, errno_to_blk_status(error));
14401527
}

include/linux/device-mapper.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,12 @@ struct dm_target {
362362
* zone append operations using regular writes.
363363
*/
364364
bool emulate_zone_append:1;
365+
366+
/*
367+
* Set if the target will submit IO using dm_submit_bio_remap()
368+
* after returning DM_MAPIO_SUBMITTED from its map function.
369+
*/
370+
bool accounts_remapped_io:1;
365371
};
366372

367373
void *dm_per_bio_data(struct bio *bio, size_t data_size);
@@ -465,6 +471,7 @@ int dm_suspended(struct dm_target *ti);
465471
int dm_post_suspending(struct dm_target *ti);
466472
int dm_noflush_suspending(struct dm_target *ti);
467473
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors);
474+
void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone, bool from_wq);
468475
union map_info *dm_get_rq_mapinfo(struct request *rq);
469476

470477
#ifdef CONFIG_BLK_DEV_ZONED

include/uapi/linux/dm-ioctl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -286,9 +286,9 @@ enum {
286286
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
287287

288288
#define DM_VERSION_MAJOR 4
289-
#define DM_VERSION_MINOR 45
289+
#define DM_VERSION_MINOR 46
290290
#define DM_VERSION_PATCHLEVEL 0
291-
#define DM_VERSION_EXTRA "-ioctl (2021-03-22)"
291+
#define DM_VERSION_EXTRA "-ioctl (2022-02-22)"
292292

293293
/* Status bits */
294294
#define DM_READONLY_FLAG (1 << 0) /* In/Out */

0 commit comments

Comments
 (0)