@@ -518,14 +518,33 @@ static void dm_io_acct(bool end, struct mapped_device *md, struct bio *bio,
518518 bio -> bi_iter .bi_size = bi_size ;
519519}
520520
521- static void dm_start_io_acct (struct dm_io * io )
521+ static void __dm_start_io_acct (struct dm_io * io , struct bio * bio )
522522{
523- dm_io_acct (false, io -> md , io -> orig_bio , io -> start_time , & io -> stats_aux );
523+ dm_io_acct (false, io -> md , bio , io -> start_time , & io -> stats_aux );
524524}
525525
526- static void dm_end_io_acct (struct dm_io * io )
526+ static void dm_start_io_acct (struct dm_io * io , struct bio * clone )
527527{
528- dm_io_acct (true, io -> md , io -> orig_bio , io -> start_time , & io -> stats_aux );
528+ /* Must account IO to DM device in terms of orig_bio */
529+ struct bio * bio = io -> orig_bio ;
530+
531+ /*
532+ * Ensure IO accounting is only ever started once.
533+ * Expect no possibility for race unless is_duplicate_bio.
534+ */
535+ if (!clone || likely (!clone_to_tio (clone )-> is_duplicate_bio )) {
536+ if (WARN_ON (io -> was_accounted ))
537+ return ;
538+ io -> was_accounted = 1 ;
539+ } else if (xchg (& io -> was_accounted , 1 ) == 1 )
540+ return ;
541+
542+ __dm_start_io_acct (io , bio );
543+ }
544+
545+ static void dm_end_io_acct (struct dm_io * io , struct bio * bio )
546+ {
547+ dm_io_acct (true, io -> md , bio , io -> start_time , & io -> stats_aux );
529548}
530549
531550static struct dm_io * alloc_io (struct mapped_device * md , struct bio * bio )
@@ -545,11 +564,13 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
545564 io -> status = 0 ;
546565 atomic_set (& io -> io_count , 1 );
547566 this_cpu_inc (* md -> pending_io );
548- io -> orig_bio = bio ;
567+ io -> orig_bio = NULL ;
549568 io -> md = md ;
550569 spin_lock_init (& io -> endio_lock );
551570
552571 io -> start_time = jiffies ;
572+ io -> start_io_acct = false;
573+ io -> was_accounted = 0 ;
553574
554575 dm_stats_record_start (& md -> stats , & io -> stats_aux );
555576
@@ -849,7 +870,16 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error)
849870 }
850871
851872 io_error = io -> status ;
852- dm_end_io_acct (io );
873+ if (io -> was_accounted )
874+ dm_end_io_acct (io , bio );
875+ else if (!io_error ) {
876+ /*
877+ * Must handle a target that returned DM_MAPIO_SUBMITTED only to
878+ * then call bio_endio() rather than dm_submit_bio_remap()
879+ */
880+ __dm_start_io_acct (io , bio );
881+ dm_end_io_acct (io , bio );
882+ }
853883 free_io (io );
854884 smp_wmb ();
855885 this_cpu_dec (* md -> pending_io );
@@ -1131,6 +1161,56 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
11311161}
11321162EXPORT_SYMBOL_GPL (dm_accept_partial_bio );
11331163
1164+ static inline void __dm_submit_bio_remap (struct bio * clone ,
1165+ dev_t dev , sector_t old_sector )
1166+ {
1167+ trace_block_bio_remap (clone , dev , old_sector );
1168+ submit_bio_noacct (clone );
1169+ }
1170+
1171+ /*
1172+ * @clone: clone bio that DM core passed to target's .map function
1173+ * @tgt_clone: clone of @clone bio that target needs submitted
1174+ * @from_wq: caller is a workqueue thread managed by DM target
1175+ *
1176+ * Targets should use this interface to submit bios they take
1177+ * ownership of when returning DM_MAPIO_SUBMITTED.
1178+ *
1179+ * Target should also enable ti->accounts_remapped_io
1180+ */
1181+ void dm_submit_bio_remap (struct bio * clone , struct bio * tgt_clone ,
1182+ bool from_wq )
1183+ {
1184+ struct dm_target_io * tio = clone_to_tio (clone );
1185+ struct dm_io * io = tio -> io ;
1186+
1187+ /* establish bio that will get submitted */
1188+ if (!tgt_clone )
1189+ tgt_clone = clone ;
1190+
1191+ /*
1192+ * Account io->orig_bio to DM dev on behalf of target
1193+ * that took ownership of IO with DM_MAPIO_SUBMITTED.
1194+ */
1195+ if (!from_wq ) {
1196+ /* Still in target's map function */
1197+ io -> start_io_acct = true;
1198+ } else {
1199+ /*
1200+ * Called by another thread, managed by DM target,
1201+ * wait for dm_split_and_process_bio() to store
1202+ * io->orig_bio
1203+ */
1204+ while (unlikely (!smp_load_acquire (& io -> orig_bio )))
1205+ msleep (1 );
1206+ dm_start_io_acct (io , clone );
1207+ }
1208+
1209+ __dm_submit_bio_remap (tgt_clone , disk_devt (io -> md -> disk ),
1210+ tio -> old_sector );
1211+ }
1212+ EXPORT_SYMBOL_GPL (dm_submit_bio_remap );
1213+
11341214static noinline void __set_swap_bios_limit (struct mapped_device * md , int latch )
11351215{
11361216 mutex_lock (& md -> swap_bios_lock );
@@ -1157,9 +1237,7 @@ static void __map_bio(struct bio *clone)
11571237 clone -> bi_end_io = clone_endio ;
11581238
11591239 /*
1160- * Map the clone. If r == 0 we don't need to do
1161- * anything, the target has assumed ownership of
1162- * this io.
1240+ * Map the clone.
11631241 */
11641242 dm_io_inc_pending (io );
11651243 tio -> old_sector = clone -> bi_iter .bi_sector ;
@@ -1184,12 +1262,18 @@ static void __map_bio(struct bio *clone)
11841262
11851263 switch (r ) {
11861264 case DM_MAPIO_SUBMITTED :
1265+ /* target has assumed ownership of this io */
1266+ if (!ti -> accounts_remapped_io )
1267+ io -> start_io_acct = true;
11871268 break ;
11881269 case DM_MAPIO_REMAPPED :
1189- /* the bio has been remapped so dispatch it */
1190- trace_block_bio_remap (clone , bio_dev (io -> orig_bio ),
1270+ /*
1271+ * the bio has been remapped so dispatch it, but defer
1272+ * dm_start_io_acct() until after possible bio_split().
1273+ */
1274+ __dm_submit_bio_remap (clone , disk_devt (io -> md -> disk ),
11911275 tio -> old_sector );
1192- submit_bio_noacct ( clone ) ;
1276+ io -> start_io_acct = true ;
11931277 break ;
11941278 case DM_MAPIO_KILL :
11951279 case DM_MAPIO_REQUEUE :
@@ -1404,7 +1488,7 @@ static void dm_split_and_process_bio(struct mapped_device *md,
14041488 struct dm_table * map , struct bio * bio )
14051489{
14061490 struct clone_info ci ;
1407- struct bio * b ;
1491+ struct bio * orig_bio = NULL ;
14081492 int error = 0 ;
14091493
14101494 init_clone_info (& ci , md , map , bio );
@@ -1426,15 +1510,18 @@ static void dm_split_and_process_bio(struct mapped_device *md,
14261510 * used by dm_end_io_acct() and for dm_io_dec_pending() to use for
14271511 * completion handling.
14281512 */
1429- b = bio_split (bio , bio_sectors (bio ) - ci .sector_count ,
1430- GFP_NOIO , & md -> queue -> bio_split );
1431- ci .io -> orig_bio = b ;
1432-
1433- bio_chain (b , bio );
1434- trace_block_split (b , bio -> bi_iter .bi_sector );
1513+ orig_bio = bio_split (bio , bio_sectors (bio ) - ci .sector_count ,
1514+ GFP_NOIO , & md -> queue -> bio_split );
1515+ bio_chain (orig_bio , bio );
1516+ trace_block_split (orig_bio , bio -> bi_iter .bi_sector );
14351517 submit_bio_noacct (bio );
14361518out :
1437- dm_start_io_acct (ci .io );
1519+ if (!orig_bio )
1520+ orig_bio = bio ;
1521+ smp_store_release (& ci .io -> orig_bio , orig_bio );
1522+ if (ci .io -> start_io_acct )
1523+ dm_start_io_acct (ci .io , NULL );
1524+
14381525 /* drop the extra reference count */
14391526 dm_io_dec_pending (ci .io , errno_to_blk_status (error ));
14401527}
0 commit comments