@@ -1075,7 +1075,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
 static int bfqq_process_refs(struct bfq_queue *bfqq)
 {
         return bfqq->ref - bfqq->allocated - bfqq->entity.on_st_or_in_serv -
-                (bfqq->weight_counter != NULL);
+                (bfqq->weight_counter != NULL) - bfqq->stable_ref;
 }
 
 /* Empty burst list and add just bfqq (see comments on bfq_handle_burst) */
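
A rough way to read the hunk above: process references are whatever is left of bfqq->ref after removing the references held for internal bookkeeping, and a scheduled-but-not-yet-performed stable merge now pins the queue with a reference that no process owns, so it must be subtracted too. A minimal user-space model of that arithmetic (struct and field names are invented for the illustration, not the kernel's):

#include <stdio.h>

/* Toy model of a queue's reference bookkeeping (not the kernel struct). */
struct toy_queue {
        int ref;            /* total references held on the queue */
        int allocated;      /* refs owned by allocated requests */
        int on_service;     /* ref owned by the scheduler while queued/in service */
        int has_weight_ctr; /* ref owned by the weight counter, 0 or 1 */
        int stable_ref;     /* refs taken only to pin a scheduled stable merge */
};

/* Mirrors the intent of bfqq_process_refs(): count only the references
 * that correspond to processes actually using the queue. */
static int toy_process_refs(const struct toy_queue *q)
{
        return q->ref - q->allocated - q->on_service -
                q->has_weight_ctr - q->stable_ref;
}

int main(void)
{
        /* One process using the queue, plus one pinned stable merge. */
        struct toy_queue q = { .ref = 4, .allocated = 1, .on_service = 1,
                               .has_weight_ctr = 0, .stable_ref = 1 };

        /* Without subtracting stable_ref this would report two processes. */
        printf("process refs = %d\n", toy_process_refs(&q));
        return 0;
}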
@@ -2628,6 +2628,11 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
         return true;
 }
 
+static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd,
+                                             struct bfq_queue *bfqq);
+
+static void bfq_put_stable_ref(struct bfq_queue *bfqq);
+
 /*
  * Attempt to schedule a merge of bfqq with the currently in-service
  * queue or with a close queue among the scheduled queues. Return
@@ -2650,10 +2655,49 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
  */
 static struct bfq_queue *
 bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
-                     void *io_struct, bool request)
+                     void *io_struct, bool request, struct bfq_io_cq *bic)
 {
         struct bfq_queue *in_service_bfqq, *new_bfqq;
 
+        /*
+         * Check delayed stable merge for rotational or non-queueing
+         * devs. For this branch to be executed, bfqq must not be
+         * currently merged with some other queue (i.e., bfqq->bic
+         * must be non null). If we considered also merged queues,
+         * then we should also check whether bfqq has already been
+         * merged with bic->stable_merge_bfqq. But this would be
+         * costly and complicated.
+         */
+        if (unlikely(!bfqd->nonrot_with_queueing)) {
+                if (bic->stable_merge_bfqq &&
+                    !bfq_bfqq_just_created(bfqq) &&
+                    time_is_after_jiffies(bfqq->split_time +
+                                          msecs_to_jiffies(200))) {
+                        struct bfq_queue *stable_merge_bfqq =
+                                bic->stable_merge_bfqq;
+                        int proc_ref = min(bfqq_process_refs(bfqq),
+                                           bfqq_process_refs(stable_merge_bfqq));
+
+                        /* deschedule stable merge, because done or aborted here */
+                        bfq_put_stable_ref(stable_merge_bfqq);
+
+                        bic->stable_merge_bfqq = NULL;
+
+                        if (!idling_boosts_thr_without_issues(bfqd, bfqq) &&
+                            proc_ref > 0) {
+                                /* next function will take at least one ref */
+                                struct bfq_queue *new_bfqq =
+                                        bfq_setup_merge(bfqq, stable_merge_bfqq);
+
+                                bic->stably_merged = true;
+                                if (new_bfqq && new_bfqq->bic)
+                                        new_bfqq->bic->stably_merged = true;
+                                return new_bfqq;
+                        } else
+                                return NULL;
+                }
+        }
+
 	/*
 	 * Do not perform queue merging if the device is non
 	 * rotational and performs internal queueing. In fact, such a
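
The gate added in the hunk above can be read as a small decision function: a pending stable merge is acted upon only when the queue is not brand new, less than 200 ms have passed since the queue was last split, and device idling would not already sustain throughput on its own; otherwise the pending merge is dropped. A hedged user-space sketch of that decision, with a plain millisecond clock standing in for jiffies and all names invented for the example:

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the delayed-stable-merge gate; not the kernel code. */
struct toy_ctx {
        bool has_pending_stable_merge; /* models bic->stable_merge_bfqq != NULL */
        bool just_created;             /* queue created very recently */
        long now_ms;                   /* current time */
        long split_time_ms;            /* when the queue was last split */
        bool idling_boosts_throughput; /* device-dependent heuristic */
        int  proc_refs;                /* min process refs of the two queues */
};

enum toy_decision { TOY_SKIP, TOY_ABORT_MERGE, TOY_DO_MERGE };

static enum toy_decision toy_delayed_merge_decision(const struct toy_ctx *c)
{
        if (!c->has_pending_stable_merge || c->just_created ||
            c->now_ms - c->split_time_ms >= 200)
                return TOY_SKIP; /* fall through to the normal merge path */

        /* The pending merge is consumed here, either done or aborted. */
        if (!c->idling_boosts_throughput && c->proc_refs > 0)
                return TOY_DO_MERGE;
        return TOY_ABORT_MERGE;
}

int main(void)
{
        /* Pending merge, 50 ms after the split, idling not beneficial. */
        struct toy_ctx c = { true, false, 1050, 1000, false, 2 };

        printf("decision = %d\n", toy_delayed_merge_decision(&c));
        return 0;
}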
@@ -2795,6 +2839,17 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
         }
 }
 
+
+static void
+bfq_reassign_last_bfqq(struct bfq_queue *cur_bfqq, struct bfq_queue *new_bfqq)
+{
+        if (cur_bfqq->entity.parent &&
+            cur_bfqq->entity.parent->last_bfqq_created == cur_bfqq)
+                cur_bfqq->entity.parent->last_bfqq_created = new_bfqq;
+        else if (cur_bfqq->bfqd && cur_bfqq->bfqd->last_bfqq_created == cur_bfqq)
+                cur_bfqq->bfqd->last_bfqq_created = new_bfqq;
+}
+
 void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 {
         /*
@@ -2812,6 +2867,8 @@ void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
             bfqq != bfqd->in_service_queue)
                 bfq_del_bfqq_busy(bfqd, bfqq, false);
 
+        bfq_reassign_last_bfqq(bfqq, NULL);
+
         bfq_put_queue(bfqq);
 }
 
@@ -2908,6 +2965,9 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
          */
         new_bfqq->pid = -1;
         bfqq->bic = NULL;
+
+        bfq_reassign_last_bfqq(bfqq, new_bfqq);
+
         bfq_release_process_ref(bfqd, bfqq);
 }
 
@@ -2935,7 +2995,7 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
          * We take advantage of this function to perform an early merge
          * of the queues of possible cooperating processes.
          */
-        new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false);
+        new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false, bfqd->bio_bic);
         if (new_bfqq) {
                 /*
                  * bic still points to bfqq, then it has not yet been
@@ -5034,6 +5094,12 @@ void bfq_put_queue(struct bfq_queue *bfqq)
         bfqg_and_blkg_put(bfqg);
 }
 
+static void bfq_put_stable_ref(struct bfq_queue *bfqq)
+{
+        bfqq->stable_ref--;
+        bfq_put_queue(bfqq);
+}
+
 static void bfq_put_cooperator(struct bfq_queue *bfqq)
 {
         struct bfq_queue *__bfqq, *next;
@@ -5090,6 +5156,24 @@ static void bfq_exit_icq(struct io_cq *icq)
 {
         struct bfq_io_cq *bic = icq_to_bic(icq);
 
+        if (bic->stable_merge_bfqq) {
+                struct bfq_data *bfqd = bic->stable_merge_bfqq->bfqd;
+
+                /*
+                 * bfqd is NULL if scheduler already exited, and in
+                 * that case this is the last time bfqq is accessed.
+                 */
+                if (bfqd) {
+                        unsigned long flags;
+
+                        spin_lock_irqsave(&bfqd->lock, flags);
+                        bfq_put_stable_ref(bic->stable_merge_bfqq);
+                        spin_unlock_irqrestore(&bfqd->lock, flags);
+                } else {
+                        bfq_put_stable_ref(bic->stable_merge_bfqq);
+                }
+        }
+
         bfq_exit_icq_bfqq(bic, true);
         bfq_exit_icq_bfqq(bic, false);
 }
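
The bfq_exit_icq() change above follows a "lock the owner only if it still exists" pattern: if the scheduler (bfqd) is already gone, nothing else can race for the pinned queue, so the stable reference can be dropped without taking the lock. A minimal pthread-based sketch of the same pattern under those assumptions (types and names are made up for the example):

#include <pthread.h>
#include <stdlib.h>

/* Toy scheduler and toy queue; not the kernel structures. */
struct toy_sched {
        pthread_mutex_t lock;
};

struct toy_queue {
        struct toy_sched *sched; /* NULL once the scheduler has exited */
        int ref;
        int stable_ref;
};

static void toy_put_stable_ref(struct toy_queue *q)
{
        q->stable_ref--;
        if (--q->ref == 0)
                free(q);
}

static void toy_drop_pinned_merge_target(struct toy_queue *q)
{
        struct toy_sched *sched = q->sched;

        if (sched) {
                /* Scheduler still alive: serialize with its other users. */
                pthread_mutex_lock(&sched->lock);
                toy_put_stable_ref(q);
                pthread_mutex_unlock(&sched->lock);
        } else {
                /* Scheduler already exited: this is the last access. */
                toy_put_stable_ref(q);
        }
}

int main(void)
{
        struct toy_sched sched = { .lock = PTHREAD_MUTEX_INITIALIZER };
        struct toy_queue *q = calloc(1, sizeof(*q));

        q->sched = &sched;
        q->ref = 1;
        q->stable_ref = 1;
        toy_drop_pinned_merge_target(q); /* drops the last ref and frees q */
        return 0;
}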
@@ -5150,7 +5234,8 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
 
 static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
                                        struct bio *bio, bool is_sync,
-                                       struct bfq_io_cq *bic);
+                                       struct bfq_io_cq *bic,
+                                       bool respawn);
 
 static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
 {
@@ -5170,7 +5255,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
         bfqq = bic_to_bfqq(bic, false);
         if (bfqq) {
                 bfq_release_process_ref(bfqd, bfqq);
-                bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic);
+                bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic, true);
                 bic_set_bfqq(bic, bfqq, false);
         }
 
@@ -5213,6 +5298,8 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
         /* set end request to minus infinity from now */
         bfqq->ttime.last_end_request = now_ns + 1;
 
+        bfqq->creation_time = jiffies;
+
         bfqq->io_start_time = now_ns;
 
         bfq_mark_bfqq_IO_bound(bfqq);
@@ -5262,9 +5349,156 @@ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
         }
 }
 
+static struct bfq_queue *
+bfq_do_early_stable_merge(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+                          struct bfq_io_cq *bic,
+                          struct bfq_queue *last_bfqq_created)
+{
+        struct bfq_queue *new_bfqq =
+                bfq_setup_merge(bfqq, last_bfqq_created);
+
+        if (!new_bfqq)
+                return bfqq;
+
+        if (new_bfqq->bic)
+                new_bfqq->bic->stably_merged = true;
+        bic->stably_merged = true;
+
+        /*
+         * Reusing merge functions. This implies that
+         * bfqq->bic must be set too, for
+         * bfq_merge_bfqqs to correctly save bfqq's
+         * state before killing it.
+         */
+        bfqq->bic = bic;
+        bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
+
+        return new_bfqq;
+}
+
+/*
+ * Many throughput-sensitive workloads are made of several parallel
+ * I/O flows, with all flows generated by the same application, or
+ * more generically by the same task (e.g., system boot). The most
+ * counterproductive action with these workloads is plugging I/O
+ * dispatch when one of the bfq_queues associated with these flows
+ * remains temporarily empty.
+ *
+ * To avoid this plugging, BFQ has been using a burst-handling
+ * mechanism for years now. This mechanism has proven effective for
+ * throughput, and not detrimental for service guarantees. The
+ * following function pushes this mechanism a little bit further,
+ * based on the following two facts.
+ *
+ * First, all the I/O flows of the same application or task
+ * contribute to the execution/completion of that common application
+ * or task. So the performance figures that matter are the total
+ * throughput of the flows and the task-wide I/O latency. In
+ * particular, these flows do not need to be protected from each
+ * other, in terms of individual bandwidth or latency.
+ *
+ * Second, the above fact holds regardless of the number of flows.
+ *
+ * Putting these two facts together, this commit stably merges the
+ * bfq_queues associated with these I/O flows, i.e., with the
+ * processes that generate these I/O flows, regardless of how many
+ * processes are involved.
+ *
+ * To decide whether a set of bfq_queues is actually associated with
+ * the I/O flows of a common application or task, and to merge these
+ * queues stably, this function operates as follows: given a bfq_queue,
+ * say Q2, currently being created, and the last bfq_queue, say Q1,
+ * created before Q2, Q2 is merged stably with Q1 if
+ * - very little time has elapsed since Q1 was created
+ * - Q2 has the same ioprio as Q1
+ * - Q2 belongs to the same group as Q1
+ *
+ * Merging bfq_queues also reduces scheduling overhead. A fio test
+ * with ten random readers on /dev/nullb shows a 40% throughput boost
+ * on a quad-core system. Since BFQ's execution time amounts to ~50%
+ * of the total per-request processing time, the above throughput
+ * boost implies that BFQ's overhead is reduced by more than 50%.
+ *
+ * This new mechanism most certainly obsoletes the current
+ * burst-handling heuristics. We keep those heuristics for the moment.
+ */
+static struct bfq_queue *bfq_do_or_sched_stable_merge(struct bfq_data *bfqd,
+                                                      struct bfq_queue *bfqq,
+                                                      struct bfq_io_cq *bic)
+{
+        struct bfq_queue **source_bfqq = bfqq->entity.parent ?
+                &bfqq->entity.parent->last_bfqq_created :
+                &bfqd->last_bfqq_created;
+
+        struct bfq_queue *last_bfqq_created = *source_bfqq;
+
+        /*
+         * If last_bfqq_created has not been set yet, then init it. If
+         * it has been set already, but too long ago, then move it
+         * forward to bfqq. Finally, move also if bfqq belongs to a
+         * different group than last_bfqq_created, or if bfqq has a
+         * different ioprio or ioprio_class. If none of these
+         * conditions holds true, then try an early stable merge or
+         * schedule a delayed stable merge.
+         *
+         * A delayed merge is scheduled (instead of performing an
+         * early merge), in case bfqq might soon prove to be more
+         * throughput-beneficial if not merged. Currently this is
+         * possible only if bfqd is rotational with no queueing. For
+         * such a drive, not merging bfqq is better for throughput if
+         * bfqq happens to contain sequential I/O. So, we wait a
+         * little bit for enough I/O to flow through bfqq. After that,
+         * if such an I/O is sequential, then the merge is
+         * canceled. Otherwise the merge is finally performed.
+         */
+        if (!last_bfqq_created ||
+            time_before(last_bfqq_created->creation_time +
+                        bfqd->bfq_burst_interval,
+                        bfqq->creation_time) ||
+            bfqq->entity.parent != last_bfqq_created->entity.parent ||
+            bfqq->ioprio != last_bfqq_created->ioprio ||
+            bfqq->ioprio_class != last_bfqq_created->ioprio_class)
+                *source_bfqq = bfqq;
+        else if (time_after_eq(last_bfqq_created->creation_time +
+                               bfqd->bfq_burst_interval,
+                               bfqq->creation_time)) {
+                if (likely(bfqd->nonrot_with_queueing))
+                        /*
+                         * With this type of drive, leaving
+                         * bfqq alone may provide no
+                         * throughput benefits compared with
+                         * merging bfqq. So merge bfqq now.
+                         */
+                        bfqq = bfq_do_early_stable_merge(bfqd, bfqq,
+                                                         bic,
+                                                         last_bfqq_created);
+                else { /* schedule tentative stable merge */
+                        /*
+                         * get reference on last_bfqq_created,
+                         * to prevent it from being freed,
+                         * until we decide whether to merge
+                         */
+                        last_bfqq_created->ref++;
+                        /*
+                         * need to keep track of stable refs, to
+                         * compute process refs correctly
+                         */
+                        last_bfqq_created->stable_ref++;
+                        /*
+                         * Record the bfqq to merge to.
+                         */
+                        bic->stable_merge_bfqq = last_bfqq_created;
+                }
+        }
+
+        return bfqq;
+}
+
+
 static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
                                        struct bio *bio, bool is_sync,
-                                       struct bfq_io_cq *bic)
+                                       struct bfq_io_cq *bic,
+                                       bool respawn)
 {
         const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
         const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
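
Taken together, bfq_do_or_sched_stable_merge() above reduces to a three-way decision: advance the last_bfqq_created pointer when the new queue is not a plausible sibling (created too late, different group, or different ioprio), merge immediately on fast queueing drives, or pin the candidate and defer the decision on rotational or non-queueing drives. A compact user-space model of that decision (plain integers replace jiffies; names are invented for the example and the refcounting and cgroup details are left out):

#include <stdbool.h>
#include <stdio.h>

/* Toy queue descriptor; not the kernel struct. */
struct toy_queue {
        long creation_time;
        int  ioprio;
        int  ioprio_class;
        int  group_id;
};

enum toy_action {
        TOY_BECOME_LAST_CREATED, /* remember this queue, no merge */
        TOY_EARLY_MERGE,         /* merge with the last created queue now */
        TOY_SCHEDULE_MERGE,      /* pin the candidate, decide later */
};

static enum toy_action
toy_stable_merge_action(const struct toy_queue *last, const struct toy_queue *q,
                        long burst_interval, bool nonrot_with_queueing)
{
        if (!last ||
            last->creation_time + burst_interval < q->creation_time ||
            last->group_id != q->group_id ||
            last->ioprio != q->ioprio ||
            last->ioprio_class != q->ioprio_class)
                return TOY_BECOME_LAST_CREATED;

        /* Created close enough to the previous queue, same group and ioprio. */
        return nonrot_with_queueing ? TOY_EARLY_MERGE : TOY_SCHEDULE_MERGE;
}

int main(void)
{
        struct toy_queue q1 = { .creation_time = 100, .ioprio = 4,
                                .ioprio_class = 2, .group_id = 1 };
        struct toy_queue q2 = { .creation_time = 110, .ioprio = 4,
                                .ioprio_class = 2, .group_id = 1 };

        printf("fast queueing drive: action = %d\n",
               toy_stable_merge_action(&q1, &q2, 60, true));  /* early merge */
        printf("rotational drive:    action = %d\n",
               toy_stable_merge_action(&q1, &q2, 60, false)); /* schedule merge */
        return 0;
}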
@@ -5322,7 +5556,10 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
 
 out:
         bfqq->ref++; /* get a process reference to this queue */
-        bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref);
+
+        if (bfqq != &bfqd->oom_bfqq && is_sync && !respawn)
+                bfqq = bfq_do_or_sched_stable_merge(bfqd, bfqq, bic);
+
         rcu_read_unlock();
         return bfqq;
 }
@@ -5572,7 +5809,8 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
 {
         struct bfq_queue *bfqq = RQ_BFQQ(rq),
-                *new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
+                *new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true,
+                                                 RQ_BIC(rq));
         bool waiting, idle_timer_disabled = false;
 
         if (new_bfqq) {
@@ -6227,7 +6465,7 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
 
         if (bfqq)
                 bfq_put_queue(bfqq);
-        bfqq = bfq_get_queue(bfqd, bio, is_sync, bic);
+        bfqq = bfq_get_queue(bfqd, bio, is_sync, bic, split);
 
         bic_set_bfqq(bic, bfqq, is_sync);
         if (split && is_sync) {
@@ -6348,7 +6586,8 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
 
         if (likely(!new_queue)) {
                 /* If the queue was seeky for too long, break it apart. */
-                if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
+                if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq) &&
+                    !bic->stably_merged) {
                         struct bfq_queue *old_bfqq = bfqq;
 
                         /* Update bic before losing reference to bfqq */