@@ -226,6 +226,8 @@ enum {
226226	MARGIN_TARGET_PCT 	=  50 ,
227227	MARGIN_MAX_PCT 		=  100 ,
228228
229+ 	INUSE_ADJ_STEP_PCT 	=  25 ,
230+ 
229231	/* Have some play in timer operations */ 
230232	TIMER_SLACK_PCT 		=  1 ,
231233
@@ -443,12 +445,17 @@ struct ioc_gq {
443445	 * 
444446	 * `last_inuse` remembers `inuse` while an iocg is idle to persist 
445447	 * surplus adjustments. 
448+ 	 * 
449+ 	 * `inuse` may be adjusted dynamically during a period. `saved_*` are
450+ 	 * used to determine and track adjustments.
446451	 */ 
447452	u32 				cfg_weight ;
448453	u32 				weight ;
449454	u32 				active ;
450455	u32 				inuse ;
456+ 
451457	u32 				last_inuse ;
458+ 	s64 				saved_margin ;
452459
453460	sector_t 			cursor ;		/* to detect randio */ 
454461
@@ -934,9 +941,11 @@ static void ioc_start_period(struct ioc *ioc, struct ioc_now *now)
934941
935942/* 
936943 * Update @iocg's `active` and `inuse` to @active and @inuse, update level 
937-  * weight sums and propagate upwards accordingly. 
944+  * weight sums and propagate upwards accordingly. If @save, the current margin 
945+  * is saved to be used as reference for later inuse in-period adjustments. 
938946 */ 
939- static  void  __propagate_weights (struct  ioc_gq  * iocg , u32  active , u32  inuse )
947+ static  void  __propagate_weights (struct  ioc_gq  * iocg , u32  active , u32  inuse ,
948+ 				bool  save , struct  ioc_now  * now )
940949{
941950	struct  ioc  * ioc  =  iocg -> ioc ;
942951	int  lvl ;
@@ -945,6 +954,10 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse)
945954
946955	inuse  =  clamp_t (u32 , inuse , 1 , active );
947956
957+ 	iocg -> last_inuse  =  iocg -> inuse ;
958+ 	if  (save )
959+ 		iocg -> saved_margin  =  now -> vnow  -  atomic64_read (& iocg -> vtime );
960+ 
948961	if  (active  ==  iocg -> active  &&  inuse  ==  iocg -> inuse )
949962		return ;
950963
@@ -996,9 +1009,10 @@ static void commit_weights(struct ioc *ioc)
9961009	}
9971010}
9981011
999- static  void  propagate_weights (struct  ioc_gq  * iocg , u32  active , u32  inuse )
1012+ static  void  propagate_weights (struct  ioc_gq  * iocg , u32  active , u32  inuse ,
1013+ 			      bool  save , struct  ioc_now  * now )
10001014{
1001- 	__propagate_weights (iocg , active , inuse );
1015+ 	__propagate_weights (iocg , active , inuse ,  save ,  now );
10021016	commit_weights (iocg -> ioc );
10031017}
10041018
@@ -1082,7 +1096,7 @@ static u32 current_hweight_max(struct ioc_gq *iocg)
10821096	return  max_t (u32 , hwm , 1 );
10831097}
10841098
1085- static  void  weight_updated (struct  ioc_gq  * iocg )
1099+ static  void  weight_updated (struct  ioc_gq  * iocg ,  struct   ioc_now   * now )
10861100{
10871101	struct  ioc  * ioc  =  iocg -> ioc ;
10881102	struct  blkcg_gq  * blkg  =  iocg_to_blkg (iocg );
@@ -1093,9 +1107,7 @@ static void weight_updated(struct ioc_gq *iocg)
10931107
10941108	weight  =  iocg -> cfg_weight  ?: iocc -> dfl_weight ;
10951109	if  (weight  !=  iocg -> weight  &&  iocg -> active )
1096- 		propagate_weights (iocg , weight ,
1097- 				  DIV64_U64_ROUND_UP ((u64 )iocg -> inuse  *  weight ,
1098- 						     iocg -> weight ));
1110+ 		propagate_weights (iocg , weight , iocg -> inuse , true, now );
10991111	iocg -> weight  =  weight ;
11001112}
11011113
@@ -1165,8 +1177,9 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
11651177	 */ 
11661178	iocg -> hweight_gen  =  atomic_read (& ioc -> hweight_gen ) -  1 ;
11671179	list_add (& iocg -> active_list , & ioc -> active_iocgs );
1180+ 
11681181	propagate_weights (iocg , iocg -> weight ,
1169- 			  iocg -> last_inuse  ?: iocg -> weight );
1182+ 			  iocg -> last_inuse  ?: iocg -> weight , true,  now );
11701183
11711184	TRACE_IOCG_PATH (iocg_activate , iocg , now ,
11721185			last_period , cur_period , vtime );
@@ -1789,7 +1802,7 @@ static void transfer_surpluses(struct list_head *surpluses, struct ioc_now *now)
17891802		inuse  =  DIV64_U64_ROUND_UP (
17901803			parent -> child_adjusted_sum  *  iocg -> hweight_after_donation ,
17911804			parent -> hweight_inuse );
1792- 		__propagate_weights (iocg , iocg -> active , inuse );
1805+ 		__propagate_weights (iocg , iocg -> active , inuse , true,  now );
17931806	}
17941807
17951808	/* walk list should be dissolved after use */ 
@@ -1844,8 +1857,7 @@ static void ioc_timer_fn(struct timer_list *timer)
18441857			iocg_kick_waitq (iocg , true, & now );
18451858		} else  if  (iocg_is_idle (iocg )) {
18461859			/* no waiter and idle, deactivate */ 
1847- 			iocg -> last_inuse  =  iocg -> inuse ;
1848- 			__propagate_weights (iocg , 0 , 0 );
1860+ 			__propagate_weights (iocg , 0 , 0 , false, & now );
18491861			list_del_init (& iocg -> active_list );
18501862		}
18511863
@@ -1925,7 +1937,7 @@ static void ioc_timer_fn(struct timer_list *timer)
19251937				list_add (& iocg -> surplus_list , & surpluses );
19261938			} else  {
19271939				__propagate_weights (iocg , iocg -> active ,
1928- 						    iocg -> active );
1940+ 						    iocg -> active , true,  & now );
19291941				nr_shortages ++ ;
19301942			}
19311943		} else  {
@@ -2055,6 +2067,50 @@ static void ioc_timer_fn(struct timer_list *timer)
20552067	spin_unlock_irq (& ioc -> lock );
20562068}
20572069
2070+ static  u64  adjust_inuse_and_calc_cost (struct  ioc_gq  * iocg , u64  vtime ,
2071+ 				      u64  abs_cost , struct  ioc_now  * now )
2072+ {
2073+ 	struct  ioc  * ioc  =  iocg -> ioc ;
2074+ 	struct  ioc_margins  * margins  =  & ioc -> margins ;
2075+ 	u32  adj_step  =  DIV_ROUND_UP (iocg -> active  *  INUSE_ADJ_STEP_PCT , 100 );
2076+ 	u32  hwi ;
2077+ 	s64  margin ;
2078+ 	u64  cost , new_inuse ;
2079+ 
2080+ 	current_hweight (iocg , NULL , & hwi );
2081+ 	cost  =  abs_cost_to_cost (abs_cost , hwi );
2082+ 	margin  =  now -> vnow  -  vtime  -  cost ;
2083+ 
2084+ 	/* 
2085+ 	 * We only increase inuse during period and do so iff the margin has 
2086+ 	 * deteriorated since the previous adjustment. 
2087+ 	 */ 
2088+ 	if  (margin  >= iocg -> saved_margin  ||  margin  >= margins -> low  || 
2089+ 	    iocg -> inuse  ==  iocg -> active )
2090+ 		return  cost ;
2091+ 
2092+ 	spin_lock_irq (& ioc -> lock );
2093+ 
2094+ 	/* we own inuse only when @iocg is in the normal active state */ 
2095+ 	if  (list_empty (& iocg -> active_list )) {
2096+ 		spin_unlock_irq (& ioc -> lock );
2097+ 		return  cost ;
2098+ 	}
2099+ 
2100+ 	/* bump up inuse till @abs_cost fits in the existing budget */ 
2101+ 	new_inuse  =  iocg -> inuse ;
2102+ 	do  {
2103+ 		new_inuse  =  new_inuse  +  adj_step ;
2104+ 		propagate_weights (iocg , iocg -> active , new_inuse , true, now );
2105+ 		current_hweight (iocg , NULL , & hwi );
2106+ 		cost  =  abs_cost_to_cost (abs_cost , hwi );
2107+ 	} while  (time_after64 (vtime  +  cost , now -> vnow ) && 
2108+ 		 iocg -> inuse  !=  iocg -> active );
2109+ 
2110+ 	spin_unlock_irq (& ioc -> lock );
2111+ 	return  cost ;
2112+ }
2113+ 
20582114static  void  calc_vtime_cost_builtin (struct  bio  * bio , struct  ioc_gq  * iocg ,
20592115				    bool  is_merge , u64  * costp )
20602116{
@@ -2136,7 +2192,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
21362192	struct  ioc_gq  * iocg  =  blkg_to_iocg (blkg );
21372193	struct  ioc_now  now ;
21382194	struct  iocg_wait  wait ;
2139- 	u32  hw_active , hw_inuse ;
21402195	u64  abs_cost , cost , vtime ;
21412196	bool  use_debt , ioc_locked ;
21422197	unsigned long  flags ;
@@ -2154,21 +2209,8 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
21542209		return ;
21552210
21562211	iocg -> cursor  =  bio_end_sector (bio );
2157- 
21582212	vtime  =  atomic64_read (& iocg -> vtime );
2159- 	current_hweight (iocg , & hw_active , & hw_inuse );
2160- 
2161- 	if  (hw_inuse  <  hw_active  && 
2162- 	    time_after_eq64 (vtime  +  ioc -> margins .min , now .vnow )) {
2163- 		TRACE_IOCG_PATH (inuse_reset , iocg , & now ,
2164- 				iocg -> inuse , iocg -> weight , hw_inuse , hw_active );
2165- 		spin_lock_irq (& ioc -> lock );
2166- 		propagate_weights (iocg , iocg -> weight , iocg -> weight );
2167- 		spin_unlock_irq (& ioc -> lock );
2168- 		current_hweight (iocg , & hw_active , & hw_inuse );
2169- 	}
2170- 
2171- 	cost  =  abs_cost_to_cost (abs_cost , hw_inuse );
2213+ 	cost  =  adjust_inuse_and_calc_cost (iocg , vtime , abs_cost , & now );
21722214
21732215	/* 
21742216	 * If no one's waiting and within budget, issue right away.  The 
@@ -2190,7 +2232,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
21902232	 */ 
21912233	use_debt  =  bio_issue_as_root_blkg (bio ) ||  fatal_signal_pending (current );
21922234	ioc_locked  =  use_debt  ||  READ_ONCE (iocg -> abs_vdebt );
2193- 
2235+ retry_lock : 
21942236	iocg_lock (iocg , ioc_locked , & flags );
21952237
21962238	/* 
@@ -2232,6 +2274,17 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
22322274		return ;
22332275	}
22342276
2277+ 	/* guarantee that iocgs w/ waiters have maximum inuse */ 
2278+ 	if  (iocg -> inuse  !=  iocg -> active ) {
2279+ 		if  (!ioc_locked ) {
2280+ 			iocg_unlock (iocg , false, & flags );
2281+ 			ioc_locked  =  true;
2282+ 			goto retry_lock ;
2283+ 		}
2284+ 		propagate_weights (iocg , iocg -> active , iocg -> active , true,
2285+ 				  & now );
2286+ 	}
2287+ 
22352288	/* 
22362289	 * Append self to the waitq and schedule the wakeup timer if we're 
22372290	 * the first waiter.  The timer duration is calculated based on the 
@@ -2274,8 +2327,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
22742327	struct  ioc  * ioc  =  iocg -> ioc ;
22752328	sector_t  bio_end  =  bio_end_sector (bio );
22762329	struct  ioc_now  now ;
2277- 	u32  hw_inuse ;
2278- 	u64  abs_cost , cost ;
2330+ 	u64  vtime , abs_cost , cost ;
22792331	unsigned long  flags ;
22802332
22812333	/* bypass if disabled or for root cgroup */ 
@@ -2287,8 +2339,9 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
22872339		return ;
22882340
22892341	ioc_now (ioc , & now );
2290- 	current_hweight (iocg , NULL , & hw_inuse );
2291- 	cost  =  abs_cost_to_cost (abs_cost , hw_inuse );
2342+ 
2343+ 	vtime  =  atomic64_read (& iocg -> vtime );
2344+ 	cost  =  adjust_inuse_and_calc_cost (iocg , vtime , abs_cost , & now );
22922345
22932346	/* update cursor if backmerging into the request at the cursor */ 
22942347	if  (blk_rq_pos (rq ) <  bio_end  && 
@@ -2530,7 +2583,7 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
25302583	}
25312584
25322585	spin_lock_irqsave (& ioc -> lock , flags );
2533- 	weight_updated (iocg );
2586+ 	weight_updated (iocg ,  & now );
25342587	spin_unlock_irqrestore (& ioc -> lock , flags );
25352588}
25362589
@@ -2544,7 +2597,10 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
25442597		spin_lock_irqsave (& ioc -> lock , flags );
25452598
25462599		if  (!list_empty (& iocg -> active_list )) {
2547- 			propagate_weights (iocg , 0 , 0 );
2600+ 			struct  ioc_now  now ;
2601+ 
2602+ 			ioc_now (ioc , & now );
2603+ 			propagate_weights (iocg , 0 , 0 , false, & now );
25482604			list_del_init (& iocg -> active_list );
25492605		}
25502606
@@ -2612,6 +2668,7 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
26122668	struct  blkcg  * blkcg  =  css_to_blkcg (of_css (of ));
26132669	struct  ioc_cgrp  * iocc  =  blkcg_to_iocc (blkcg );
26142670	struct  blkg_conf_ctx  ctx ;
2671+ 	struct  ioc_now  now ;
26152672	struct  ioc_gq  * iocg ;
26162673	u32  v ;
26172674	int  ret ;
@@ -2632,7 +2689,8 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
26322689
26332690			if  (iocg ) {
26342691				spin_lock_irq (& iocg -> ioc -> lock );
2635- 				weight_updated (iocg );
2692+ 				ioc_now (iocg -> ioc , & now );
2693+ 				weight_updated (iocg , & now );
26362694				spin_unlock_irq (& iocg -> ioc -> lock );
26372695			}
26382696		}
@@ -2658,7 +2716,8 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
26582716
26592717	spin_lock (& iocg -> ioc -> lock );
26602718	iocg -> cfg_weight  =  v  *  WEIGHT_ONE ;
2661- 	weight_updated (iocg );
2719+ 	ioc_now (iocg -> ioc , & now );
2720+ 	weight_updated (iocg , & now );
26622721	spin_unlock (& iocg -> ioc -> lock );
26632722
26642723	blkg_conf_finish (& ctx );
0 commit comments