@@ -217,12 +217,14 @@ enum {
        MAX_PERIOD              = USEC_PER_SEC,
 
        /*
-        * A cgroup's vtime can run 50% behind the device vtime, which
+        * iocg->vtime is targeted at 50% behind the device vtime, which
         * serves as its IO credit buffer.  Surplus weight adjustment is
         * immediately canceled if the vtime margin runs below 10%.
         */
        MARGIN_MIN_PCT          = 10,
-       MARGIN_MAX_PCT          = 50,
+       MARGIN_LOW_PCT          = 20,
+       MARGIN_TARGET_PCT       = 50,
+       MARGIN_MAX_PCT          = 100,
 
        /* Have some play in timer operations */
        TIMER_SLACK_PCT         = 1,
@@ -234,17 +236,6 @@ enum {
         */
        VTIME_VALID_DUR         = 300 * USEC_PER_SEC,
 
-       /*
-        * Remember the past three non-zero usages and use the max for
-        * surplus calculation.  Three slots guarantee that we remember one
-        * full period usage from the last active stretch even after
-        * partial deactivation and re-activation periods.  Don't start
-        * giving away weight before collecting two data points to prevent
-        * hweight adjustments based on one partial activation period.
-        */
-       NR_USAGE_SLOTS          = 3,
-       MIN_VALID_USAGES        = 2,
-
        /* 1/64k is granular enough and can easily be handled w/ u32 */
        WEIGHT_ONE              = 1 << 16,
 
@@ -280,14 +271,6 @@ enum {
        /* don't let cmds which take a very long time pin lagging for too long */
        MAX_LAGGING_PERIODS     = 10,
 
-       /*
-        * If usage% * 1.25 + 2% is lower than hweight% by more than 3%,
-        * donate the surplus.
-        */
-       SURPLUS_SCALE_PCT       = 125,                  /* * 125% */
-       SURPLUS_SCALE_ABS       = WEIGHT_ONE / 50,      /* + 2% */
-       SURPLUS_MIN_ADJ_DELTA   = WEIGHT_ONE / 33,      /* 3% */
-
        /* switch iff the conditions are met for longer than this */
        AUTOP_CYCLE_NSEC        = 10LLU * NSEC_PER_SEC,
 
@@ -376,6 +359,8 @@ struct ioc_params {
 
 struct ioc_margins {
        s64                     min;
+       s64                     low;
+       s64                     target;
        s64                     max;
 };
 
@@ -514,11 +499,7 @@ struct ioc_gq {
        struct iocg_stat        desc_stat;
        struct iocg_stat        last_stat;
        u64                     last_stat_abs_vusage;
-
-       /* usage is recorded as fractions of WEIGHT_ONE */
-       u32                     usage_delta_us;
-       int                     usage_idx;
-       u32                     usages[NR_USAGE_SLOTS];
+       u64                     usage_delta_us;
 
        /* this iocg's depth in the hierarchy and ancestors including self */
        int                     level;
@@ -737,6 +718,8 @@ static void ioc_refresh_margins(struct ioc *ioc)
        u64 vrate = atomic64_read(&ioc->vtime_rate);
 
        margins->min = (period_us * MARGIN_MIN_PCT / 100) * vrate;
+       margins->low = (period_us * MARGIN_LOW_PCT / 100) * vrate;
+       margins->target = (period_us * MARGIN_TARGET_PCT / 100) * vrate;
        margins->max = (period_us * MARGIN_MAX_PCT / 100) * vrate;
 }
 
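
The margins scale with both the period length and the current vtime rate, so the percentages above become absolute vtime head starts. As a rough standalone illustration of the same arithmetic (hypothetical 5ms period and a vrate of 1 vtime unit per microsecond; plain userspace C rather than the kernel helpers, and not part of the patch):

/* sketch only: hypothetical inputs, mirrors the ioc_refresh_margins() math */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t period_us = 5000;      /* hypothetical 5ms period */
        uint64_t vrate = 1;             /* pretend 1 vtime unit per usec */

        printf("min    = %llu\n", (unsigned long long)(period_us * 10 / 100 * vrate));
        printf("low    = %llu\n", (unsigned long long)(period_us * 20 / 100 * vrate));
        printf("target = %llu\n", (unsigned long long)(period_us * 50 / 100 * vrate));
        printf("max    = %llu\n", (unsigned long long)(period_us * 100 / 100 * vrate));
        return 0;
}

With those made-up inputs the minimum margin is 500us worth of vtime and the target buffer is half a period, which is also the threshold iocg_kick_delay() compares against in the next hunk.
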
@@ -1228,7 +1211,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
                return false;
        }
        if (!atomic_read(&blkg->use_delay) &&
-           time_before_eq64(vtime, now->vnow + ioc->margins.max))
+           time_before_eq64(vtime, now->vnow + ioc->margins.target))
                return false;
 
        /* use delay */
@@ -1527,7 +1510,7 @@ static u32 hweight_after_donation(struct ioc_gq *iocg, u32 hwm, u32 usage,
 {
        struct ioc *ioc = iocg->ioc;
        u64 vtime = atomic64_read(&iocg->vtime);
-       s64 excess;
+       s64 excess, delta, target, new_hwi;
 
        /* see whether minimum margin requirement is met */
        if (waitqueue_active(&iocg->waitq) ||
@@ -1542,15 +1525,28 @@ static u32 hweight_after_donation(struct ioc_gq *iocg, u32 hwm, u32 usage,
                vtime += excess;
        }
 
-       /* add margin */
-       usage = DIV_ROUND_UP(usage * SURPLUS_SCALE_PCT, 100);
-       usage += SURPLUS_SCALE_ABS;
-
-       /* don't bother if the surplus is too small */
-       if (usage + SURPLUS_MIN_ADJ_DELTA > hwm)
-               return hwm;
+       /*
+        * Let's say the distance between iocg's and device's vtimes as a
+        * fraction of period duration is delta.  Assuming that the iocg will
+        * consume the usage determined above, we want to determine new_hwi so
+        * that delta equals MARGIN_TARGET at the end of the next period.
+        *
+        * We need to execute usage worth of IOs while spending the sum of the
+        * new budget (1 - MARGIN_TARGET) and the leftover from the last period
+        * (delta):
+        *
+        *   usage = (1 - MARGIN_TARGET + delta) * new_hwi
+        *
+        * Therefore, the new_hwi is:
+        *
+        *   new_hwi = usage / (1 - MARGIN_TARGET + delta)
+        */
+       delta = div64_s64(WEIGHT_ONE * (now->vnow - vtime),
+                         now->vnow - ioc->period_at_vtime);
+       target = WEIGHT_ONE * MARGIN_TARGET_PCT / 100;
+       new_hwi = div64_s64(WEIGHT_ONE * usage, WEIGHT_ONE - target + delta);
 
-       return usage;
+       return clamp_t(s64, new_hwi, 1, hwm);
 }
 
 /*
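
To make the new_hwi derivation above concrete, here is a hedged standalone sketch of the same fixed-point arithmetic with made-up inputs (only WEIGHT_ONE and the formula come from the patch): an iocg that consumed 30% of the device last period and still carries 20% of a period's worth of unused budget (delta) settles on roughly 43% hierarchical weight, so it ends the next period sitting at the 50% target margin.

/* sketch only: hypothetical usage/delta, same 1/64k scale as the patch */
#include <stdio.h>
#include <stdint.h>

#define WEIGHT_ONE      (1 << 16)

int main(void)
{
        int64_t usage  = 30 * WEIGHT_ONE / 100; /* used 30% of the device */
        int64_t delta  = 20 * WEIGHT_ONE / 100; /* 20% of a period left over */
        int64_t target = 50 * WEIGHT_ONE / 100; /* MARGIN_TARGET_PCT */

        /* new_hwi = usage / (1 - MARGIN_TARGET + delta) = 0.3 / 0.7 */
        int64_t new_hwi = WEIGHT_ONE * usage / (WEIGHT_ONE - target + delta);

        printf("new_hwi = %.1f%%\n", 100.0 * new_hwi / WEIGHT_ONE);     /* ~42.9% */
        return 0;
}

The clamp to [1, hwm] in the patched return keeps a donating iocg from growing past what it is entitled to or from disappearing entirely.
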
@@ -1812,7 +1808,7 @@ static void ioc_timer_fn(struct timer_list *timer)
        u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM];
        u32 missed_ppm[2], rq_wait_pct;
        u64 period_vtime;
-       int prev_busy_level, i;
+       int prev_busy_level;
 
        /* how were the latencies during the period? */
        ioc_lat_stat(ioc, missed_ppm, &rq_wait_pct);
@@ -1857,11 +1853,10 @@ static void ioc_timer_fn(struct timer_list *timer)
        }
        commit_weights(ioc);
 
-       /* calc usages and see whether some weights need to be moved around */
+       /* calc usage and see whether some weights need to be moved around */
        list_for_each_entry(iocg, &ioc->active_iocgs, active_list) {
-               u64 vdone, vtime, usage_us;
-               u32 hw_active, hw_inuse, usage;
-               int uidx, nr_valid;
+               u64 vdone, vtime, usage_us, usage_dur;
+               u32 usage, hw_active, hw_inuse;
 
                /*
                 * Collect unused and wind vtime closer to vnow to prevent
@@ -1886,59 +1881,34 @@ static void ioc_timer_fn(struct timer_list *timer)
                        nr_lagging++;
 
                /*
-                * Determine absolute usage factoring in pending and in-flight
-                * IOs to avoid stalls and high-latency completions appearing as
-                * idle.
+                * Determine absolute usage factoring in in-flight IOs to avoid
+                * high-latency completions appearing as idle.
                 */
                usage_us = iocg->usage_delta_us;
-               if (waitqueue_active(&iocg->waitq) && time_before64(vtime, now.vnow))
-                       usage_us += DIV64_U64_ROUND_UP(
-                               cost_to_abs_cost(now.vnow - vtime, hw_inuse),
-                               now.vrate);
+
                if (vdone != vtime) {
                        u64 inflight_us = DIV64_U64_ROUND_UP(
                                cost_to_abs_cost(vtime - vdone, hw_inuse),
                                now.vrate);
                        usage_us = max(usage_us, inflight_us);
                }
 
-               /* convert to hweight based usage ratio and record */
-               uidx = (iocg->usage_idx + 1) % NR_USAGE_SLOTS;
-
-               if (time_after64(vtime, now.vnow - ioc->margins.min)) {
-                       iocg->usage_idx = uidx;
-                       iocg->usages[uidx] = WEIGHT_ONE;
-               } else if (usage_us) {
-                       u64 started_at, dur;
-
-                       if (time_after64(iocg->activated_at, ioc->period_at))
-                               started_at = iocg->activated_at;
-                       else
-                               started_at = ioc->period_at;
-
-                       dur = max_t(u64, now.now - started_at, 1);
+               /* convert to hweight based usage ratio */
+               if (time_after64(iocg->activated_at, ioc->period_at))
+                       usage_dur = max_t(u64, now.now - iocg->activated_at, 1);
+               else
+                       usage_dur = max_t(u64, now.now - ioc->period_at, 1);
 
-                       iocg->usage_idx = uidx;
-                       iocg->usages[uidx] = clamp_t(u32,
-                               DIV64_U64_ROUND_UP(usage_us * WEIGHT_ONE, dur),
+               usage = clamp_t(u32,
+                               DIV64_U64_ROUND_UP(usage_us * WEIGHT_ONE,
+                                                  usage_dur),
                                1, WEIGHT_ONE);
-               }
-
-               /* base the decision on max historical usage */
-               for (i = 0, usage = 0, nr_valid = 0; i < NR_USAGE_SLOTS; i++) {
-                       if (iocg->usages[i]) {
-                               usage = max(usage, iocg->usages[i]);
-                               nr_valid++;
-                       }
-               }
-               if (nr_valid < MIN_VALID_USAGES)
-                       usage = WEIGHT_ONE;
 
                /* see whether there's surplus vtime */
                WARN_ON_ONCE(!list_empty(&iocg->surplus_list));
                if (hw_inuse < hw_active ||
                    (!waitqueue_active(&iocg->waitq) &&
-                    time_before64(vtime, now.vnow - ioc->margins.max))) {
+                    time_before64(vtime, now.vnow - ioc->margins.low))) {
                        u32 hwa, hwm, new_hwi;
 
                        /*
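
The usage_us to usage conversion above boils down to "what fraction of the wallclock stretch it was active did this iocg keep the device busy", scaled to WEIGHT_ONE. A standalone sketch with hypothetical numbers (DIV64_U64_ROUND_UP replaced by a plain round-up helper; nothing here is part of the patch):

/* sketch only: hypothetical usage_us/usage_dur, same clamp as the patch */
#include <stdio.h>
#include <stdint.h>

#define WEIGHT_ONE      (1u << 16)

static uint64_t div_round_up(uint64_t a, uint64_t b)
{
        return (a + b - 1) / b;
}

int main(void)
{
        uint64_t usage_us  = 1200;      /* ~1.2ms of device time consumed */
        uint64_t usage_dur = 3000;      /* active for a 3ms stretch */
        uint64_t usage = div_round_up(usage_us * WEIGHT_ONE, usage_dur);

        if (usage < 1)
                usage = 1;
        if (usage > WEIGHT_ONE)
                usage = WEIGHT_ONE;

        printf("usage = %llu (%.1f%%)\n", (unsigned long long)usage,
               100.0 * usage / WEIGHT_ONE);     /* 40.0% of the device */
        return 0;
}

This per-period ratio replaces the max over the removed NR_USAGE_SLOTS history as the input to the surplus check against margins.low.
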
@@ -2175,15 +2145,14 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
        if (!ioc->enabled || !iocg->level)
                return;
 
-       /* always activate so that even 0 cost IOs get protected to some level */
-       if (!iocg_activate(iocg, &now))
-               return;
-
        /* calculate the absolute vtime cost */
        abs_cost = calc_vtime_cost(bio, iocg, false);
        if (!abs_cost)
                return;
 
+       if (!iocg_activate(iocg, &now))
+               return;
+
        iocg->cursor = bio_end_sector(bio);
 
        vtime = atomic64_read(&iocg->vtime);