5555ATTR_TEARDOWN = '__teardown__'
5656
5757
58+ # ----------------------------------------------------------------------------------------------------------------------
59+ #
60+ # the CUSUM method adapted for warmup detection within a given threshold (initial iterations)
61+ #
62+ # ----------------------------------------------------------------------------------------------------------------------
def zeros(n):
    """
    build a list holding ``n`` integer zeros

    :param n: the number of elements
    :return: a new list of length ``n`` (empty when ``n`` is not positive)
    """
    return [0] * n
65+
66+
def append(arr, val):
    """
    return a new list made of the elements of ``arr`` followed by ``val``

    The input is never mutated. Non-list sequences (e.g. tuples) are copied into a list first, so the value is
    always added at the end and the result is always a list.

    :param arr: the sequence to extend
    :param val: the value to add at the end
    :return: a new list ending with ``val``
    """
    if isinstance(arr, list):
        return arr + [val]
    # list + tuple raises TypeError, so convert before concatenating
    return list(arr) + [val]
72+
73+
def cusum(values, threshold=1.0, drift=0.0):
    """
    locate change points in a series with a two-sided cumulative sum (CUSUM) over successive differences

    Two running sums are kept, one for increases and one for decreases. Each is clamped at zero, and whenever
    either of them exceeds ``threshold`` the current index is recorded and both sums restart from zero.

    :param values: the series to scan
    :param threshold: the amount a running sum must exceed for an index to be reported
    :param drift: the amount subtracted from both running sums at every step
    :return: the list of indices (ascending) at which a change was detected
    """
    change_points = []
    # only the previous running sums are ever read, so two scalars are enough
    csum_pos, csum_neg = 0, 0
    for i in range(1, len(values)):
        diff = values[i] - values[i - 1]
        csum_pos = csum_pos + diff - drift
        csum_neg = csum_neg - diff - drift

        if csum_pos < 0:
            csum_pos = 0
        if csum_neg < 0:
            csum_neg = 0

        if csum_pos > threshold or csum_neg > threshold:
            change_points.append(i)
            csum_pos, csum_neg = 0, 0

    return change_points
92+
93+
def avg(values):
    """
    compute the arithmetic mean of ``values``

    :param values: a non-empty sequence of numbers
    :return: the mean as a float
    """
    total = float(sum(values))
    count = len(values)
    return total / count
96+
97+
def norm(values):
    """
    rescale ``values`` linearly to the range 0..100

    The smallest value maps to 0.0 and the largest to 100.0. Degenerate input is handled without raising: an empty
    sequence yields an empty list, and a sequence whose values are all equal yields all zeros (there is no spread
    to scale by).

    :param values: the numbers to normalize
    :return: a new list of floats in the range 0..100
    """
    if len(values) == 0:
        return []

    _max, _min = max(values), min(values)
    span = _max - _min
    if span == 0:
        # all values identical: avoid dividing by zero
        return [0.0 for _ in values]
    return [float(v - _min) / span * 100.0 for v in values]
101+
102+
def pairwise_slopes(values, cp):
    """
    compute the absolute slope between each pair of neighbouring points

    :param values: the y coordinates
    :param cp: the x coordinates, at least as long as ``values``
    :return: a list with ``len(values) - 1`` non-negative floats
    """
    slopes = []
    for nxt in range(1, len(values)):
        prev = nxt - 1
        rise = float(values[nxt] - values[prev])
        run = float(cp[nxt] - cp[prev])
        slopes.append(abs(rise / run))
    return slopes
105+
106+
def last_n_percent_runs(values, n=0.1):
    """
    select the trailing fraction ``n`` of ``values`` together with the matching indices

    When the fraction rounds down to zero elements, the last element alone is selected.

    :param values: the sequence to take the tail of
    :param n: the fraction of elements to take, in the range (0.0, 1.0]
    :return: a tuple of (tail values, list of their indices in ``values``)
    """
    assert 0.0 < n <= 1.0
    total = len(values)
    start = total - int(total * n)
    if start >= total:
        start = total - 1
    tail = values[start:]
    indices = list(range(start, total))
    return tail, indices
112+
113+
def first_n_percent_runs(values, n=0.1):
    """
    compute the index that lies just past the leading fraction ``n`` of ``values``

    :param values: the sequence being measured (only its length is used)
    :param n: the fraction of elements, in the range (0.0, 1.0]
    :return: the index, stepped back by one when it would equal ``len(values)``
    """
    assert 0.0 < n <= 1.0
    total = len(values)
    idx = int(total * n)
    if idx == total:
        return idx - 1
    return idx
118+
119+
def detect_warmup(values, cp_threshold=0.03, stability_slope_grade=0.01):
    """
    detect the warmup point (iteration / run) in a series of run durations

    The durations are normalized, change points are located with CUSUM, and the first change point whose trailing
    average is at or below the threshold is reported. If no such point exists, the slopes between the trailing
    averages (extended with the last 10% of the runs) are inspected and the first one flat enough is used instead.

    Any exception raised along the way, including while normalizing degenerate input, is printed and reported as
    "not detected".

    :param values: the durations for each run
    :param cp_threshold: the percent in value difference for a point to be considered a change point (percentage)
    :param stability_slope_grade: the slope grade (percentage). A grade of 1% corresponds to a slope of about
                                  0.5 degrees
    :return: the index of the run following the selected change point, or -1 if not detected
    """
    # both thresholds are given as fractions; scale them to match the normalized values
    stability_slope_grade *= 100.0
    cp_threshold *= 100

    try:
        # normalize inside the try so that a failure here yields -1 instead of escaping to the caller
        values = norm(values)
        cp = cusum(values, threshold=cp_threshold)
        rolling_avg = [avg(values[i:]) for i in cp]

        def warmup(cp_index):
            # the warmed-up run is the one right after the change point, provided it exists
            val_idx = cp[cp_index] + 1
            return val_idx if val_idx < len(values) else -1

        # find the point where the duration avg is below the cp threshold
        for i, d in enumerate(rolling_avg):
            if d <= cp_threshold:
                return warmup(i)

        # could not find something below the CP threshold (noise in the data), use the stabilisation of slopes
        last_n_vals, last_n_idx = last_n_percent_runs(values, 0.1)
        slopes = pairwise_slopes(rolling_avg + last_n_vals, cp + last_n_idx)

        # NOTE(review): slopes covers the change points plus the trailing runs, so i can be >= len(cp); warmup(i)
        # then raises IndexError, which the handler below turns into -1 -- confirm this is the intended outcome
        for i, d in enumerate(slopes):
            if d <= stability_slope_grade:
                return warmup(i)

        return -1
    except Exception as e:
        print("exception occurred while detecting warmup: %s" % e)
        return -1
159+
160+
58161def ccompile (name , code ):
59162 from importlib import invalidate_caches
60163 from distutils .core import setup , Extension
@@ -89,7 +192,14 @@ def _as_int(value):
89192
90193
91194class BenchRunner (object ):
92- def __init__ (self , bench_file , bench_args = None , iterations = 1 , warmup = 0 ):
195+ def __init__ (self , bench_file , bench_args = None , iterations = 1 , warmup = - 1 , warmup_runs = 0 ):
196+ assert isinstance (iterations , int ), \
197+ "BenchRunner iterations argument must be an int, got %s instead" % iterations
198+ assert isinstance (warmup , int ), \
199+ "BenchRunner warmup argument must be an int, got %s instead" % warmup
200+ assert isinstance (warmup_runs , int ), \
201+ "BenchRunner warmup_runs argument must be an int, got %s instead" % warmup_runs
202+
93203 if bench_args is None :
94204 bench_args = []
95205 self .bench_module = BenchRunner .get_bench_module (bench_file )
@@ -98,10 +208,8 @@ def __init__(self, bench_file, bench_args=None, iterations=1, warmup=0):
98208 _iterations = _as_int (iterations )
99209 self ._run_once = _iterations <= 1
100210 self .iterations = 1 if self ._run_once else _iterations
101-
102- assert isinstance (self .iterations , int )
103- self .warmup = _as_int (warmup )
104- assert isinstance (self .warmup , int )
211+ self .warmup_runs = warmup_runs if warmup_runs > 0 else 0
212+ self .warmup = warmup if warmup > 0 else - 1
105213
106214 @staticmethod
107215 def get_bench_module (bench_file ):
@@ -139,9 +247,10 @@ def _call_attr(self, attr_name, *args):
139247
140248 def run (self ):
141249 if self ._run_once :
142- print ("### %s, exactly one iteration (no warmup curves)" % ( self .bench_module .__name__ ) )
250+ print ("### %s, exactly one iteration (no warmup curves)" % self .bench_module .__name__ )
143251 else :
144- print ("### %s, %s warmup iterations, %s bench iterations " % (self .bench_module .__name__ , self .warmup , self .iterations ))
252+ print ("### %s, %s warmup iterations, %s bench iterations " % (self .bench_module .__name__ ,
253+ self .warmup_runs , self .iterations ))
145254
146255 # process the args if the processor function is defined
147256 args = self ._call_attr (ATTR_PROCESS_ARGS , * self .bench_args )
@@ -159,9 +268,9 @@ def run(self):
159268 bench_func = self ._get_attr (ATTR_BENCHMARK )
160269 durations = []
161270 if bench_func and hasattr (bench_func , '__call__' ):
162- if self .warmup :
163- print ("### warming up for %s iterations ... " % self .warmup )
164- for _ in range (self .warmup ):
271+ if self .warmup_runs :
272+ print ("### (pre) warming up for %s iterations ... " % self .warmup_runs )
273+ for _ in range (self .warmup_runs ):
165274 bench_func (* args )
166275
167276 for iteration in range (self .iterations ):
@@ -173,21 +282,46 @@ def run(self):
173282 if self ._run_once :
174283 print ("@@@ name=%s, duration=%s" % (self .bench_module .__name__ , duration_str ))
175284 else :
176- print ("### iteration=%s, name=%s, duration=%s" % (iteration , self .bench_module .__name__ , duration_str ))
285+ print ("### iteration=%s, name=%s, duration=%s" % (iteration , self .bench_module .__name__ ,
286+ duration_str ))
177287
178288 print (_HRULE )
179289 print ("### teardown ... " )
180290 self ._call_attr (ATTR_TEARDOWN )
181291 print ("### benchmark complete" )
182292 print (_HRULE )
183- print ("### BEST duration: %.3f s" % min (durations ))
184- print ("### WORST duration: %.3f s" % max (durations ))
185- print ("### AVG duration: %.3f" % (sum (durations ) / len (durations )))
293+
294+ # summary
295+ if self ._run_once :
296+ print ("### SINGLE RUN duration: %.3f s" % durations [0 ])
297+ else :
298+ print ("### BEST duration: %.3f s" % min (durations ))
299+ print ("### WORST duration: %.3f s" % max (durations ))
300+ print ("### AVG (all runs) duration: %.3f s" % (sum (durations ) / len (durations )))
301+ warmup_iter = self .warmup if self .warmup > 0 else detect_warmup (durations )
302+ # if we cannot detect a warmup starting point but we performed some pre runs, we take a starting point
303+ # after the 10% of the first runs ...
304+ if warmup_iter < 0 and self .warmup_runs > 0 :
305+ print ("### warmup could not be detected, but %s pre-runs were executed.\n "
306+ "### we assume the benchmark is warmed up and pick an iteration "
307+ "in the first 10%% of the runs" % self .warmup_runs )
308+ warmup_iter = first_n_percent_runs (durations , 0.1 )
309+
310+ if warmup_iter > 0 :
311+ print ("### WARMUP %s at iteration: %d" % ("specified" if self .warmup > 0 else "detected" , warmup_iter ))
312+ no_warmup_durations = durations [warmup_iter :]
313+ print ("### AVG (no warmup) duration: %.3f s" % (sum (no_warmup_durations ) / len (no_warmup_durations )))
314+ else :
315+ print ("### WARMUP iteration not specified or could not be detected" )
316+
317+ print (_HRULE )
318+ print ("### RAW DURATIONS: %s" % str (durations ))
186319 print (_HRULE )
187320
188321
189322def run_benchmark (args ):
190- warmup = 0
323+ warmup = - 1
324+ warmup_runs = 0
191325 iterations = 1
192326 bench_file = None
193327 bench_args = []
@@ -208,6 +342,12 @@ def run_benchmark(args):
208342 elif arg .startswith ("--warmup" ):
209343 warmup = _as_int (arg .split ("=" )[1 ])
210344
345+ elif arg == '-r' :
346+ i += 1
347+ warmup_runs = _as_int (args [i ])
348+ elif arg .startswith ("--warmup-runs" ):
349+ warmup_runs = _as_int (arg .split ("=" )[1 ])
350+
211351 elif arg == '-p' :
212352 i += 1
213353 paths = args [i ].split ("," )
@@ -229,7 +369,7 @@ def run_benchmark(args):
229369 else :
230370 print ("### no extra module search paths specified" )
231371
232- BenchRunner (bench_file , bench_args = bench_args , iterations = iterations , warmup = warmup ).run ()
372+ BenchRunner (bench_file , bench_args = bench_args , iterations = iterations , warmup = warmup , warmup_runs = warmup_runs ).run ()
233373
234374
235375if __name__ == '__main__' :
0 commit comments