# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

+from itertools import product
import os
import csv
import io
@@ -24,6 +25,11 @@ class RUNTIMES(Enum):
    UR = "ur"


+class PROFILERS(Enum):
+    TIMER = "timer"
+    CPU_COUNTER = "cpuCounter"
+
+
def runtime_to_name(runtime: RUNTIMES) -> str:
    return {
        RUNTIMES.SYCL_PREVIEW: "SYCL Preview",
@@ -171,69 +177,116 @@ def benchmarks(self) -> list[Benchmark]:

        # hand-picked value so that total execution time of the benchmark is
        # similar on all architectures
-        long_lernel_exec_time_ioq = [20]
+        long_kernel_exec_time_ioq = [20]
        # For BMG server, a new value 200 is used, but we have to create metadata
        # for both values to keep the dashboard consistent.
        # See SubmitKernel.enabled()
        long_kernel_exec_time_ooo = [20, 200]

-        # The Combo Profiler is available only for selected sycl benchmarks
-        profiler_types = ["timer", "cpuCounter"]
-
-        for runtime in list(RUNTIMES):
-            # Add SubmitKernel benchmarks using loops
-            for in_order_queue in [0, 1]:
-                for measure_completion in [0, 1]:
-                    for use_events in [0, 1]:
-                        long_kernel_exec_time = (
-                            long_lernel_exec_time_ioq
-                            if in_order_queue
-                            else long_kernel_exec_time_ooo
-                        )
-                        for kernel_exec_time in [1, *long_kernel_exec_time]:
-                            for profiler_type in profiler_types:
-                                benches.append(
-                                    SubmitKernel(
-                                        self,
-                                        runtime,
-                                        in_order_queue,
-                                        measure_completion,
-                                        use_events,
-                                        kernel_exec_time,
-                                        profiler_type,
-                                    )
-                                )
-
-            # Add SinKernelGraph benchmarks
-            for with_graphs in [0, 1]:
-                for num_kernels in [5, 100]:
+        submit_kernel_params = product(
+            list(RUNTIMES),
+            [0, 1],  # in_order_queue
+            [0, 1],  # measure_completion
+            [0, 1],  # use_events
+        )
+        for (
+            runtime,
+            in_order_queue,
+            measure_completion,
+            use_events,
+        ) in submit_kernel_params:
+            long_kernel_exec_time = (
+                long_kernel_exec_time_ioq
+                if in_order_queue
+                else long_kernel_exec_time_ooo
+            )
+            for kernel_exec_time in [1, *long_kernel_exec_time]:
+                benches.append(
+                    SubmitKernel(
+                        self,
+                        runtime,
+                        in_order_queue,
+                        measure_completion,
+                        use_events,
+                        kernel_exec_time,
+                    )
+                )
+                if runtime == RUNTIMES.SYCL:
+                    # Create CPU count variant
                    benches.append(
-                        GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels)
+                        SubmitKernel(
+                            self,
+                            runtime,
+                            in_order_queue,
+                            measure_completion,
+                            use_events,
+                            kernel_exec_time,
+                            profiler_type=PROFILERS.CPU_COUNTER,
+                        )
                    )

+        # Add SinKernelGraph benchmarks
+        sin_kernel_graph_params = product(
+            list(RUNTIMES),
+            [0, 1],  # with_graphs
+            [5, 100],  # num_kernels
+        )
+        for runtime, with_graphs, num_kernels in sin_kernel_graph_params:
+            benches.append(
+                GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels)
+            )
+
        # Add ULLS benchmarks
-            for profiler_type in profiler_types:
-                benches.append(UllsEmptyKernel(self, runtime, 1000, 256, profiler_type))
+        for runtime in list(RUNTIMES):
+            if runtime == RUNTIMES.SYCL:
+                benches.append(
+                    UllsEmptyKernel(
+                        self, runtime, 1000, 256, profiler_type=PROFILERS.CPU_COUNTER
+                    )
+                )
+            benches.append(UllsEmptyKernel(self, runtime, 1000, 256))
            benches.append(UllsKernelSwitch(self, runtime, 8, 200, 0, 0, 1, 1))

-            # Add GraphApiSubmitGraph benchmarks
-            for in_order_queue in [0, 1]:
-                for num_kernels in self.submit_graph_num_kernels:
-                    for measure_completion_time in [0, 1]:
-                        for use_events in [0, 1]:
-                            for profiler_type in profiler_types:
-                                benches.append(
-                                    GraphApiSubmitGraph(
-                                        self,
-                                        runtime,
-                                        in_order_queue,
-                                        num_kernels,
-                                        measure_completion_time,
-                                        profiler_type,
-                                        use_events,
-                                        useHostTasks=0,
-                                    )
-                                )
+        # Add GraphApiSubmitGraph benchmarks
+        submit_graph_params = product(
+            list(RUNTIMES),
+            [0, 1],  # in_order_queue
+            self.submit_graph_num_kernels,
+            [0, 1],  # measure_completion_time
+            [0, 1],  # use_events
+        )
+        for (
+            runtime,
+            in_order_queue,
+            num_kernels,
+            measure_completion_time,
+            use_events,
+        ) in submit_graph_params:
+            benches.append(
+                GraphApiSubmitGraph(
+                    self,
+                    runtime,
+                    in_order_queue,
+                    num_kernels,
+                    measure_completion_time,
+                    use_events,
+                    useHostTasks=0,
+                )
+            )
+            if runtime == RUNTIMES.SYCL:
+                # Create CPU count variant
+                benches.append(
+                    GraphApiSubmitGraph(
+                        self,
+                        runtime,
+                        in_order_queue,
+                        num_kernels,
+                        measure_completion_time,
+                        use_events,
+                        useHostTasks=0,
+                        profiler_type=PROFILERS.CPU_COUNTER,
+                    )
+                )

        # Add other benchmarks
        benches += [
@@ -244,7 +297,7 @@ def benchmarks(self) -> list[Benchmark]:
            GraphApiFinalizeGraph(self, RUNTIMES.SYCL, 0, "Llama"),
            GraphApiFinalizeGraph(self, RUNTIMES.SYCL, 1, "Llama"),
        ]
-        for profiler_type in profiler_types:
+        for profiler_type in list(PROFILERS):
            benches.append(
                QueueInOrderMemcpy(self, 0, "Device", "Device", 1024, profiler_type)
            )
@@ -310,7 +363,12 @@ def parse_unit_type(compute_unit):

class ComputeBenchmark(Benchmark):
    def __init__(
-        self, bench, name, test, runtime: RUNTIMES = None, profiler_type: str = ""
+        self,
+        bench,
+        name,
+        test,
+        runtime: RUNTIMES = None,
+        profiler_type: PROFILERS = PROFILERS.TIMER,
    ):
        super().__init__(bench.directory, bench)
        self.bench = bench
@@ -478,7 +536,7 @@ def __init__(
        MeasureCompletion=0,
        UseEvents=0,
        KernelExecTime=1,
-        profiler_type="",
+        profiler_type=PROFILERS.TIMER,
    ):
        self.ioq = ioq
        self.MeasureCompletion = MeasureCompletion
@@ -578,7 +636,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
            f"--UseEvents={self.UseEvents}",
        ]
        if self.runtime == RUNTIMES.SYCL:
-            bin_args.append(f"--profilerType={self.profiler_type}")
+            bin_args.append(f"--profilerType={self.profiler_type.value}")
        return bin_args

    def get_metadata(self) -> dict[str, BenchmarkMetadata]:
@@ -647,7 +705,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
            f"--dst={self.destination}",
            f"--size={self.size}",
            "--withCopyOffload=0",
-            f"--profilerType={self.profiler_type}",
+            f"--profilerType={self.profiler_type.value}",
        ]

@@ -693,7 +751,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
            f"--size={self.size}",
            "--count=100",
            "--withCopyOffload=0",
-            f"--profilerType={self.profiler_type}",
+            f"--profilerType={self.profiler_type.value}",
        ]

@@ -731,7 +789,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
            f"--sourcePlacement={self.source}",
            f"--destinationPlacement={self.destination}",
            f"--size={self.size}",
-            f"--profilerType={self.profiler_type}",
+            f"--profilerType={self.profiler_type.value}",
        ]

@@ -970,9 +1028,9 @@ def __init__(
        inOrderQueue,
        numKernels,
        measureCompletionTime,
-        profiler_type,
        useEvents,
        useHostTasks,
+        profiler_type=PROFILERS.TIMER,
    ):
        self.inOrderQueue = inOrderQueue
        self.numKernels = numKernels
@@ -1037,12 +1095,14 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
            f"--UseHostTasks={self.useHostTasks}",
        ]
        if self.runtime == RUNTIMES.SYCL:
-            bin_args.append(f"--profilerType={self.profiler_type}")
+            bin_args.append(f"--profilerType={self.profiler_type.value}")
        return bin_args


class UllsEmptyKernel(ComputeBenchmark):
-    def __init__(self, bench, runtime: RUNTIMES, wgc, wgs, profiler_type):
+    def __init__(
+        self, bench, runtime: RUNTIMES, wgc, wgs, profiler_type=PROFILERS.TIMER
+    ):
        self.wgc = wgc
        self.wgs = wgs
        # iterations per bin_args: --iterations=10000
@@ -1084,7 +1144,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
            f"--wgc={self.wgc}",
        ]
        if self.runtime == RUNTIMES.SYCL:
-            bin_args.append(f"--profilerType={self.profiler_type}")
+            bin_args.append(f"--profilerType={self.profiler_type.value}")
        return bin_args

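For readers unfamiliar with the pattern, here is a minimal standalone sketch of how itertools.product replaces the nested parameter loops above. The values and variable names are placeholders for illustration only, not the benchmark's real configuration:

from itertools import product

# Cross product of the parameter axes: one tuple per configuration,
# equivalent to four nested for loops over the same lists.
submit_kernel_params = product(
    ["sycl", "ur"],  # runtime (placeholder strings, not the RUNTIMES enum)
    [0, 1],          # in_order_queue
    [0, 1],          # measure_completion
    [0, 1],          # use_events
)
for runtime, in_order_queue, measure_completion, use_events in submit_kernel_params:
    print(runtime, in_order_queue, measure_completion, use_events)  # 2*2*2*2 = 16 rows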
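Likewise, a minimal sketch of the PROFILERS enum usage introduced by the diff: an Enum member as the default argument, with .value unwrapping it to the plain string that the --profilerType flag expects. The bin_args helper below is illustrative only, not the actual benchmark method:

from enum import Enum

class PROFILERS(Enum):
    TIMER = "timer"
    CPU_COUNTER = "cpuCounter"

def bin_args(profiler_type: PROFILERS = PROFILERS.TIMER) -> list[str]:
    # .value yields the underlying string, e.g. "timer" or "cpuCounter"
    return [f"--profilerType={profiler_type.value}"]

print(bin_args())                       # ['--profilerType=timer']
print(bin_args(PROFILERS.CPU_COUNTER))  # ['--profilerType=cpuCounter']

Passing the enum member rather than a raw string through the constructors keeps the set of accepted profiler values in one place; the string conversion happens only where the command-line flag is assembled.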