2 files changed in tests/integration/test_lists/qa (+19, -8 lines)
@@ -274,15 +274,14 @@ llm_perf_core:
 
 - condition:
     ranges:
+      compute_capability:
+        gte: 9.0
+        lt: 12.0
       system_gpu_count:
         gte: 8
       gpu_memory:
         gt: 80000
-    wildcards:
-      gpu:
-      - '*h100*'
-      - '*h200*'
-      - '*h20*'
+
   tests:
   # E2E trtllm-bench
   # mixtral_8x7b_v0.1_instruct
@@ -309,7 +308,7 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-maxbs:1024-maxnt:4096-kv_frac:0.85-input_output_len:1000,1000-reqs:3000-ep:8-tp:8-gpus:8]
   - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-ep:8-tp:8-gpus:8]
   - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-ep:8-tp:8-gpus:8]
-  # rcca case
+  # chunked attention case
   - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-maxbs:1024-maxnt:20000-kv_frac:0.6-input_output_len:20000,2000-reqs:1000-ep:8-tp:8-gpus:8]
 
   # llama_v4_scout_17b_16e_instruct_fp8
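
The hunk above replaces wildcard GPU-name matching (`*h100*`, `*h200*`, `*h20*`) with a `compute_capability` range, so the condition covers Hopper-class and newer parts (SM 9.x through 11.x) without enumerating product names. A minimal sketch of how such a `ranges` condition could be evaluated; the `matches_ranges` helper and the flat `system` dict are illustrative assumptions, not the repo's actual matcher:

```python
# Hypothetical sketch: evaluate a test-list "ranges" condition against
# the properties of the current system. The real matcher in the repo
# may differ; this only illustrates the gte/gt/lt/lte semantics.

def matches_ranges(ranges: dict, system: dict) -> bool:
    ops = {"gte": lambda v, b: v >= b,
           "gt":  lambda v, b: v > b,
           "lt":  lambda v, b: v < b,
           "lte": lambda v, b: v <= b}
    for prop, bounds in ranges.items():
        value = system[prop]
        for op, bound in bounds.items():
            if not ops[op](value, bound):
                return False
    return True

condition = {
    "compute_capability": {"gte": 9.0, "lt": 12.0},
    "system_gpu_count": {"gte": 8},
    "gpu_memory": {"gt": 80000},
}

# An 8x H200 node (SM 9.0, ~141 GB per GPU) matches:
print(matches_ranges(condition, {"compute_capability": 9.0,
                                 "system_gpu_count": 8,
                                 "gpu_memory": 141000}))   # True
# An SM 12.0 part (e.g. GB20X-class) fails the lt: 12.0 bound:
print(matches_ranges(condition, {"compute_capability": 12.0,
                                 "system_gpu_count": 8,
                                 "gpu_memory": 96000}))    # False
```

One advantage of ranges over name wildcards is stability: a new SKU in the same capability window matches automatically without another wildcard entry.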
@@ -168,11 +168,23 @@ llm_perf_sanity:
   # for chunked prefill cases
   - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float8-maxbs:512-maxnt:2048-kv_frac:0.85-input_output_len:3000,500-reqs:200]
   - perf/test_perf.py::test_perf[llama_v3.1_405b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-tp:8-gpus:8]
-  - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-ep:8-tp:8-gpus:8]
-  - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-maxbs:1024-maxnt:20000-kv_frac:0.6-input_output_len:20000,2000-reqs:1000-ep:8-tp:8-gpus:8]
+  - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-ep:8-tp:8-gpus:8] TIMEOUT(100)
   - perf/test_perf.py::test_perf[llama_v4_scout_17b_16e_instruct_fp8-bench-pytorch-float8-maxbs:1024-maxnt:20000-kv_frac:0.85-input_output_len:20000,2000-reqs:1000-ep:8-tp:8-gpus:8] TIMEOUT(100)
   - perf/test_perf.py::test_perf[qwen3_235b_a22b_fp8-bench-pytorch-float8-input_output_len:1000,2000-con:256-ep:8-gpus:8] TIMEOUT(60)
   - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-disagg_server-ctx_dp:4-gen_tp:4]
   - perf/test_perf.py::test_perf[llama_v3.1_8b-disagg_server-ctx_dp:4-gen_tp:4]
   # gpt_oss_20b_fp4
   - perf/test_perf.py::test_perf[gpt_oss_20b_fp4-bench-pytorch-float4-input_output_len:512,512]
+
+# gpu_arch >= Hopper; exclude GB20X and RTX 6000 (not supported)
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 8
+      compute_capability:
+        gte: 9.0
+        lt: 12.0
+
+  tests:
+  # chunked attention case
+  - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-maxbs:1024-maxnt:20000-kv_frac:0.6-input_output_len:20000,2000-reqs:1000-ep:8-tp:8-gpus:8]
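
To check whether a given machine falls inside the new `compute_capability` window before running the list, the capability can be queried directly. A small sketch, assuming a CUDA-enabled PyTorch build is available on the node:

```python
# Sketch: report this machine's compute capability and whether it
# falls in the [9.0, 12.0) window used by the conditions above.
import torch

major, minor = torch.cuda.get_device_capability(0)  # e.g. (9, 0) on H100/H200
cc = major + minor / 10
print(f"compute capability: {cc}")
print("matches gte:9.0, lt:12.0:", 9.0 <= cc < 12.0)
```

On newer drivers, `nvidia-smi --query-gpu=compute_cap --format=csv,noheader` reports the same value without a PyTorch dependency.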