
Commit 3827db3

Fix test case as chunked attention is not supported on sm_120
1 parent 07343bb commit 3827db3

File tree: 2 files changed (+19, -8 lines)


tests/integration/test_lists/qa/llm_perf_core.yml

Lines changed: 5 additions & 6 deletions
@@ -274,15 +274,14 @@ llm_perf_core:
 
 - condition:
     ranges:
+      compute_capability:
+        gte: 9.0
+        lt: 12.0
       system_gpu_count:
         gte: 8
       gpu_memory:
         gt: 80000
-    wildcards:
-      gpu:
-        - '*h100*'
-        - '*h200*'
-        - '*h20*'
+
   tests:
   # E2E trtllm-bench
   #mixtral_8x7b_v0.1_instruct

@@ -309,7 +308,7 @@ llm_perf_core:
   - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-maxbs:1024-maxnt:4096-kv_frac:0.85-input_output_len:1000,1000-reqs:3000-ep:8-tp:8-gpus:8]
   - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-ep:8-tp:8-gpus:8]
   - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-ep:8-tp:8-gpus:8]
-  #rcca case
+  # chunked attention case
   - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-maxbs:1024-maxnt:20000-kv_frac:0.6-input_output_len:20000,2000-reqs:1000-ep:8-tp:8-gpus:8]
 
   #llama_v4_scout_17b_16e_instruct_fp8
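For context on the new compute_capability range: CUDA reports a major.minor capability per GPU, where 9.0 is Hopper (H100/H200), 10.x covers the data-center Blackwell parts, and 12.0 is the GB20X consumer Blackwell generation (sm_120) that the commit message flags as lacking chunked attention support. Below is a minimal sketch, assuming PyTorch, of how such a gte/lt range could be evaluated against the local GPU; capability_in_range is a hypothetical helper for illustration, not the matcher the TensorRT-LLM test harness actually uses.

# Sketch only: evaluating a compute_capability range such as
# `gte: 9.0, lt: 12.0` from the condition block above.
import torch

def capability_in_range(gte: float, lt: float) -> bool:
    """True if the current GPU's compute capability lies in [gte, lt)."""
    major, minor = torch.cuda.get_device_capability()  # e.g. (9, 0) on H100
    cc = major + minor / 10.0  # sm_90 -> 9.0, sm_100 -> 10.0, sm_120 -> 12.0
    return gte <= cc < lt

# Admits Hopper (9.0) and data-center Blackwell (10.x); rejects sm_120,
# where chunked attention is not supported.
print(capability_in_range(gte=9.0, lt=12.0))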

tests/integration/test_lists/qa/llm_perf_sanity.yml

Lines changed: 14 additions & 2 deletions
@@ -168,11 +168,23 @@ llm_perf_sanity:
   # for chunked prefill cases
   - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float8-maxbs:512-maxnt:2048-kv_frac:0.85-input_output_len:3000,500-reqs:200]
   - perf/test_perf.py::test_perf[llama_v3.1_405b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-tp:8-gpus:8]
-  - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-ep:8-tp:8-gpus:8]
-  - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-maxbs:1024-maxnt:20000-kv_frac:0.6-input_output_len:20000,2000-reqs:1000-ep:8-tp:8-gpus:8]
+  - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-input_output_len:512,32-ep:8-tp:8-gpus:8] TIMEOUT(100)
   - perf/test_perf.py::test_perf[llama_v4_scout_17b_16e_instruct_fp8-bench-pytorch-float8-maxbs:1024-maxnt:20000-kv_frac:0.85-input_output_len:20000,2000-reqs:1000-ep:8-tp:8-gpus:8] TIMEOUT(100)
   - perf/test_perf.py::test_perf[qwen3_235b_a22b_fp8-bench-pytorch-float8-input_output_len:1000,2000-con:256-ep:8-gpus:8] TIMEOUT(60)
   - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-disagg_server-ctx_dp:4-gen_tp:4]
   - perf/test_perf.py::test_perf[llama_v3.1_8b-disagg_server-ctx_dp:4-gen_tp:4]
   # gpt_oss_20b_fp4
   - perf/test_perf.py::test_perf[gpt_oss_20b_fp4-bench-pytorch-float4-input_output_len:512,512]
+
+# gpu_arch >= Hopper, exclude GB20X, RTX 6000 (not supported)
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 8
+      compute_capability:
+        gte: 9.0
+        lt: 12.0
+
+  tests:
+  # chunked attention case
+  - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-maxbs:1024-maxnt:20000-kv_frac:0.6-input_output_len:20000,2000-reqs:1000-ep:8-tp:8-gpus:8]
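The YAML condition block keeps the gating declarative, but the same check could, as an alternative, be applied inside pytest itself. A hedged sketch under that assumption follows; _on_sm120 and test_chunked_attention_perf are invented names for illustration, not part of this repository.

# Illustrative alternative to the YAML gate: skip the chunked-attention
# case at collection time when running on an sm_120 GPU.
import pytest
import torch

def _on_sm120() -> bool:
    if not torch.cuda.is_available():
        return False
    major, _minor = torch.cuda.get_device_capability()
    return major >= 12  # GB20X / RTX 6000 Blackwell report capability 12.x

@pytest.mark.skipif(_on_sm120(), reason="chunked attention not supported on sm_120")
def test_chunked_attention_perf():
    ...  # hypothetical placeholder for the perf case above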
