From 45633330d8ba1db8f3ae8f47503d4f4841a62f46 Mon Sep 17 00:00:00 2001 From: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com> Date: Sun, 16 Nov 2025 20:14:33 +0800 Subject: [PATCH 1/2] update waive list Signed-off-by: xinhe-nv <200704525+xinhe-nv@users.noreply.github.com> --- tests/integration/test_lists/waives.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index 3fd599358f3..bbfc7fbec6c 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -420,3 +420,4 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpu unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall[DeepEPLowLatency] SKIP (https://nvbugs/5664904) unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall[DeepEP] SKIP (https://nvbugs/5664904) unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall[MNNVL] SKIP (https://nvbugs/5664904) +examples/test_qwen.py::test_llm_qwen_int4_single_gpu_summary[qwen2.5_14b_instruct_int4-nb:4] SKIP (https://nvbugs/5481075) From 3e19c87e73a8d09f30aa780365e2589c1159b24e Mon Sep 17 00:00:00 2001 From: "Xin He (SW-GPU)" <200704525+xinhe-nv@users.noreply.github.com> Date: Mon, 17 Nov 2025 11:05:54 +0800 Subject: [PATCH 2/2] update waives Signed-off-by: Xin He (SW-GPU) <200704525+xinhe-nv@users.noreply.github.com> --- .../defs/examples/serve/test_serve.py | 3 +- tests/integration/test_lists/waives.txt | 48 +++++++------------ 2 files changed, 19 insertions(+), 32 deletions(-) diff --git a/tests/integration/defs/examples/serve/test_serve.py b/tests/integration/defs/examples/serve/test_serve.py index 1a8b07aa4af..1c2a5d68d30 100755 --- a/tests/integration/defs/examples/serve/test_serve.py +++ b/tests/integration/defs/examples/serve/test_serve.py @@ -2,7 +2,7 @@ import time import requests -from defs.conftest import llm_models_root, skip_pre_hopper +from defs.conftest import llm_models_root, skip_post_blackwell, skip_pre_hopper from defs.trt_test_alternative import popen, print_error, print_info from openai import OpenAI from requests.exceptions import RequestException @@ -93,6 +93,7 @@ def check_openai_chat_completion(http_port="8000", @skip_pre_hopper +@skip_post_blackwell def test_extra_llm_api_options(serve_test_root): test_configs_root = f"{serve_test_root}/test_configs" config_file = f"{test_configs_root}/Qwen3-30B-A3B-FP8.yml" diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index bbfc7fbec6c..50161758c86 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -35,36 +35,21 @@ examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bf examples/test_whisper.py::test_llm_whisper_general[large-v3-enable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_python_runtime] SKIP (https://nvbugs/4866931) examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-fp8] SKIP (https://nvbugs/4961624) examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-chunked_summarization_long] SKIP (https://nvbugs/5321371) -full:B200_PCIe/unittest/trt/functional SKIP (Disable for Blackwell) -full:B200_PCIe/unittest/trt/quantization SKIP (Disable for Blackwell) -full:B200_PCIe/unittest/trt/attention/test_bert_attention.py SKIP (Disable for Blackwell) -full:B200_PCIe/unittest/trt/model/test_mamba.py SKIP (Disable for Blackwell) -full:B200_PCIe/unittest/bindings SKIP (Disable for Blackwell) -full:B200_PCIe/unittest/trt/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/trt/python_plugin unittest/tools unittest/utils unittest/others SKIP (Disable for Blackwell) -full:B200_PCIe/unittest/trt/quantization/test_weight_only_quant_matmul.py SKIP (Disable for Blackwell) -full:B200_PCIe/unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py SKIP (Disable for Blackwell) -full:B200_PCIe/unittest/trt/model/test_gpt.py -k "partition0" SKIP (Disable for Blackwell) -full:B200_PCIe/unittest/test_model_runner_cpp.py SKIP (Disable for Blackwell) -full:B200_PCIe/unittest/llmapi/test_llm_models.py -m "part0" SKIP (Disable for Blackwell for context fmha doesn't support when headsize is 80/96) -full:B200_PCIe/examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-fp8] SKIP (megatron-core 0.8 is not supported in python 3.12) -full:B200_PCIe/accuracy/test_cli_flow.py::TestMixtral8x7B::test_fp4_plugin SKIP (Disable for Blackwell OOM) -full:B200_PCIe/unittest/llmapi/test_llm_models.py -m "not (part0 or part1)" SKIP (Disable for Blackwell OOM) -full:B200/unittest/trt/functional SKIP (Disable for Blackwell) -full:B200/unittest/trt/quantization SKIP (Disable for Blackwell) -full:B200/unittest/trt/attention/test_bert_attention.py SKIP (Disable for Blackwell) -full:B200/unittest/trt/model/test_mamba.py SKIP (Disable for Blackwell) -full:B200/unittest/bindings SKIP (Disable for Blackwell) -full:B200/unittest/trt/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/trt/python_plugin unittest/tools unittest/utils unittest/others SKIP (Disable for Blackwell) -full:B200/unittest/trt/quantization/test_weight_only_quant_matmul.py SKIP (Disable for Blackwell) -full:B200/unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py SKIP (Disable for Blackwell) -full:B200/unittest/trt/model/test_gpt.py -k "partition0" SKIP (Disable for Blackwell) -full:B200/unittest/test_model_runner_cpp.py SKIP (Disable for Blackwell) -full:B200/unittest/llmapi/test_llm_models.py -m "part0" SKIP (Disable for Blackwell for context fmha doesn't support when headsize is 80/96) -full:B200/examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (megatron-core 0.8 is not supported in python 3.12) -full:B200/examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-fp8] SKIP (megatron-core 0.8 is not supported in python 3.12) -full:B200/accuracy/test_cli_flow.py::TestMixtral8x7B::test_fp4_plugin SKIP (Disable for Blackwell OOM) -full:B200/unittest/llmapi/test_llm_models.py -m "not (part0 or part1)" SKIP (Disable for Blackwell OOM) -full:B200/examples/test_mixtral.py::test_llm_mixtral_moe_plugin_fp8_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5064768) +full:sm100/unittest/trt/functional SKIP (Disable for Blackwell) +full:sm100/unittest/trt/quantization SKIP (Disable for Blackwell) +full:sm100/unittest/trt/attention/test_bert_attention.py SKIP (Disable for Blackwell) +full:sm100/unittest/trt/model/test_mamba.py SKIP (Disable for Blackwell) +full:sm100/unittest/bindings SKIP (Disable for Blackwell) +full:sm100/unittest/trt/attention/test_sage_attention.py unittest/llmapi/test_llm_download.py unittest/llmapi/test_llm_kv_cache_events.py unittest/trt/model/redrafter unittest/trt/model/test_phi.py unittest/trt/model/test_unet.py unittest/trt/python_plugin unittest/tools unittest/utils unittest/others SKIP (Disable for Blackwell) +full:sm100/unittest/trt/quantization/test_weight_only_quant_matmul.py SKIP (Disable for Blackwell) +full:sm100/unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py SKIP (Disable for Blackwell) +full:sm100/unittest/trt/model/test_gpt.py -k "partition0" SKIP (Disable for Blackwell) +full:sm100/unittest/test_model_runner_cpp.py SKIP (Disable for Blackwell) +full:sm100/unittest/llmapi/test_llm_models.py -m "part0" SKIP (Disable for Blackwell for context fmha doesn't support when headsize is 80/96) +full:sm100/examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-fp8] SKIP (megatron-core 0.8 is not supported in python 3.12) +full:sm100/accuracy/test_cli_flow.py::TestMixtral8x7B::test_fp4_plugin SKIP (Disable for Blackwell OOM) +full:sm100/unittest/llmapi/test_llm_models.py -m "not (part0 or part1)" SKIP (Disable for Blackwell OOM) +full:sm100/examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (megatron-core 0.8 is not supported in python 3.12) examples/test_mixtral.py::test_llm_mixtral_moe_plugin_fp8_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5064768) test_e2e.py::test_openai_consistent_chat SKIP (https://nvbugs/5112075) examples/test_eagle.py::test_qwen_eagle_1gpu[qwen_7b_chat-eagle1] SKIP (https://nvbugs/5206383) @@ -420,4 +405,5 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpu unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall[DeepEPLowLatency] SKIP (https://nvbugs/5664904) unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall[DeepEP] SKIP (https://nvbugs/5664904) unittest/_torch/modules/test_fused_moe.py::test_fused_moe_alltoall[MNNVL] SKIP (https://nvbugs/5664904) -examples/test_qwen.py::test_llm_qwen_int4_single_gpu_summary[qwen2.5_14b_instruct_int4-nb:4] SKIP (https://nvbugs/5481075) +examples/test_qwen.py::test_llm_qwen_int4_single_gpu_summary[qwen2.5_14b_instruct_int4-nb:4] SKIP (https://nvbugs/5666826) +examples/test_llama.py::test_llm_llama_1gpu_fp4[llama-3.1-70b-instruct-enable_norm_quant_fusion-enable_fused_quant-fp4_plugin-bfloat16] SKIP (https://nvbugs/5451216)