change tests, add to CI

govind-ramnarayan · govind-ramnarayan · commit c6e07ed51cdc · 2025-11-19T23:30:41.000-08:00
Signed-off-by: Govind Ramnarayan <105831528+govind-ramnarayan@users.noreply.github.com>
diff --git a/tests/integration/defs/examples/test_ad_speculative_decoding.py b/tests/integration/defs/examples/test_ad_speculative_decoding.py
@@ -69,7 +69,7 @@ def run_with_autodeploy(model, speculative_model_dir, batch_size):
 
     # Configure KV cache
     kv_cache_config = KvCacheConfig(
-        free_gpu_memory_fraction=0.2,
+        free_gpu_memory_fraction=0.1,
     )
 
     # Configure AutoDeploy LLM arguments
diff --git a/tests/integration/test_lists/test-db/l0_h100.yml b/tests/integration/test_lists/test-db/l0_h100.yml
@@ -107,6 +107,8 @@ l0_h100:
   - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[True-1]
   - accuracy/test_llm_api_autodeploy.py::TestNemotronH::test_auto_dtype[False]
   - accuracy/test_llm_api_autodeploy.py::TestNemotronH::test_auto_dtype[True]
+  - examples/test_ad_speculative_decoding.py::test_autodeploy_spec_dec[1]
+  - examples/test_ad_speculative_decoding.py::test_autodeploy_spec_dec[4]
 - condition:
     ranges:
       system_gpu_count:
diff --git a/tests/unittest/_torch/auto_deploy/unit/singlegpu/test_ad_speculative_decoding.py b/tests/unittest/_torch/auto_deploy/unit/singlegpu/test_ad_speculative_decoding.py
@@ -41,7 +41,7 @@ def test_ad_speculative_decoding_smoke():
 
     # Configure KV cache
     kv_cache_config = KvCacheConfig(
-        free_gpu_memory_fraction=0.1,
+        free_gpu_memory_fraction=0.0001,
     )
 
     experiment_config["args"]["runtime"] = "trtllm"

Original file line number	Diff line number	Diff line change
`@@ -69,7 +69,7 @@ def run_with_autodeploy(model, speculative_model_dir, batch_size):`
`69`	`69`
`70`	`70`	`# Configure KV cache`
`71`	`71`	`kv_cache_config = KvCacheConfig(`
`72`		`- free_gpu_memory_fraction=0.2,`
	`72`	`+ free_gpu_memory_fraction=0.1,`
`73`	`73`	`)`
`74`	`74`
`75`	`75`	`# Configure AutoDeploy LLM arguments`
Original file line number	Diff line number	Diff line change
`@@ -41,7 +41,7 @@ def test_ad_speculative_decoding_smoke():`
`41`	`41`
`42`	`42`	`# Configure KV cache`
`43`	`43`	`kv_cache_config = KvCacheConfig(`
`44`		`- free_gpu_memory_fraction=0.1,`
	`44`	`+ free_gpu_memory_fraction=0.0001,`
`45`	`45`	`)`
`46`	`46`
`47`	`47`	`experiment_config["args"]["runtime"] = "trtllm"`