From 7c8f2b08984475a37e3577fc4dfeae53d8a57b19 Mon Sep 17 00:00:00 2001
From: Chenghao Zhang <211069071+nvchenghaoz@users.noreply.github.com>
Date: Wed, 12 Nov 2025 15:19:43 -0800
Subject: [PATCH] Support fp8 kv cache

Signed-off-by: Chenghao Zhang <211069071+nvchenghaoz@users.noreply.github.com>
---
 .../auto_deploy/custom_ops/mamba/cuda_backend_causal_conv.py  | 2 +-
 .../auto_deploy/custom_ops/mamba/torch_backend_causal_conv.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorrt_llm/_torch/auto_deploy/custom_ops/mamba/cuda_backend_causal_conv.py b/tensorrt_llm/_torch/auto_deploy/custom_ops/mamba/cuda_backend_causal_conv.py
index 0803375847f..dfe39ce08dd 100644
--- a/tensorrt_llm/_torch/auto_deploy/custom_ops/mamba/cuda_backend_causal_conv.py
+++ b/tensorrt_llm/_torch/auto_deploy/custom_ops/mamba/cuda_backend_causal_conv.py
@@ -283,7 +283,7 @@ def _get_conv_cache(si: SequenceInfo):
             in_channels,
             max(1, kernel_size - 1),
             device=si.device,
-            dtype=cache_config.dtype or inp_fake.dtype,
+            dtype=inp_fake.dtype,
         )
 
     return {"conv_state_cache": _get_conv_cache}
diff --git a/tensorrt_llm/_torch/auto_deploy/custom_ops/mamba/torch_backend_causal_conv.py b/tensorrt_llm/_torch/auto_deploy/custom_ops/mamba/torch_backend_causal_conv.py
index 6f0059d250d..67913a324c0 100644
--- a/tensorrt_llm/_torch/auto_deploy/custom_ops/mamba/torch_backend_causal_conv.py
+++ b/tensorrt_llm/_torch/auto_deploy/custom_ops/mamba/torch_backend_causal_conv.py
@@ -341,7 +341,7 @@ def _get_conv_cache(si: SequenceInfo):
             in_channels,
             kernel_size,
             device=si.device,
-            dtype=cache_config.dtype or inp_fake.dtype,
+            dtype=inp_fake.dtype,
         )
 
     return {"conv_state_cache": _get_conv_cache}
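
Editor's note (not part of the patch): in both files the conv state cache previously inherited `cache_config.dtype` when it was set, which under an fp8 KV cache would force the Mamba causal-conv state cache into fp8 as well. The patch makes the conv state cache always follow the activation dtype `inp_fake.dtype`. The minimal Python sketch below illustrates that dtype-selection change; the helper names `conv_cache_dtype_before` / `conv_cache_dtype_after` are hypothetical stand-ins, not the repository's API.

# Hypothetical sketch of the dtype selection before and after this patch.
import torch

def conv_cache_dtype_before(cache_config_dtype, activation_dtype):
    # Old behavior: prefer the KV-cache dtype whenever it is set.
    # With an fp8 KV cache, this made the conv state cache fp8 too.
    return cache_config_dtype or activation_dtype

def conv_cache_dtype_after(cache_config_dtype, activation_dtype):
    # New behavior: the conv state cache always uses the activation
    # dtype, independent of the (possibly fp8) KV-cache dtype.
    return activation_dtype

fp8 = torch.float8_e4m3fn  # example fp8 KV-cache dtype (assumption)
act = torch.bfloat16       # example activation dtype (inp_fake.dtype)

assert conv_cache_dtype_before(fp8, act) is fp8  # conv cache forced to fp8
assert conv_cache_dtype_after(fp8, act) is act   # conv cache stays bf16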