pytorch · lanluo-nvidia · Sep 26, 2025 · Sep 24, 2025 · Sep 24, 2025 · Sep 24, 2025
diff --git a/.github/workflows/build-test-linux-x86_64.yml b/.github/workflows/build-test-linux-x86_64.yml
@@ -177,6 +177,7 @@ jobs:
         cd tests/py
         cd dynamo
         python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/
+        python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_llm.xml llm/
         popd
 
   tests-py-dynamo-serde:

diff --git a/.github/workflows/build-test-linux-x86_64_rtx.yml b/.github/workflows/build-test-linux-x86_64_rtx.yml
@@ -141,6 +141,7 @@ jobs:
         cd tests/py
         cd dynamo
         python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/
+        python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_llm.xml llm/
         popd
 
   tests-py-dynamo-serde:

diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml
@@ -138,6 +138,8 @@ jobs:
       pre-script: packaging/driver_upgrade.bat
       script: |
         set -euo pipefail
+        nvidia-smi
+        nvcc --version
         export USE_HOST_DEPS=1
         export CI_BUILD=1
         pushd .
@@ -172,6 +174,7 @@ jobs:
         cd tests/py
         cd dynamo
         python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/
+        python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_llm.xml llm/
         popd
 
   tests-py-dynamo-serde:

diff --git a/.github/workflows/build-test-windows_rtx.yml b/.github/workflows/build-test-windows_rtx.yml
@@ -143,6 +143,7 @@ jobs:
         cd tests/py
         cd dynamo
         python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/
+        python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_llm.xml llm/
         popd
 
   tests-py-dynamo-serde:

diff --git a/.github/workflows/build_windows.yml b/.github/workflows/build_windows.yml
@@ -288,6 +288,7 @@ jobs:
           BUILD_PARAMS: ${{ inputs.wheel-build-params }}
         run: |
           source "${BUILD_ENV_FILE}"
+
           if [[ ${{ inputs.is-release-wheel }} == true || ${{ inputs.is-release-tarball }} == true ]]; then
             # release version for upload to pypi
             # BUILD_VERSION example: 2.4.0+cu121, we don't want the +cu121 part, so remove +cu121
@@ -346,18 +347,24 @@ jobs:
           source "${BUILD_ENV_FILE}"
           WHEEL_NAME=$(ls "${{ inputs.repository }}/dist/")
           echo "$WHEEL_NAME"
+          set -x
+          nvidia-smi
+          nvcc --version
+          ${CONDA_RUN} python -m pip list
+
           ${CONDA_RUN} pip install "${{ inputs.repository }}/dist/$WHEEL_NAME"
           if [[ $USE_TRT_RTX == true ]]; then
             # TODO: lan to remove this once we have a better way to do a smoke test
             echo "Smoke test for TensorRT-RTX is not skipped for now"
           else
-            if [[ ! -f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT} ]]; then
-              echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found"
-              ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)"
-            else
-              echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found"
-              ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}"
-            fi
+            echo "Skip smoke test for windows."
+            # if [[ ! -f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT} ]]; then
+            #   echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found"
+            #   ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)"
+            # else
+            #   echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found"
+            #   ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}"
+            # fi
           fi
       - name: Smoke Test ARM64
         if: inputs.architecture == 'arm64'

diff --git a/MODULE.bazel b/MODULE.bazel
@@ -101,9 +101,9 @@ http_archive(
 http_archive(
     name = "tensorrt",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    strip_prefix = "TensorRT-10.13.2.6",
+    strip_prefix = "TensorRT-10.13.3.9",
     urls = [
-        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-10.13.2.6.Linux.x86_64-gnu.cuda-12.9.tar.gz",
+        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.3/tars/TensorRT-10.13.3.9.Linux.x86_64-gnu.cuda-13.0.tar.gz",
     ],
 )
 
@@ -119,9 +119,9 @@ http_archive(
 http_archive(
     name = "tensorrt_sbsa",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    strip_prefix = "TensorRT-10.13.2.6",
+    strip_prefix = "TensorRT-10.13.3.9",
     urls = [
-        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-10.13.2.6.Linux.aarch64-gnu.cuda-13.0.tar.gz",
+        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.3/tars/TensorRT-10.13.3.9.Linux.aarch64-gnu.cuda-13.0.tar.gz",
     ],
 )
 
@@ -137,9 +137,9 @@ http_archive(
 http_archive(
     name = "tensorrt_win",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    strip_prefix = "TensorRT-10.13.2.6",
+    strip_prefix = "TensorRT-10.13.3.9",
     urls = [
-        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/zip/TensorRT-10.13.2.6.Windows.win10.cuda-12.9.zip",
+        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.3/zip/TensorRT-10.13.3.9.Windows.win10.cuda-13.0.zip",
     ],
 )
 

diff --git a/dev_dep_versions.yml b/dev_dep_versions.yml
@@ -1,3 +1,3 @@
 __cuda_version__: "12.8"
-__tensorrt_version__: "10.12.0"
+__tensorrt_version__: "10.13.3"
 __tensorrt_rtx_version__: "1.0.0"
diff --git a/packaging/driver_upgrade.bat b/packaging/driver_upgrade.bat
@@ -1,9 +1,9 @@
-set WIN_DRIVER_VN=528.89
-set "DRIVER_DOWNLOAD_LINK=https://ossci-windows.s3.amazonaws.com/%WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe"
-curl --retry 3 -kL %DRIVER_DOWNLOAD_LINK% --output %WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe
+set WIN_DRIVER_VN=580.88
+set "DRIVER_DOWNLOAD_LINK=https://ossci-windows.s3.amazonaws.com/%WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe" & REM @lint-ignore
+curl --retry 3 -kL %DRIVER_DOWNLOAD_LINK% --output %WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe
 if errorlevel 1 exit /b 1
 
-start /wait %WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe -s -noreboot
+start /wait %WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe -s -noreboot
 if errorlevel 1 exit /b 1
 
-del %WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe || ver > NUL
+del %WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe || ver > NUL
diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh
@@ -59,15 +59,13 @@ fi
 export TORCH_BUILD_NUMBER=$(python -c "import torch, urllib.parse as ul; print(ul.quote_plus(torch.__version__))")
 export TORCH_INSTALL_PATH=$(python -c "import torch, os; print(os.path.dirname(torch.__file__))")
 
-if [[ ${TENSORRT_VERSION} != "" ]]; then
-  # Replace dependencies in the original pyproject.toml with the current TensorRT version. It is used for CI tests of different TensorRT versions.
-  # For example, if the current testing TensorRT version is 10.7.0, but the pyproject.toml tensorrt>=10.8.0,<10.9.0, then the following sed command
-  # will replace tensorrt>=10.8.0,<10.9.0 with tensorrt==10.7.0
-  sed -i -e "s/tensorrt>=.*,<.*\"/tensorrt>=${TENSORRT_VERSION},<$(echo "${TENSORRT_VERSION}" | awk -F. '{print $1"."$2+1".0"}')\"/g" \
-         -e "s/tensorrt-cu12>=.*,<.*\"/tensorrt-cu12>=${TENSORRT_VERSION},<$(echo "${TENSORRT_VERSION}" | awk -F. '{print $1"."$2+1".0"}')\"/g" \
-         -e "s/tensorrt-cu12-bindings>=.*,<.*\"/tensorrt-cu12-bindings>=${TENSORRT_VERSION},<$(echo "${TENSORRT_VERSION}" | awk -F. '{print $1"."$2+1".0"}')\"/g" \
-         -e "s/tensorrt-cu12-libs>=.*,<.*\"/tensorrt-cu12-libs>=${TENSORRT_VERSION},<$(echo "${TENSORRT_VERSION}" | awk -F. '{print $1"."$2+1".0"}')\"/g" \
-         pyproject.toml
+# CU_UPPERBOUND eg:13.0 or 12.9
+# tensorrt tar for linux and windows are different across cuda version
+# for sbsa it is the same tar across cuda version
+if [[ ${CU_VERSION:2:2} == "13" ]]; then
+    export CU_UPPERBOUND="13.0"
+else
+    export CU_UPPERBOUND="12.9"
 fi
 
 cat toolchains/ci_workspaces/MODULE.bazel.tmpl | envsubst > MODULE.bazel

diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh
@@ -27,6 +27,14 @@ pip install --force-reinstall --pre ${TORCH} --index-url ${INDEX_URL}
 export CUDA_HOME="$(echo ${CUDA_PATH} | sed -e 's#\\#\/#g')"
 export TORCH_INSTALL_PATH="$(python -c "import torch, os; print(os.path.dirname(torch.__file__))" | sed -e 's#\\#\/#g')"
 
+# CU_UPPERBOUND eg:13.0 or 12.9
+# tensorrt tar for linux and windows are different across cuda version
+# for sbsa it is the same tar across cuda version
+if [[ ${CU_VERSION:2:2} == "13" ]]; then
+    export CU_UPPERBOUND="13.0"
+else
+    export CU_UPPERBOUND="12.9"
+fi
 cat toolchains/ci_workspaces/MODULE.bazel.tmpl | envsubst > MODULE.bazel
 
 if [[ ${TENSORRT_VERSION} != "" ]]; then

diff --git a/tests/py/dynamo/models/test_llm_models.py → tests/py/dynamo/llm/test_llm_models.py b/tests/py/dynamo/models/test_llm_models.py → tests/py/dynamo/llm/test_llm_models.py
@@ -16,6 +16,8 @@
 @pytest.mark.unit
 @pytest.mark.parametrize("precision", ["FP16", "BF16", "FP32"])
 def test_llm_decoder_layer(precision):
+    if torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx and precision == "BF16":
+        pytest.skip("TensorRT-RTX does not support bfloat16, skipping test")
 
     with torch.inference_mode():
         args = argparse.Namespace()

diff --git a/tests/py/dynamo/models/test_models.py b/tests/py/dynamo/models/test_models.py
@@ -182,7 +182,7 @@ def test_mobilenet_v2(ir, dtype):
 )
 def test_efficientnet_b0(ir, dtype):
     if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16:
-        pytest.skip("TensorRT-RTX does not support bfloat16")
+        pytest.skip("TensorRT-RTX does not support bfloat16, skipping test")
 
     model = (
         timm.create_model("efficientnet_b0", pretrained=True)

diff --git a/toolchains/ci_workspaces/MODULE.bazel.tmpl b/toolchains/ci_workspaces/MODULE.bazel.tmpl
@@ -75,9 +75,9 @@ http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "ht
 http_archive(
     name = "tensorrt",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    strip_prefix = "TensorRT-10.13.2.6",
+    strip_prefix = "TensorRT-10.13.3.9",
     urls = [
-        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-10.13.2.6.Linux.x86_64-gnu.cuda-12.9.tar.gz",
+        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.3/tars/TensorRT-10.13.3.9.Linux.x86_64-gnu.cuda-${CU_UPPERBOUND}.tar.gz",
     ],
 )
 
@@ -93,9 +93,9 @@ http_archive(
 http_archive(
     name = "tensorrt_sbsa",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    strip_prefix = "TensorRT-10.13.2.6",
+    strip_prefix = "TensorRT-10.13.3.9",
     urls = [
-        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-10.13.2.6.Linux.aarch64-gnu.cuda-13.0.tar.gz",
+        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.3/tars/TensorRT-10.13.3.9.Linux.aarch64-gnu.cuda-13.0.tar.gz",
     ],
 )
 
@@ -111,9 +111,9 @@ http_archive(
 http_archive(
     name = "tensorrt_win",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    strip_prefix = "TensorRT-10.13.2.6",
+    strip_prefix = "TensorRT-10.13.3.9",
     urls = [
-        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/zip/TensorRT-10.13.2.6.Windows.win10.cuda-12.9.zip",
+        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.3/zip/TensorRT-10.13.3.9.Windows.win10.cuda-${CU_UPPERBOUND}.zip",
     ],
 )