pytorch · cehongwang · Jun 30, 2025 · Jun 30, 2025 · Jun 30, 2025 · Jul 2, 2025
diff --git a/.bazelrc b/.bazelrc
@@ -28,6 +28,7 @@ build:linux --cxxopt="-fdiagnostics-color=always"
 
 build:windows --cxxopt="/GS-" --cxxopt="/std:c++17" --cxxopt="/permissive-"
 build:windows --cxxopt="/wd4244" --cxxopt="/wd4267" --cxxopt="/wd4819"
+build:windows --cxxopt="/utf-8"
 build:windows --features=windows_export_all_symbols
 
 build:python --define=target_lang=python

diff --git a/.github/scripts/generate-tensorrt-test-matrix.py b/.github/scripts/generate-tensorrt-test-matrix.py
@@ -52,6 +52,10 @@
             "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/zip/TensorRT-10.11.0.33.Windows.win10.cuda-12.9.zip",
             "strip_prefix": "TensorRT-10.11.0.33",
         },
+        "10.12.0": {
+            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.12.0/zip/TensorRT-10.12.0.36.Windows.win10.cuda-12.9.zip",
+            "strip_prefix": "TensorRT-10.12.0.36",
+        },
     },
     "linux": {
         "10.3.0": {
@@ -78,6 +82,10 @@
             "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-12.9.tar.gz",
             "strip_prefix": "TensorRT-10.11.0.33",
         },
+        "10.12.0": {
+            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.12.0/tars/TensorRT-10.12.0.36.Linux.x86_64-gnu.cuda-12.9.tar.gz",
+            "strip_prefix": "TensorRT-10.12.0.36",
+        },
     },
 }
 

diff --git a/.github/workflows/build-test-linux-aarch64-jetpack.yml b/.github/workflows/build-test-linux-aarch64-jetpack.yml
@@ -1,16 +1,17 @@
 name: Build and test Linux aarch64 wheels for Jetpack
 
 on:
-  pull_request:
-  push:
-    branches:
-      - main
-      - nightly
-      - release/*
-    tags:
-      # NOTE: Binary build pipelines should only get triggered on release candidate builds
-      # Release candidate tags look like: v1.11.0-rc1
-      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
+  # TODO: Re-enable the pull_request and push triggers below once we have a stable release
+  # pull_request:
+  # push:
+  #   branches:
+  #     - main
+  #     - nightly
+  #     - release/*
+  #   tags:
+  #     # NOTE: Binary build pipelines should only get triggered on release candidate builds
+  #     # Release candidate tags look like: v1.11.0-rc1
+  #     - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
   workflow_dispatch:
 
 jobs:

diff --git a/.github/workflows/build-test-tensorrt-windows.yml b/.github/workflows/build-test-tensorrt-windows.yml
@@ -254,9 +254,9 @@ jobs:
         pushd .
         cd tests/py
         cd dynamo
-        python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/
+        ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/
         python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/
-        python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/
+        ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/
         popd
 
   tests-py-dynamo-cudagraphs:

diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml
@@ -230,7 +230,7 @@ jobs:
         cd dynamo
         python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/
         python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_complete_be_e2e_test_results.xml --ir torch_compile models/test_models.py
-        python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py
+        ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py
         popd
 
   tests-py-dynamo-core:
@@ -258,9 +258,9 @@ jobs:
         pushd .
         cd tests/py
         cd dynamo
-        python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/
+        ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/
         python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/
-        python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/
+        ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/
         popd
 
   tests-py-dynamo-cudagraphs:

diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml
@@ -14,12 +14,12 @@ jobs:
         if: ${{ ! contains(github.actor, 'pytorchbot') }}
         environment: pytorchbot-env
         container:
-            image: docker.io/pytorch/manylinux2_28-builder:cuda12.8
+            image: docker.io/pytorch/manylinux2_28-builder:cuda12.9
             options: --gpus all
         env:
-            CUDA_HOME: /usr/local/cuda-12.8
-            VERSION_SUFFIX: cu128
-            CU_VERSION: cu128
+            CUDA_HOME: /usr/local/cuda-12.9
+            VERSION_SUFFIX: cu129
+            CU_VERSION: cu129
             CHANNEL: nightly
             CI_BUILD: 1
         steps:
@@ -35,14 +35,14 @@ jobs:
             - name: Install base deps
               run: |
                   python3 -m pip install pip --upgrade
-                  python3 -m pip install pyyaml numpy torch --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu128
+                  python3 -m pip install pyyaml numpy torch --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu129
                   ./packaging/pre_build_script.sh
             - name: Get HEAD SHA
               id: vars
               run: echo "sha=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
             - name: Build Python Package
               run: |
-                  python3 -m pip install --pre . --extra-index-url https://download.pytorch.org/whl/nightly/cu128
+                  python3 -m pip install --pre . --extra-index-url https://download.pytorch.org/whl/nightly/cu129
             - name: Generate New Docs
               run: |
                   cd docsrc

diff --git a/MODULE.bazel b/MODULE.bazel
@@ -1,6 +1,6 @@
 module(
     name = "torch_tensorrt",
-    version = "2.8.0a0",
+    version = "2.9.0a0",
     repo_name = "org_pytorch_tensorrt",
 )
 
@@ -103,18 +103,18 @@ http_archive(
 http_archive(
     name = "tensorrt",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    strip_prefix = "TensorRT-10.11.0.33",
+    strip_prefix = "TensorRT-10.12.0.36",
     urls = [
-        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-12.9.tar.gz",
+        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.12.0/tars/TensorRT-10.12.0.36.Linux.x86_64-gnu.cuda-12.9.tar.gz",
     ],
 )
 
 http_archive(
     name = "tensorrt_sbsa",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    strip_prefix = "TensorRT-10.11.0.33",
+    strip_prefix = "TensorRT-10.12.0.36",
     urls = [
-        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-10.11.0.33.Linux.aarch64-gnu.cuda-12.9.tar.gz",
+        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.12.0/tars/TensorRT-10.12.0.36.Linux.aarch64-gnu.cuda-12.9.tar.gz",
     ],
 )
 
@@ -130,9 +130,9 @@ http_archive(
 http_archive(
     name = "tensorrt_win",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    strip_prefix = "TensorRT-10.11.0.33",
+    strip_prefix = "TensorRT-10.12.0.36",
     urls = [
-        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/zip/TensorRT-10.11.0.33.Windows.win10.cuda-12.9.zip",
+        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.12.0/zip/TensorRT-10.12.0.36.Windows.win10.cuda-12.9.zip",
     ],
 )
 

diff --git a/cpp/include/torch_tensorrt/macros.h b/cpp/include/torch_tensorrt/macros.h
@@ -24,7 +24,7 @@
 #define STR(x) XSTR(x)
 
 #define TORCH_TENSORRT_MAJOR_VERSION 2
-#define TORCH_TENSORRT_MINOR_VERSION 6
+#define TORCH_TENSORRT_MINOR_VERSION 9
 #define TORCH_TENSORRT_PATCH_VERSION 0
 #define TORCH_TENSORRT_VERSION      \
   STR(TORCH_TENSORRT_MAJOR_VERSION) \

diff --git a/dev_dep_versions.yml b/dev_dep_versions.yml
@@ -1,2 +1,2 @@
 __cuda_version__: "12.8"
-__tensorrt_version__: "10.11.0"
+__tensorrt_version__: "10.12.0"
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -2,9 +2,9 @@
 
 # Base image starts with CUDA
 #TODO: cuda version
-ARG BASE_IMG=nvidia/cuda:12.8.0-devel-ubuntu22.04
+ARG BASE_IMG=nvidia/cuda:12.9.0-devel-ubuntu22.04
 FROM ${BASE_IMG} as base
-ENV BASE_IMG=nvidia/cuda:12.8.0-devel-ubuntu22.04
+ENV BASE_IMG=nvidia/cuda:12.9.0-devel-ubuntu22.04
 
 ARG TENSORRT_VERSION
 ENV TENSORRT_VERSION=${TENSORRT_VERSION}

diff --git a/docker/README.md b/docker/README.md
@@ -15,14 +15,14 @@
 
 ### Instructions
 
-- The example below uses TensorRT 10.11.0.33
+- The example below uses TensorRT 10.12.0.36
 - See <a href="https://github.com/pytorch/TensorRT#dependencies">dependencies</a> for a list of current default dependencies.
 
 > From root of Torch-TensorRT repo
 
 Build:
 ```
-DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=10.11.0 -f docker/Dockerfile -t torch_tensorrt:latest .
+DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=10.12.0 -f docker/Dockerfile -t torch_tensorrt:latest .
 ```
 
 Run:

diff --git a/docker/dist-build.sh b/docker/dist-build.sh
@@ -4,7 +4,7 @@ set -x
 
 TOP_DIR=$(cd $(dirname $0); pwd)/..
 
-BUILD_CMD="python -m pip wheel .  --extra-index-url https://download.pytorch.org/whl/nightly/cu128 -w dist"
+BUILD_CMD="python -m pip wheel .  --extra-index-url https://download.pytorch.org/whl/nightly/cu129 -w dist"
 
 # TensorRT restricts our pip version
 cd ${TOP_DIR} \

diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html b/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html
@@ -10,7 +10,7 @@
 
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
 
-  <title>Class DataType &mdash; Torch-TensorRT v2.8.0.dev0+ee32da0 documentation</title>
+  <title>Class DataType &mdash; Torch-TensorRT v2.9.0.dev0+92a6908 documentation</title>
 
 
 
@@ -293,7 +293,7 @@
 
 
                 <div class="version">
-                  v2.8.0.dev0+ee32da0
+                  v2.9.0.dev0+92a6908
                 </div>
 
 
@@ -324,7 +324,7 @@
               <p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="../getting_started/installation.html">Installation</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../getting_started/jetpack.html">Overview</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../getting_started/jetpack.html">Torch-TensorRT in JetPack</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../getting_started/quick_start.html">Quick Start</a></li>
 </ul>
 <p class="caption" role="heading"><span class="caption-text">User Guide</span></p>
@@ -376,9 +376,8 @@
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_compile_resnet_example.html">Compiling ResNet with dynamic shapes using the <cite>torch.compile</cite> backend</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_compile_transformers_example.html">Compiling BERT using the <cite>torch.compile</cite> backend</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_compile_stable_diffusion.html">Compiling Stable Diffusion model using the <cite>torch.compile</cite> backend</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorials/compile_hf_models.html">Compiling LLM models from Huggingface</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_compile_gpt2.html">Compiling GPT2 using the Torch-TensorRT <code class="docutils literal notranslate"><span class="pre">torch.compile</span></code> frontend</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_export_gpt2.html">Compiling GPT2 using the dynamo backend</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_export_llama2.html">Compiling Llama2 using the dynamo backend</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_export_sam2.html">Compiling SAM2 using the dynamo backend</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_export_flux_dev.html">Compiling FLUX.1-dev model using the Torch-TensorRT dynamo backend</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/notebooks.html">Legacy notebooks</a></li>

diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.html b/docs/_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.html
@@ -10,7 +10,7 @@
 
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
 
-  <title>Class Device::DeviceType &mdash; Torch-TensorRT v2.8.0.dev0+ee32da0 documentation</title>
+  <title>Class Device::DeviceType &mdash; Torch-TensorRT v2.9.0.dev0+92a6908 documentation</title>
 
 
 
@@ -293,7 +293,7 @@
 
 
                 <div class="version">
-                  v2.8.0.dev0+ee32da0
+                  v2.9.0.dev0+92a6908
                 </div>
 
 
@@ -324,7 +324,7 @@
               <p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="../getting_started/installation.html">Installation</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../getting_started/jetpack.html">Overview</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../getting_started/jetpack.html">Torch-TensorRT in JetPack</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../getting_started/quick_start.html">Quick Start</a></li>
 </ul>
 <p class="caption" role="heading"><span class="caption-text">User Guide</span></p>
@@ -376,9 +376,8 @@
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_compile_resnet_example.html">Compiling ResNet with dynamic shapes using the <cite>torch.compile</cite> backend</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_compile_transformers_example.html">Compiling BERT using the <cite>torch.compile</cite> backend</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_compile_stable_diffusion.html">Compiling Stable Diffusion model using the <cite>torch.compile</cite> backend</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorials/compile_hf_models.html">Compiling LLM models from Huggingface</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_compile_gpt2.html">Compiling GPT2 using the Torch-TensorRT <code class="docutils literal notranslate"><span class="pre">torch.compile</span></code> frontend</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_export_gpt2.html">Compiling GPT2 using the dynamo backend</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_export_llama2.html">Compiling Llama2 using the dynamo backend</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_export_sam2.html">Compiling SAM2 using the dynamo backend</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_export_flux_dev.html">Compiling FLUX.1-dev model using the Torch-TensorRT dynamo backend</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/notebooks.html">Legacy notebooks</a></li>

diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html b/docs/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html
@@ -10,7 +10,7 @@
 
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
 
-  <title>Class TensorFormat &mdash; Torch-TensorRT v2.8.0.dev0+ee32da0 documentation</title>
+  <title>Class TensorFormat &mdash; Torch-TensorRT v2.9.0.dev0+92a6908 documentation</title>
 
 
 
@@ -293,7 +293,7 @@
 
 
                 <div class="version">
-                  v2.8.0.dev0+ee32da0
+                  v2.9.0.dev0+92a6908
                 </div>
 
 
@@ -324,7 +324,7 @@
               <p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="../getting_started/installation.html">Installation</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../getting_started/jetpack.html">Overview</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../getting_started/jetpack.html">Torch-TensorRT in JetPack</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../getting_started/quick_start.html">Quick Start</a></li>
 </ul>
 <p class="caption" role="heading"><span class="caption-text">User Guide</span></p>
@@ -376,9 +376,8 @@
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_compile_resnet_example.html">Compiling ResNet with dynamic shapes using the <cite>torch.compile</cite> backend</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_compile_transformers_example.html">Compiling BERT using the <cite>torch.compile</cite> backend</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_compile_stable_diffusion.html">Compiling Stable Diffusion model using the <cite>torch.compile</cite> backend</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../tutorials/compile_hf_models.html">Compiling LLM models from Huggingface</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_compile_gpt2.html">Compiling GPT2 using the Torch-TensorRT <code class="docutils literal notranslate"><span class="pre">torch.compile</span></code> frontend</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_export_gpt2.html">Compiling GPT2 using the dynamo backend</a></li>
-<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_export_llama2.html">Compiling Llama2 using the dynamo backend</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_export_sam2.html">Compiling SAM2 using the dynamo backend</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/torch_export_flux_dev.html">Compiling FLUX.1-dev model using the Torch-TensorRT dynamo backend</a></li>
 <li class="toctree-l1"><a class="reference internal" href="../tutorials/notebooks.html">Legacy notebooks</a></li>