Skip to content

Commit 0e0532e

Browse files
authored
Merge branch 'main' into dev-stsun-update-doc
2 parents b75a097 + 2854f0c commit 0e0532e

File tree

15 files changed

+129
-18
lines changed

15 files changed

+129
-18
lines changed

.github/CODEOWNERS

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,23 @@ docs/source/performance/perf-benchmarking.md @NVIDIA/trtllm-bench-reviewers
152152
/cpp/tensorrt_llm/batch_manager/dataTransceiverImpl.h @NVIDIA/trt-llm-disagg-devs
153153
/tensorrt_llm/serve/openai_disagg_server.py @NVIDIA/trt-llm-disagg-devs
154154

155+
## TensorRT-LLM - KV Cache Manager
156+
/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
157+
/cpp/tensorrt_llm/batch_manager/kvCacheEventManager.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
158+
/cpp/tensorrt_llm/batch_manager/kvCacheTransferManager.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
159+
/cpp/tensorrt_llm/batch_manager/evictionPolicy.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
160+
/cpp/include/tensorrt_llm/batch_manager/kvCacheManager.h @NVIDIA/trt-llm-kv-cache-manager-devs
161+
/cpp/include/tensorrt_llm/batch_manager/kvCacheEventManager.h @NVIDIA/trt-llm-kv-cache-manager-devs
162+
/cpp/include/tensorrt_llm/batch_manager/kvCacheTransferManager.h @NVIDIA/trt-llm-kv-cache-manager-devs
163+
/cpp/include/tensorrt_llm/batch_manager/evictionPolicy.h @NVIDIA/trt-llm-kv-cache-manager-devs
164+
/cpp/tensorrt_llm/batch_manager/allocateKvCache.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
165+
/cpp/tests/unit_tests/batch_manager/kvCacheManagerTest.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
166+
/cpp/tests/unit_tests/batch_manager/kvCacheUtilsTest.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
167+
/tensorrt_llm/_torch/pyexecutor/resource_manager.py @NVIDIA/trt-llm-kv-cache-manager-devs
168+
/cpp/tensorrt_llm/nanobind/batch_manager/kvCacheManager.h @NVIDIA/trt-llm-kv-cache-manager-devs
169+
/cpp/tensorrt_llm/nanobind/batch_manager/kvCacheManager.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
170+
/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.h @NVIDIA/trt-llm-kv-cache-manager-devs
171+
/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
155172

156173
# The rule below requires that any PR modifying public APIs must be approved by at least one member
157174
# of the NVIDIA/trt-llm-committed-api-review-committee or NVIDIA/trt-llm-noncommitted-api-review-committee team.

.github/workflows/blossom-ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ jobs:
210210
"omera-nv",
211211
"pamelap-nvidia",
212212
"pcastonguay",
213+
"pdrake-nv",
213214
"peaceh-nv",
214215
"pengbowang-nv",
215216
"PerkzZheng",

docker/Dockerfile.multi

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
33
ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver
44
ARG BASE_TAG=25.10-py3
5-
# [TODO] Update to NVIDIA Triton 25.10 when it's available
6-
ARG TRITON_BASE_TAG=25.09-py3
5+
ARG TRITON_BASE_TAG=25.10-py3
76
ARG DEVEL_IMAGE=devel
87

98
FROM ${BASE_IMAGE}:${BASE_TAG} AS base

jenkins/Build.groovy

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,7 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
454454
def llmPath = sh (script: "realpath ${LLM_ROOT}",returnStdout: true).trim()
455455
// TODO: Remove after the cmake version is upgraded to 3.31.8
456456
// Get triton tag from docker/Dockerfile.multi
457-
def tritonShortTag = "r25.09"
457+
def tritonShortTag = "r25.10"
458458
sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${buildJobs} install"
459459

460460
// Step 3: packaging wheels into tarfile

jenkins/GenerateLock.groovy

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def generate()
8181
echo "No update that needs to be checked in"
8282
} else {
8383
sh "git status"
84-
sh "git add \$(find . -type f \\( -name 'poetry.lock' -o -name 'pyproject.toml' \\))"
84+
sh "git add \$(find . -type f \\( -name 'poetry.lock' -o -name 'pyproject.toml' -o -name 'metadata.json' \\))"
8585
sh "git commit -s -m \"[None][infra] Check in most recent lock file from nightly pipeline\""
8686
withCredentials([string(credentialsId: CREDENTIAL_ID, variable: 'API_TOKEN')]) {
8787
def authedUrl = LLM_REPO.replaceFirst('https://', "https://svc_tensorrt:${API_TOKEN}@")
@@ -111,6 +111,12 @@ pipeline {
111111
timestamps()
112112
}
113113

114+
triggers {
115+
parameterizedCron('''
116+
H 2 * * * %branchName=main;repoUrlKey=tensorrt_llm_github
117+
''')
118+
}
119+
114120
stages {
115121
stage("Generating Poetry Locks"){
116122
agent {

jenkins/L0_Test.groovy

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1527,7 +1527,7 @@ def launchTestListCheck(pipeline)
15271527
sh "tar -zxf ${tarName}"
15281528
def llmPath = sh (script: "realpath .", returnStdout: true).trim()
15291529
def llmSrc = "${llmPath}/TensorRT-LLM/src"
1530-
sh "NVIDIA_TRITON_SERVER_VERSION=25.09 LLM_ROOT=${llmSrc} LLM_BACKEND_ROOT=${llmSrc}/triton_backend python3 ${llmSrc}/scripts/check_test_list.py --l0 --qa --waive"
1530+
sh "NVIDIA_TRITON_SERVER_VERSION=25.10 LLM_ROOT=${llmSrc} LLM_BACKEND_ROOT=${llmSrc}/triton_backend python3 ${llmSrc}/scripts/check_test_list.py --l0 --qa --waive"
15311531
} catch (InterruptedException e) {
15321532
throw e
15331533
} catch (Exception e) {

jenkins/current_image_tags.properties

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
1414
IMAGE_NAME=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm
1515

16-
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511110140-8447
17-
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511110140-8447
18-
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511110140-8447
19-
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511110140-8447
16+
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511131803-8929
17+
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511131803-8929
18+
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511131803-8929
19+
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511131803-8929

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ pandas
1919
h5py==3.12.1
2020
StrEnum
2121
sentencepiece>=0.1.99
22-
tensorrt~=10.13.0
22+
tensorrt~=10.13.3
2323
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-10.html#rel-25-10 uses 2.9.0a0.
2424
torch>=2.9.0a0,<=2.9.0
2525
torchvision

scripts/generate_lock_file.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,13 @@
2727
"""
2828

2929
import argparse
30+
import json
3031
import os
3132
import re
3233
import shutil
3334
import subprocess
3435
import sys
36+
from datetime import datetime, timezone
3537
from pathlib import Path
3638

3739
sys.path.insert(0, os.getcwd())
@@ -74,6 +76,24 @@ def get_project_info(path: str):
7476
return {"name": name, "version": version}
7577

7678

79+
def generate_metadata_json():
    """Write ``metadata.json`` into the security-scanning folder.

    The file records two fields:

    * ``commit_hash`` -- output of ``git rev-parse HEAD`` with surrounding
      whitespace stripped.
    * ``timestamp`` -- the current UTC time formatted as
      ``YYYY-MM-DDTHH:MM:SSZ``.

    Raises:
        subprocess.CalledProcessError: if ``git rev-parse HEAD`` exits with
            a non-zero status; the error is printed before being re-raised.
    """
    try:
        head = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error retrieving git commit hash: {e}")
        raise

    generated_at = datetime.now(timezone.utc)
    metadata = {
        "commit_hash": head.strip(),
        "timestamp": generated_at.strftime("%Y-%m-%dT%H:%M:%SZ"),
    }

    # The output lives next to the generated lock files.
    target = f"{FOLDER_SECURITY_SCANNING}/metadata.json"
    with open(target, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)
95+
96+
7797
if __name__ == "__main__":
7898
parser = argparse.ArgumentParser(
7999
description="Lock files generator",
@@ -93,6 +113,7 @@ def get_project_info(path: str):
93113
if os.path.exists(FOLDER_SECURITY_SCANNING):
94114
shutil.rmtree(FOLDER_SECURITY_SCANNING)
95115
os.mkdir(FOLDER_SECURITY_SCANNING)
116+
generate_metadata_json()
96117

97118
# generate pyproject.toml and poetry.lock files in the same location
98119
for path in paths:

security_scanning/metadata.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"commit_hash": "05b5336ab6135e368157600da1d16b090ee9a00a",
3+
"timestamp": "2025-11-14T18:16:21Z"
4+
}

0 commit comments

Comments
 (0)