Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/runners/linux/Dockerfile.convertModel
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Runner image defined by Dockerfile.convertModel: CUDA 12.3 devel base with the
# TensorRT-LLM 0.9.0 wheel installed and the matching TensorRT-LLM source tree
# checked out (presumably for the model conversion scripts -- confirm with the
# workflow that consumes this image).
FROM nvidia/cuda:12.3.0-devel-ubuntu22.04

# Install Python, pip, OpenMPI and git tooling.
# - DEBIAN_FRONTEND is set for this command only, so package configuration never
#   waits for input during a non-interactive build.
# - The apt package lists are removed in the same layer so they do not end up
#   in the final image.
# - Recommended packages are intentionally still installed: packages pulled in
#   via python3-pip's recommends may be needed to build wheel dependencies.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get -y install \
        python3.10 python3-pip openmpi-bin libopenmpi-dev git git-lfs \
    && rm -rf /var/lib/apt/lists/*

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir tensorrt_llm==0.9.0 --extra-index-url https://pypi.nvidia.com

# Pin numpy after the tensorrt_llm install so this exact version replaces
# whatever version was resolved by the previous step.
RUN pip3 install --no-cache-dir numpy==1.26.4

# Fetch the TensorRT-LLM sources at the tag matching the installed wheel version.
RUN git clone https://github.com/NVIDIA/TensorRT-LLM.git && cd TensorRT-LLM && git checkout v0.9.0
70 changes: 25 additions & 45 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ jobs:
runs-on: "windows-tensorrt-llm-cuda-12-2"
run-e2e: false
s3-key-prefix: "windows-tensorrt-llm-ccache"
sccache-conf-path: 'C:\sccache.conf'
ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
- os: "linux"
name: "cuda-12-3"
runs-on: "linux-tensorrt-llm-cuda-12-3"
run-e2e: false
s3-key-prefix: "linux-tensorrt-llm"
sccache-conf-path: '/tmp/sccache.conf'
s3-key-prefix: "linux-tensorrt-llm-0-9-0-cuda-12-3"
ccache-dir: "/home/runner/.ccache"
permissions:
contents: write
steps:
Expand All @@ -60,51 +60,34 @@ jobs:
submodules: recursive
lfs: true

- name: Install choco on Windows
- name: Install tools on Windows
if: runner.os == 'Windows'
run: |
choco install make pkgconfiglite ccache awscli -y
choco install make pkgconfiglite ccache awscli 7zip -y
Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
refreshenv

- name: create sccache.conf file Linux
if: runner.os == 'Linux'
run: |
echo "[cache.s3]" > ${{ matrix.sccache-conf-path }}
echo 'bucket = "${{ secrets.MINIO_BUCKET_NAME }}"' >> ${{ matrix.sccache-conf-path }}
echo 'endpoint = "${{ secrets.MINIO_ENDPOINT }}"' >> ${{ matrix.sccache-conf-path }}
echo 'key_prefix = "${{ matrix.s3-key-prefix }}"' >> ${{ matrix.sccache-conf-path }}
echo 'use_ssl = false' >> ${{ matrix.sccache-conf-path }}
echo 'server_side_encryption = false' >> ${{ matrix.sccache-conf-path }}
echo 'no_credentials = false' >> ${{ matrix.sccache-conf-path }}

- name: Download ccache from s3
continue-on-error: true
if: runner.os == 'Windows'
continue-on-error: true
run: |
Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
refreshenv
aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} C:\Users\ContainerAdministrator\AppData\Local\ccache --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} ${{ matrix.ccache-dir }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
env:
AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"

- name: start sccache server for linux
- name: Download ccache from s3
if: runner.os == 'Linux'
working-directory: cpp
continue-on-error: true
run: |
sccache --start-server
aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} ${{ matrix.ccache-dir }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
env:
SCCACHE_BUCKET: "${{ secrets.MINIO_BUCKET_NAME }}"
SCCACHE_REGION: "${{ secrets.MINIO_REGION }}"
SCCACHE_ENDPOINT: "${{ secrets.MINIO_ENDPOINT }}"
SCCACHE_S3_USE_SSL: "false"
SCCACHE_S3_SERVER_SIDE_ENCRYPTION: "false"
SCCACHE_S3_KEY_PREFIX: "${{ matrix.s3-key-prefix }}"
SCCACHE_LOG: "debug"
SCCACHE_CONF: '${{ matrix.sccache-conf-path }}'
AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
SCCACHE_IDLE_TIMEOUT: "0"
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"

- name: Build Dependencies
working-directory: cpp
Expand Down Expand Up @@ -141,25 +124,29 @@ jobs:
asset_path: cpp/tensorrt_llm/cortex.tensorrt-llm/cortex.tensorrt-llm.tar.gz
asset_name: cortex.tensorrt-llm-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz
asset_content_type: application/gzip

- name: Clean
if: always()
continue-on-error: true
run: |
sccache --stop-server
rm ${{ matrix.sccache-conf-path }}

- name: Upload ccache to s3
continue-on-error: true
if: always() && runner.os == 'Windows'
run: |
Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
refreshenv
aws s3 cp C:\Users\ContainerAdministrator\AppData\Local\ccache s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
aws s3 cp ${{ matrix.ccache-dir }} s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
env:
AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"

- name: Upload ccache to s3
continue-on-error: true
if: always() && runner.os == 'Linux'
run: |
aws s3 cp ${{ matrix.ccache-dir }} s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
env:
AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"

update_release_draft:
needs: [build-and-test]
permissions:
Expand All @@ -183,10 +170,3 @@ jobs:
# disable-autolabeler: true
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Clean
if: always()
continue-on-error: true
run: |
sccache --stop-server
rm ${{ matrix.sccache-conf-path }}
62 changes: 23 additions & 39 deletions .github/workflows/quality-gate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@ jobs:
runs-on: "windows-tensorrt-llm-cuda-12-2"
run-e2e: false
s3-key-prefix: "windows-tensorrt-llm-ccache"
sccache-conf-path: 'C:\sccache.conf'
ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
- os: "linux"
name: "cuda-12-3"
runs-on: "linux-tensorrt-llm-cuda-12-3"
run-e2e: false
s3-key-prefix: "linux-tensorrt-llm"
sccache-conf-path: '/tmp/sccache.conf'
s3-key-prefix: "linux-tensorrt-llm-0-9-0-cuda-12-3"
ccache-dir: "/home/runner/.ccache"
permissions:
contents: write
steps:
Expand All @@ -35,51 +35,34 @@ jobs:
submodules: recursive
lfs: true

- name: Install choco on Windows
- name: Install tools on Windows
if: runner.os == 'Windows'
run: |
choco install make pkgconfiglite ccache awscli -y
choco install make pkgconfiglite ccache awscli 7zip -y
Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
refreshenv

- name: create sccache.conf file Linux
if: runner.os == 'Linux'
run: |
echo "[cache.s3]" > ${{ matrix.sccache-conf-path }}
echo 'bucket = "${{ secrets.MINIO_BUCKET_NAME }}"' >> ${{ matrix.sccache-conf-path }}
echo 'endpoint = "${{ secrets.MINIO_ENDPOINT }}"' >> ${{ matrix.sccache-conf-path }}
echo 'key_prefix = "${{ matrix.s3-key-prefix }}"' >> ${{ matrix.sccache-conf-path }}
echo 'use_ssl = false' >> ${{ matrix.sccache-conf-path }}
echo 'server_side_encryption = false' >> ${{ matrix.sccache-conf-path }}
echo 'no_credentials = false' >> ${{ matrix.sccache-conf-path }}

- name: Download ccache from s3
continue-on-error: true
if: runner.os == 'Windows'
continue-on-error: true
run: |
Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
refreshenv
aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} C:\Users\ContainerAdministrator\AppData\Local\ccache --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} ${{ matrix.ccache-dir }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
env:
AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"

- name: start sccache server for linux
- name: Download ccache from s3
if: runner.os == 'Linux'
working-directory: cpp
continue-on-error: true
run: |
sccache --start-server
aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} ${{ matrix.ccache-dir }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
env:
SCCACHE_BUCKET: "${{ secrets.MINIO_BUCKET_NAME }}"
SCCACHE_REGION: "${{ secrets.MINIO_REGION }}"
SCCACHE_ENDPOINT: "${{ secrets.MINIO_ENDPOINT }}"
SCCACHE_S3_USE_SSL: "false"
SCCACHE_S3_SERVER_SIDE_ENCRYPTION: "false"
SCCACHE_S3_KEY_PREFIX: "${{ matrix.s3-key-prefix }}"
SCCACHE_LOG: "debug"
SCCACHE_CONF: '${{ matrix.sccache-conf-path }}'
AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
SCCACHE_IDLE_TIMEOUT: "0"
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"

- name: Build Dependencies
working-directory: cpp
Expand All @@ -106,24 +89,25 @@ jobs:
with:
name: cortex.tensorrt-llm-${{ matrix.os }}-${{ matrix.name }}
path: cpp/tensorrt_llm/cortex.tensorrt-llm/cortex.tensorrt-llm.tar.gz

- name: Clean
if: always()
continue-on-error: true
run: |
sccache --stop-server
rm ${{ matrix.sccache-conf-path }}

- name: Upload ccache to s3
continue-on-error: true
if: always() && runner.os == 'Windows'
run: |
Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
refreshenv
aws s3 cp C:\Users\ContainerAdministrator\AppData\Local\ccache s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
aws s3 cp ${{ matrix.ccache-dir }} s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
env:
AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"


- name: Upload ccache to s3
continue-on-error: true
if: always() && runner.os == 'Linux'
run: |
aws s3 cp ${{ matrix.ccache-dir }} s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
env:
AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
13 changes: 5 additions & 8 deletions cpp/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ build-deps:
ifeq ($(OS),Windows_NT)
@powershell -Command "cd tensorrt_llm/cortex.tensorrt-llm; cmake -S ./third-party -B ./build_deps/third-party -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE=Release -DCMAKE_OBJECT_PATH_MAX=500; cmake --build ./build_deps/third-party --config Release -j8;"
else
@cd tensorrt_llm/cortex.tensorrt-llm && cmake -S ./third-party -B ./build_deps/third-party -DCMAKE_BUILD_TYPE=Release -DCMAKE_OBJECT_PATH_MAX=500 && make -C ./build_deps/third-party -j 10 && rm -rf ./build_deps/third-party;
@cd tensorrt_llm/cortex.tensorrt-llm && cmake -S ./third-party -B ./build_deps/third-party -DCMAKE_BUILD_TYPE=Release && make -C ./build_deps/third-party -j 10 && rm -rf ./build_deps/third-party;
endif

# Build the Cortex engine
Expand All @@ -27,9 +27,8 @@ ifeq ($(OS),Windows_NT)
@powershell -Command "cd ..; python .\scripts\build_wheel.py -a '80-real;86-real;89-real' --trt_root 'C:\workspace\TensorRT-9.3.0.1\' -D 'BUILD_CORTEX_TENSORRT-LLM=ON' --use_ccache"
@powershell -Command "cd build; cmake .. -DCMAKE_CUDA_ARCHITECTURES='80-real;86-real;89-real' -DTRT_LIB_DIR='C:/workspace/TensorRT-9.3.0.1/lib' -DTRT_INCLUDE_DIR='C:/workspace/TensorRT-9.3.0.1/include' -DBUILD_CORTEX_TENSORRT-LLM=ON -DCMAKE_CUDA_COMPILER='C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2/bin/nvcc.exe' -DENABLE_MULTI_DEVICE=0 -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -G Ninja; cmake --build . --parallel 2 --config Release"
else
@mkdir -p build && cd build; \
cmake .. -GNinja -DBUILD_CORTEX_TENSORRT-LLM=ON -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -DBUILD_PYT='OFF' -DBUILD_PYBIND='OFF' -DNVTX_DISABLE='ON' -DCMAKE_CUDA_ARCHITECTURES='80-real;86-real;89-real' '-DENABLE_MULTI_DEVICE=0' '-DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc' -DBUILD_BENCHMARKS=OFF '-DBUILD_TESTS=OFF' -DTRT_LIB_DIR=/usr/local/tensorrt/lib -DTRT_INCLUDE_DIR=/usr/local/tensorrt/include; \
cmake --build . --config Release;
@cd .. && python3 ./scripts/build_wheel.py --trt_root /usr/local/tensorrt --cuda_architectures "80-real;86-real;89-real" --extra-cmake-vars "BUILD_CORTEX_TENSORRT-LLM=ON" --use_ccache
@cd build && cmake .. -DCMAKE_CUDA_ARCHITECTURES='80-real;86-real;89-real' -DTRT_LIB_DIR='/usr/local/tensorrt/lib' -DTRT_INCLUDE_DIR='/usr/local/tensorrt/include' -DBUILD_CORTEX_TENSORRT-LLM=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -G Ninja && cmake --build . --config Release;
endif

# Prepackage the Cortex engine
Expand All @@ -41,19 +40,17 @@ ifeq ($(OS),Windows_NT)
@powershell -Command "cd tensorrt_llm\cortex.tensorrt-llm\; cp -Force C:\workspace\cuDNN\bin\cudnn_ops_infer64_8.dll cortex.tensorrt-llm\;"
@powershell -Command "cd tensorrt_llm\cortex.tensorrt-llm\; cp -Force ..\..\build\tensorrt_llm\plugins\nvinfer_plugin_tensorrt_llm.dll cortex.tensorrt-llm\;"
@powershell -Command "cd tensorrt_llm\cortex.tensorrt-llm\; cp -Force ..\..\build\tensorrt_llm\tensorrt_llm.dll cortex.tensorrt-llm\;"
@powershell -Command "cd tensorrt_llm\cortex.tensorrt-llm\; cp -Force C:\Windows\System32\msmpi.dll cortex.tensorrt-llm\;"
@powershell -Command "cd tensorrt_llm\cortex.tensorrt-llm\; cp -Force .\build_deps\_install\bin\zlib.dll cortex.tensorrt-llm\;"
else
cd ./tensorrt_llm/cortex.tensorrt-llm && \
mkdir -p cortex.tensorrt-llm && \
cp ../../build/tensorrt_llm/cortex.tensorrt-llm/libengine.$(shell uname | tr '[:upper:]' '[:lower:]' | sed 's/darwin/dylib/;s/linux/so/') cortex.tensorrt-llm && \
cp ../../build/tensorrt_llm/cortex.tensorrt-llm/libengine.so cortex.tensorrt-llm && \
cp /usr/local/tensorrt/targets/x86_64-linux-gnu/lib/libnvinfer.so.9 cortex.tensorrt-llm && \
cp /usr/lib/x86_64-linux-gnu/libcudnn.so.8 cortex.tensorrt-llm && \
cp /usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8 cortex.tensorrt-llm && \
cp /home/runner/actions-runner/_work/cortex.tensorrt-llm/cortex.tensorrt-llm/cpp/build/tensorrt_llm/plugins/libnvinfer_plugin_tensorrt_llm.so.9 cortex.tensorrt-llm && \
cp /home/runner/actions-runner/_work/cortex.tensorrt-llm/cortex.tensorrt-llm/cpp/build/tensorrt_llm/libtensorrt_llm.so cortex.tensorrt-llm && \
cp /opt/hpcx/ompi/lib/libmpi.so.40 cortex.tensorrt-llm && \
cp /usr/lib/x86_64-linux-gnu/libnccl.so cortex.tensorrt-llm
cp /usr/lib/x86_64-linux-gnu/libnccl.so cortex.tensorrt-llm/libnccl.so.2
endif

codesign:
Expand Down
2 changes: 1 addition & 1 deletion scripts/build_wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def main(build_type: str = "Release",
if cuda_architectures is not None else "")

cmake_def_args = []
cmake_generator = ""
cmake_generator = "-GNinja"

hardware_arch = platform.machine()

Expand Down