From 6c8451902263b8d3ebf8c436bafa290d6296539d Mon Sep 17 00:00:00 2001
From: Hien To <tominhhien97@gmail.com>
Date: Mon, 24 Jun 2024 00:43:49 +0700
Subject: [PATCH] Linux: build the Python library for model conversion

---
 .github/runners/linux/Dockerfile.convertModel |  9 +++
 .github/workflows/build.yml                   | 70 +++++++------------
 .github/workflows/quality-gate.yml            | 62 ++++++----------
 cpp/Makefile                                  | 13 ++--
 scripts/build_wheel.py                        |  2 +-
 5 files changed, 63 insertions(+), 93 deletions(-)
 create mode 100644 .github/runners/linux/Dockerfile.convertModel

diff --git a/.github/runners/linux/Dockerfile.convertModel b/.github/runners/linux/Dockerfile.convertModel
new file mode 100644
index 00000000000..a35e71dd9b6
--- /dev/null
+++ b/.github/runners/linux/Dockerfile.convertModel
@@ -0,0 +1,9 @@
+# Image for converting models with TensorRT-LLM v0.9.0 (CUDA 12.3 devel, Ubuntu 22.04).
+FROM nvidia/cuda:12.3.0-devel-ubuntu22.04
+# DEBIAN_FRONTEND=noninteractive avoids install-time prompts; the apt lists are removed to keep the layer small.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install python3.10 python3-pip openmpi-bin libopenmpi-dev git git-lfs && rm -rf /var/lib/apt/lists/*
+RUN pip3 install --no-cache-dir tensorrt_llm==0.9.0 --extra-index-url https://pypi.nvidia.com
+# numpy is installed after tensorrt_llm so that this pinned version is the one left in the image.
+RUN pip3 install --no-cache-dir numpy==1.26.4
+# Shallow-clone only the TensorRT-LLM tag that matches the installed wheel.
+RUN git clone --depth 1 --branch v0.9.0 https://github.com/NVIDIA/TensorRT-LLM.git
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index f9fd1e0d3c6..5b29e0843bb 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -43,13 +43,13 @@ jobs:
             runs-on: "windows-tensorrt-llm-cuda-12-2"
             run-e2e: false
             s3-key-prefix: "windows-tensorrt-llm-ccache"
-            sccache-conf-path: 'C:\sccache.conf'
+            ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
           - os: "linux"
             name: "cuda-12-3"
             runs-on: "linux-tensorrt-llm-cuda-12-3"
             run-e2e: false
-            s3-key-prefix: "linux-tensorrt-llm"
-            sccache-conf-path: '/tmp/sccache.conf'
+            s3-key-prefix: "linux-tensorrt-llm-0-9-0-cuda-12-3"
+            ccache-dir: "/home/runner/.ccache"
     permissions:
       contents: write
     steps:
@@ -60,51 +60,34 @@ jobs:
           submodules: recursive
           lfs: true
 
-      - name: Install choco on Windows
+      - name: Install tools on Windows
         if: runner.os == 'Windows'
         run: |
-          choco install make pkgconfiglite ccache awscli -y
+          choco install make pkgconfiglite ccache awscli 7zip -y
+          Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
+          refreshenv
 
-      - name: create sccache.conf file Linux
-        if: runner.os == 'Linux'
-        run: |
-          echo "[cache.s3]" > ${{ matrix.sccache-conf-path }}
-          echo 'bucket = "${{ secrets.MINIO_BUCKET_NAME }}"' >> ${{ matrix.sccache-conf-path }}
-          echo 'endpoint = "${{ secrets.MINIO_ENDPOINT }}"' >> ${{ matrix.sccache-conf-path }}
-          echo 'key_prefix = "${{ matrix.s3-key-prefix }}"' >> ${{ matrix.sccache-conf-path }}
-          echo 'use_ssl = false' >> ${{ matrix.sccache-conf-path }}
-          echo 'server_side_encryption = false' >> ${{ matrix.sccache-conf-path }}
-          echo 'no_credentials = false' >> ${{ matrix.sccache-conf-path }}
-      
       - name: Download ccache from s3
-        continue-on-error: true
         if: runner.os == 'Windows'
+        continue-on-error: true
         run: |
           Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
           refreshenv
-          aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} C:\Users\ContainerAdministrator\AppData\Local\ccache --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
+          aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} ${{ matrix.ccache-dir }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
         env:
           AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
           AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
           AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
 
-      - name: start sccache server for linux
+      - name: Download ccache from s3
         if: runner.os == 'Linux'
-        working-directory: cpp
+        continue-on-error: true
         run: |
-          sccache --start-server
+          mkdir -p ${{ matrix.ccache-dir }} && aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} ${{ matrix.ccache-dir }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
         env:
-          SCCACHE_BUCKET: "${{ secrets.MINIO_BUCKET_NAME }}"
-          SCCACHE_REGION: "${{ secrets.MINIO_REGION }}"
-          SCCACHE_ENDPOINT: "${{ secrets.MINIO_ENDPOINT }}"
-          SCCACHE_S3_USE_SSL: "false"
-          SCCACHE_S3_SERVER_SIDE_ENCRYPTION: "false"
-          SCCACHE_S3_KEY_PREFIX: "${{ matrix.s3-key-prefix }}"
-          SCCACHE_LOG: "debug"
-          SCCACHE_CONF: '${{ matrix.sccache-conf-path }}'
           AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
           AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
-          SCCACHE_IDLE_TIMEOUT: "0"
+          AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
 
       - name: Build Dependencies
         working-directory: cpp
@@ -141,13 +124,6 @@ jobs:
           asset_path: cpp/tensorrt_llm/cortex.tensorrt-llm/cortex.tensorrt-llm.tar.gz
           asset_name: cortex.tensorrt-llm-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz
           asset_content_type: application/gzip
-      
-      - name: Clean
-        if: always()
-        continue-on-error: true
-        run: |
-          sccache --stop-server
-          rm ${{ matrix.sccache-conf-path }}
 
       - name: Upload ccache to s3
         continue-on-error: true
@@ -155,11 +131,22 @@ jobs:
         run: |
           Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
           refreshenv
-          aws s3 cp C:\Users\ContainerAdministrator\AppData\Local\ccache s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
+          aws s3 cp ${{ matrix.ccache-dir }} s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
         env:
           AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
           AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
           AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
+
+      - name: Upload ccache to s3
+        continue-on-error: true
+        if: always() && runner.os == 'Linux'
+        run: |
+          aws s3 cp ${{ matrix.ccache-dir }} s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
+        env:
+          AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
+          AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
+          AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
+
   update_release_draft:
     needs: [build-and-test]
     permissions:
@@ -183,10 +170,3 @@ jobs:
         #   disable-autolabeler: true
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Clean
-        if: always()
-        continue-on-error: true
-        run: |
-          sccache --stop-server
-          rm ${{ matrix.sccache-conf-path }}
\ No newline at end of file
diff --git a/.github/workflows/quality-gate.yml b/.github/workflows/quality-gate.yml
index 3ee37dc2f14..7369214192b 100644
--- a/.github/workflows/quality-gate.yml
+++ b/.github/workflows/quality-gate.yml
@@ -18,13 +18,13 @@ jobs:
             runs-on: "windows-tensorrt-llm-cuda-12-2"
             run-e2e: false
             s3-key-prefix: "windows-tensorrt-llm-ccache"
-            sccache-conf-path: 'C:\sccache.conf'
+            ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
           - os: "linux"
             name: "cuda-12-3"
             runs-on: "linux-tensorrt-llm-cuda-12-3"
             run-e2e: false
-            s3-key-prefix: "linux-tensorrt-llm"
-            sccache-conf-path: '/tmp/sccache.conf'
+            s3-key-prefix: "linux-tensorrt-llm-0-9-0-cuda-12-3"
+            ccache-dir: "/home/runner/.ccache"
     permissions:
       contents: write
     steps:
@@ -35,51 +35,34 @@ jobs:
           submodules: recursive
           lfs: true
 
-      - name: Install choco on Windows
+      - name: Install tools on Windows
         if: runner.os == 'Windows'
         run: |
-          choco install make pkgconfiglite ccache awscli -y
+          choco install make pkgconfiglite ccache awscli 7zip -y
+          Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
+          refreshenv
 
-      - name: create sccache.conf file Linux
-        if: runner.os == 'Linux'
-        run: |
-          echo "[cache.s3]" > ${{ matrix.sccache-conf-path }}
-          echo 'bucket = "${{ secrets.MINIO_BUCKET_NAME }}"' >> ${{ matrix.sccache-conf-path }}
-          echo 'endpoint = "${{ secrets.MINIO_ENDPOINT }}"' >> ${{ matrix.sccache-conf-path }}
-          echo 'key_prefix = "${{ matrix.s3-key-prefix }}"' >> ${{ matrix.sccache-conf-path }}
-          echo 'use_ssl = false' >> ${{ matrix.sccache-conf-path }}
-          echo 'server_side_encryption = false' >> ${{ matrix.sccache-conf-path }}
-          echo 'no_credentials = false' >> ${{ matrix.sccache-conf-path }}
-      
       - name: Download ccache from s3
-        continue-on-error: true
         if: runner.os == 'Windows'
+        continue-on-error: true
         run: |
           Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
           refreshenv
-          aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} C:\Users\ContainerAdministrator\AppData\Local\ccache --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
+          aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} ${{ matrix.ccache-dir }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
         env:
           AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
           AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
           AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
 
-      - name: start sccache server for linux
+      - name: Download ccache from s3
         if: runner.os == 'Linux'
-        working-directory: cpp
+        continue-on-error: true
         run: |
-          sccache --start-server
+          mkdir -p ${{ matrix.ccache-dir }} && aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} ${{ matrix.ccache-dir }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
         env:
-          SCCACHE_BUCKET: "${{ secrets.MINIO_BUCKET_NAME }}"
-          SCCACHE_REGION: "${{ secrets.MINIO_REGION }}"
-          SCCACHE_ENDPOINT: "${{ secrets.MINIO_ENDPOINT }}"
-          SCCACHE_S3_USE_SSL: "false"
-          SCCACHE_S3_SERVER_SIDE_ENCRYPTION: "false"
-          SCCACHE_S3_KEY_PREFIX: "${{ matrix.s3-key-prefix }}"
-          SCCACHE_LOG: "debug"
-          SCCACHE_CONF: '${{ matrix.sccache-conf-path }}'
           AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
           AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
-          SCCACHE_IDLE_TIMEOUT: "0"
+          AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
 
       - name: Build Dependencies
         working-directory: cpp
@@ -106,13 +89,6 @@ jobs:
         with:
           name: cortex.tensorrt-llm-${{ matrix.os }}-${{ matrix.name }}
           path: cpp/tensorrt_llm/cortex.tensorrt-llm/cortex.tensorrt-llm.tar.gz
-      
-      - name: Clean
-        if: always()
-        continue-on-error: true
-        run: |
-          sccache --stop-server
-          rm ${{ matrix.sccache-conf-path }}
 
       - name: Upload ccache to s3
         continue-on-error: true
@@ -120,10 +96,18 @@ jobs:
         run: |
           Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
           refreshenv
-          aws s3 cp C:\Users\ContainerAdministrator\AppData\Local\ccache s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
+          aws s3 cp ${{ matrix.ccache-dir }} s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
         env:
           AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
           AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
           AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
 
-          
\ No newline at end of file
+      - name: Upload ccache to s3
+        continue-on-error: true
+        if: always() && runner.os == 'Linux'
+        run: |
+          aws s3 cp ${{ matrix.ccache-dir }} s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }}
+        env:
+          AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
+          AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
+          AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
diff --git a/cpp/Makefile b/cpp/Makefile
index fc469e2f62e..f3acb1bbde0 100644
--- a/cpp/Makefile
+++ b/cpp/Makefile
@@ -18,7 +18,7 @@ build-deps:
 ifeq ($(OS),Windows_NT)
 	@powershell -Command "cd tensorrt_llm/cortex.tensorrt-llm; cmake -S ./third-party -B ./build_deps/third-party -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE=Release -DCMAKE_OBJECT_PATH_MAX=500; cmake --build ./build_deps/third-party --config Release -j8;"
 else
-	@cd tensorrt_llm/cortex.tensorrt-llm && cmake -S ./third-party -B ./build_deps/third-party -DCMAKE_BUILD_TYPE=Release -DCMAKE_OBJECT_PATH_MAX=500 && make -C ./build_deps/third-party -j 10 && rm -rf ./build_deps/third-party;
+	@cd tensorrt_llm/cortex.tensorrt-llm && cmake -S ./third-party -B ./build_deps/third-party -DCMAKE_BUILD_TYPE=Release && make -C ./build_deps/third-party -j 10 && rm -rf ./build_deps/third-party;
 endif
 
 # Build the Cortex engine
@@ -27,9 +27,8 @@ ifeq ($(OS),Windows_NT)
 	@powershell -Command "cd ..; python .\scripts\build_wheel.py -a '80-real;86-real;89-real' --trt_root 'C:\workspace\TensorRT-9.3.0.1\' -D 'BUILD_CORTEX_TENSORRT-LLM=ON' --use_ccache"
 	@powershell -Command "cd build; cmake .. -DCMAKE_CUDA_ARCHITECTURES='80-real;86-real;89-real' -DTRT_LIB_DIR='C:/workspace/TensorRT-9.3.0.1/lib' -DTRT_INCLUDE_DIR='C:/workspace/TensorRT-9.3.0.1/include' -DBUILD_CORTEX_TENSORRT-LLM=ON -DCMAKE_CUDA_COMPILER='C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2/bin/nvcc.exe' -DENABLE_MULTI_DEVICE=0 -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -G Ninja; cmake --build . --parallel 2 --config Release"
 else
-	@mkdir -p build && cd build; \
-	cmake .. -GNinja -DBUILD_CORTEX_TENSORRT-LLM=ON -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -DBUILD_PYT='OFF' -DBUILD_PYBIND='OFF' -DNVTX_DISABLE='ON' -DCMAKE_CUDA_ARCHITECTURES='80-real;86-real;89-real' '-DENABLE_MULTI_DEVICE=0' '-DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc' -DBUILD_BENCHMARKS=OFF '-DBUILD_TESTS=OFF' -DTRT_LIB_DIR=/usr/local/tensorrt/lib -DTRT_INCLUDE_DIR=/usr/local/tensorrt/include; \
-	cmake --build . --config Release;
+	@cd .. && python3 ./scripts/build_wheel.py --trt_root /usr/local/tensorrt --cuda_architectures "80-real;86-real;89-real" --extra-cmake-vars "BUILD_CORTEX_TENSORRT-LLM=ON" --use_ccache
+	@cd build && cmake .. -DCMAKE_CUDA_ARCHITECTURES='80-real;86-real;89-real' -DTRT_LIB_DIR='/usr/local/tensorrt/lib' -DTRT_INCLUDE_DIR='/usr/local/tensorrt/include' -DBUILD_CORTEX_TENSORRT-LLM=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -G Ninja && cmake --build . --config Release;
 endif
 
 # Prepackage the Cortex engine
@@ -41,19 +40,17 @@ ifeq ($(OS),Windows_NT)
 	@powershell -Command "cd tensorrt_llm\cortex.tensorrt-llm\; cp -Force C:\workspace\cuDNN\bin\cudnn_ops_infer64_8.dll cortex.tensorrt-llm\;"
 	@powershell -Command "cd tensorrt_llm\cortex.tensorrt-llm\; cp -Force ..\..\build\tensorrt_llm\plugins\nvinfer_plugin_tensorrt_llm.dll cortex.tensorrt-llm\;"
 	@powershell -Command "cd tensorrt_llm\cortex.tensorrt-llm\; cp -Force ..\..\build\tensorrt_llm\tensorrt_llm.dll cortex.tensorrt-llm\;"
-	@powershell -Command "cd tensorrt_llm\cortex.tensorrt-llm\; cp -Force C:\Windows\System32\msmpi.dll cortex.tensorrt-llm\;"
 	@powershell -Command "cd tensorrt_llm\cortex.tensorrt-llm\; cp -Force .\build_deps\_install\bin\zlib.dll cortex.tensorrt-llm\;"
 else
 	cd ./tensorrt_llm/cortex.tensorrt-llm && \
 	mkdir -p cortex.tensorrt-llm && \
-	cp ../../build/tensorrt_llm/cortex.tensorrt-llm/libengine.$(shell uname | tr '[:upper:]' '[:lower:]' | sed 's/darwin/dylib/;s/linux/so/') cortex.tensorrt-llm && \
+	cp ../../build/tensorrt_llm/cortex.tensorrt-llm/libengine.so cortex.tensorrt-llm && \
 	cp /usr/local/tensorrt/targets/x86_64-linux-gnu/lib/libnvinfer.so.9 cortex.tensorrt-llm && \
 	cp /usr/lib/x86_64-linux-gnu/libcudnn.so.8 cortex.tensorrt-llm && \
 	cp /usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8 cortex.tensorrt-llm && \
 	cp /home/runner/actions-runner/_work/cortex.tensorrt-llm/cortex.tensorrt-llm/cpp/build/tensorrt_llm/plugins/libnvinfer_plugin_tensorrt_llm.so.9 cortex.tensorrt-llm && \
 	cp /home/runner/actions-runner/_work/cortex.tensorrt-llm/cortex.tensorrt-llm/cpp/build/tensorrt_llm/libtensorrt_llm.so cortex.tensorrt-llm && \
-	cp /opt/hpcx/ompi/lib/libmpi.so.40 cortex.tensorrt-llm && \
-	cp /usr/lib/x86_64-linux-gnu/libnccl.so cortex.tensorrt-llm
+	cp /usr/lib/x86_64-linux-gnu/libnccl.so cortex.tensorrt-llm/libnccl.so.2
 endif
 
 codesign:
diff --git a/scripts/build_wheel.py b/scripts/build_wheel.py
index d789f471038..53b1efdf549 100755
--- a/scripts/build_wheel.py
+++ b/scripts/build_wheel.py
@@ -88,7 +88,7 @@ def main(build_type: str = "Release",
         if cuda_architectures is not None else "")
 
     cmake_def_args = []
-    cmake_generator = ""
+    cmake_generator = "-GNinja"
 
     hardware_arch = platform.machine()