From 6c8451902263b8d3ebf8c436bafa290d6296539d Mon Sep 17 00:00:00 2001 From: Hien To Date: Mon, 24 Jun 2024 00:43:49 +0700 Subject: [PATCH] Linux add compile python lib for convert model --- .github/runners/linux/Dockerfile.convertModel | 9 +++ .github/workflows/build.yml | 70 +++++++------------ .github/workflows/quality-gate.yml | 62 ++++++---------- cpp/Makefile | 13 ++-- scripts/build_wheel.py | 2 +- 5 files changed, 63 insertions(+), 93 deletions(-) create mode 100644 .github/runners/linux/Dockerfile.convertModel diff --git a/.github/runners/linux/Dockerfile.convertModel b/.github/runners/linux/Dockerfile.convertModel new file mode 100644 index 00000000000..a35e71dd9b6 --- /dev/null +++ b/.github/runners/linux/Dockerfile.convertModel @@ -0,0 +1,9 @@ +FROM nvidia/cuda:12.3.0-devel-ubuntu22.04 + +RUN apt-get update && apt-get -y install python3.10 python3-pip openmpi-bin libopenmpi-dev git git-lfs + +RUN pip3 install tensorrt_llm==0.9.0 --extra-index-url https://pypi.nvidia.com + +RUN pip3 install numpy==1.26.4 + +RUN git clone https://github.com/NVIDIA/TensorRT-LLM.git && cd TensorRT-LLM && git checkout v0.9.0 \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f9fd1e0d3c6..5b29e0843bb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -43,13 +43,13 @@ jobs: runs-on: "windows-tensorrt-llm-cuda-12-2" run-e2e: false s3-key-prefix: "windows-tensorrt-llm-ccache" - sccache-conf-path: 'C:\sccache.conf' + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - os: "linux" name: "cuda-12-3" runs-on: "linux-tensorrt-llm-cuda-12-3" run-e2e: false - s3-key-prefix: "linux-tensorrt-llm" - sccache-conf-path: '/tmp/sccache.conf' + s3-key-prefix: "linux-tensorrt-llm-0-9-0-cuda-12-3" + ccache-dir: "/home/runner/.ccache" permissions: contents: write steps: @@ -60,51 +60,34 @@ jobs: submodules: recursive lfs: true - - name: Install choco on Windows + - name: Install tools on Windows if: runner.os == 'Windows' run: | - choco install make pkgconfiglite ccache awscli -y + choco install make pkgconfiglite ccache awscli 7zip -y + Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" + refreshenv - - name: create sccache.conf file Linux - if: runner.os == 'Linux' - run: | - echo "[cache.s3]" > ${{ matrix.sccache-conf-path }} - echo 'bucket = "${{ secrets.MINIO_BUCKET_NAME }}"' >> ${{ matrix.sccache-conf-path }} - echo 'endpoint = "${{ secrets.MINIO_ENDPOINT }}"' >> ${{ matrix.sccache-conf-path }} - echo 'key_prefix = "${{ matrix.s3-key-prefix }}"' >> ${{ matrix.sccache-conf-path }} - echo 'use_ssl = false' >> ${{ matrix.sccache-conf-path }} - echo 'server_side_encryption = false' >> ${{ matrix.sccache-conf-path }} - echo 'no_credentials = false' >> ${{ matrix.sccache-conf-path }} - - name: Download ccache from s3 - continue-on-error: true if: runner.os == 'Windows' + continue-on-error: true run: | Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" refreshenv - aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} C:\Users\ContainerAdministrator\AppData\Local\ccache --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }} + aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} ${{ matrix.ccache-dir }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }} env: AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" - - name: start sccache server for linux + - name: Download ccache from s3 if: runner.os == 'Linux' - working-directory: cpp + continue-on-error: true run: | - sccache --start-server + aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} ${{ matrix.ccache-dir }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }} env: - SCCACHE_BUCKET: "${{ secrets.MINIO_BUCKET_NAME }}" - SCCACHE_REGION: "${{ secrets.MINIO_REGION }}" - SCCACHE_ENDPOINT: "${{ secrets.MINIO_ENDPOINT }}" - SCCACHE_S3_USE_SSL: "false" - SCCACHE_S3_SERVER_SIDE_ENCRYPTION: "false" - SCCACHE_S3_KEY_PREFIX: "${{ matrix.s3-key-prefix }}" - SCCACHE_LOG: "debug" - SCCACHE_CONF: '${{ matrix.sccache-conf-path }}' AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" - SCCACHE_IDLE_TIMEOUT: "0" + AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" - name: Build Dependencies working-directory: cpp @@ -141,13 +124,6 @@ jobs: asset_path: cpp/tensorrt_llm/cortex.tensorrt-llm/cortex.tensorrt-llm.tar.gz asset_name: cortex.tensorrt-llm-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz asset_content_type: application/gzip - - - name: Clean - if: always() - continue-on-error: true - run: | - sccache --stop-server - rm ${{ matrix.sccache-conf-path }} - name: Upload ccache to s3 continue-on-error: true @@ -155,11 +131,22 @@ jobs: run: | Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" refreshenv - aws s3 cp C:\Users\ContainerAdministrator\AppData\Local\ccache s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }} + aws s3 cp ${{ matrix.ccache-dir }} s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }} env: AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" + + - name: Upload ccache to s3 + continue-on-error: true + if: always() && runner.os == 'Linux' + run: | + aws s3 cp ${{ matrix.ccache-dir }} s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }} + env: + AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" + AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" + AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" + update_release_draft: needs: [build-and-test] permissions: @@ -183,10 +170,3 @@ jobs: # disable-autolabeler: true env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Clean - if: always() - continue-on-error: true - run: | - sccache --stop-server - rm ${{ matrix.sccache-conf-path }} \ No newline at end of file diff --git a/.github/workflows/quality-gate.yml b/.github/workflows/quality-gate.yml index 3ee37dc2f14..7369214192b 100644 --- a/.github/workflows/quality-gate.yml +++ b/.github/workflows/quality-gate.yml @@ -18,13 +18,13 @@ jobs: runs-on: "windows-tensorrt-llm-cuda-12-2" run-e2e: false s3-key-prefix: "windows-tensorrt-llm-ccache" - sccache-conf-path: 'C:\sccache.conf' + ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache' - os: "linux" name: "cuda-12-3" runs-on: "linux-tensorrt-llm-cuda-12-3" run-e2e: false - s3-key-prefix: "linux-tensorrt-llm" - sccache-conf-path: '/tmp/sccache.conf' + s3-key-prefix: "linux-tensorrt-llm-0-9-0-cuda-12-3" + ccache-dir: "/home/runner/.ccache" permissions: contents: write steps: @@ -35,51 +35,34 @@ jobs: submodules: recursive lfs: true - - name: Install choco on Windows + - name: Install tools on Windows if: runner.os == 'Windows' run: | - choco install make pkgconfiglite ccache awscli -y + choco install make pkgconfiglite ccache awscli 7zip -y + Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" + refreshenv - - name: create sccache.conf file Linux - if: runner.os == 'Linux' - run: | - echo "[cache.s3]" > ${{ matrix.sccache-conf-path }} - echo 'bucket = "${{ secrets.MINIO_BUCKET_NAME }}"' >> ${{ matrix.sccache-conf-path }} - echo 'endpoint = "${{ secrets.MINIO_ENDPOINT }}"' >> ${{ matrix.sccache-conf-path }} - echo 'key_prefix = "${{ matrix.s3-key-prefix }}"' >> ${{ matrix.sccache-conf-path }} - echo 'use_ssl = false' >> ${{ matrix.sccache-conf-path }} - echo 'server_side_encryption = false' >> ${{ matrix.sccache-conf-path }} - echo 'no_credentials = false' >> ${{ matrix.sccache-conf-path }} - - name: Download ccache from s3 - continue-on-error: true if: runner.os == 'Windows' + continue-on-error: true run: | Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" refreshenv - aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} C:\Users\ContainerAdministrator\AppData\Local\ccache --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }} + aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} ${{ matrix.ccache-dir }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }} env: AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" - - name: start sccache server for linux + - name: Download ccache from s3 if: runner.os == 'Linux' - working-directory: cpp + continue-on-error: true run: | - sccache --start-server + aws s3 cp s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} ${{ matrix.ccache-dir }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }} env: - SCCACHE_BUCKET: "${{ secrets.MINIO_BUCKET_NAME }}" - SCCACHE_REGION: "${{ secrets.MINIO_REGION }}" - SCCACHE_ENDPOINT: "${{ secrets.MINIO_ENDPOINT }}" - SCCACHE_S3_USE_SSL: "false" - SCCACHE_S3_SERVER_SIDE_ENCRYPTION: "false" - SCCACHE_S3_KEY_PREFIX: "${{ matrix.s3-key-prefix }}" - SCCACHE_LOG: "debug" - SCCACHE_CONF: '${{ matrix.sccache-conf-path }}' AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" - SCCACHE_IDLE_TIMEOUT: "0" + AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" - name: Build Dependencies working-directory: cpp @@ -106,13 +89,6 @@ jobs: with: name: cortex.tensorrt-llm-${{ matrix.os }}-${{ matrix.name }} path: cpp/tensorrt_llm/cortex.tensorrt-llm/cortex.tensorrt-llm.tar.gz - - - name: Clean - if: always() - continue-on-error: true - run: | - sccache --stop-server - rm ${{ matrix.sccache-conf-path }} - name: Upload ccache to s3 continue-on-error: true @@ -120,10 +96,18 @@ jobs: run: | Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" refreshenv - aws s3 cp C:\Users\ContainerAdministrator\AppData\Local\ccache s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }} + aws s3 cp ${{ matrix.ccache-dir }} s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }} env: AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" - \ No newline at end of file + - name: Upload ccache to s3 + continue-on-error: true + if: always() && runner.os == 'Linux' + run: | + aws s3 cp ${{ matrix.ccache-dir }} s3://${{ secrets.MINIO_BUCKET_NAME }}/${{ matrix.s3-key-prefix }} --recursive --endpoint ${{ secrets.MINIO_ENDPOINT }} + env: + AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" + AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" + AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" diff --git a/cpp/Makefile b/cpp/Makefile index fc469e2f62e..f3acb1bbde0 100644 --- a/cpp/Makefile +++ b/cpp/Makefile @@ -18,7 +18,7 @@ build-deps: ifeq ($(OS),Windows_NT) @powershell -Command "cd tensorrt_llm/cortex.tensorrt-llm; cmake -S ./third-party -B ./build_deps/third-party -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE=Release -DCMAKE_OBJECT_PATH_MAX=500; cmake --build ./build_deps/third-party --config Release -j8;" else - @cd tensorrt_llm/cortex.tensorrt-llm && cmake -S ./third-party -B ./build_deps/third-party -DCMAKE_BUILD_TYPE=Release -DCMAKE_OBJECT_PATH_MAX=500 && make -C ./build_deps/third-party -j 10 && rm -rf ./build_deps/third-party; + @cd tensorrt_llm/cortex.tensorrt-llm && cmake -S ./third-party -B ./build_deps/third-party -DCMAKE_BUILD_TYPE=Release && make -C ./build_deps/third-party -j 10 && rm -rf ./build_deps/third-party; endif # Build the Cortex engine @@ -27,9 +27,8 @@ ifeq ($(OS),Windows_NT) @powershell -Command "cd ..; python .\scripts\build_wheel.py -a '80-real;86-real;89-real' --trt_root 'C:\workspace\TensorRT-9.3.0.1\' -D 'BUILD_CORTEX_TENSORRT-LLM=ON' --use_ccache" @powershell -Command "cd build; cmake .. -DCMAKE_CUDA_ARCHITECTURES='80-real;86-real;89-real' -DTRT_LIB_DIR='C:/workspace/TensorRT-9.3.0.1/lib' -DTRT_INCLUDE_DIR='C:/workspace/TensorRT-9.3.0.1/include' -DBUILD_CORTEX_TENSORRT-LLM=ON -DCMAKE_CUDA_COMPILER='C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2/bin/nvcc.exe' -DENABLE_MULTI_DEVICE=0 -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -G Ninja; cmake --build . --parallel 2 --config Release" else - @mkdir -p build && cd build; \ - cmake .. -GNinja -DBUILD_CORTEX_TENSORRT-LLM=ON -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache -DCMAKE_BUILD_TYPE='Release' -DBUILD_PYT='OFF' -DBUILD_PYBIND='OFF' -DNVTX_DISABLE='ON' -DCMAKE_CUDA_ARCHITECTURES='80-real;86-real;89-real' '-DENABLE_MULTI_DEVICE=0' '-DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc' -DBUILD_BENCHMARKS=OFF '-DBUILD_TESTS=OFF' -DTRT_LIB_DIR=/usr/local/tensorrt/lib -DTRT_INCLUDE_DIR=/usr/local/tensorrt/include; \ - cmake --build . --config Release; + @cd .. && python3 ./scripts/build_wheel.py --trt_root /usr/local/tensorrt --cuda_architectures "80-real;86-real;89-real" --extra-cmake-vars "BUILD_CORTEX_TENSORRT-LLM=ON" --use_ccache + @cd build && cmake .. -DCMAKE_CUDA_ARCHITECTURES='80-real;86-real;89-real' -DTRT_LIB_DIR='/usr/local/tensorrt/lib' -DTRT_INCLUDE_DIR='/usr/local/tensorrt/include' -DBUILD_CORTEX_TENSORRT-LLM=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -G Ninja && cmake --build . --config Release; endif # Prepackage the Cortex engine @@ -41,19 +40,17 @@ ifeq ($(OS),Windows_NT) @powershell -Command "cd tensorrt_llm\cortex.tensorrt-llm\; cp -Force C:\workspace\cuDNN\bin\cudnn_ops_infer64_8.dll cortex.tensorrt-llm\;" @powershell -Command "cd tensorrt_llm\cortex.tensorrt-llm\; cp -Force ..\..\build\tensorrt_llm\plugins\nvinfer_plugin_tensorrt_llm.dll cortex.tensorrt-llm\;" @powershell -Command "cd tensorrt_llm\cortex.tensorrt-llm\; cp -Force ..\..\build\tensorrt_llm\tensorrt_llm.dll cortex.tensorrt-llm\;" - @powershell -Command "cd tensorrt_llm\cortex.tensorrt-llm\; cp -Force C:\Windows\System32\msmpi.dll cortex.tensorrt-llm\;" @powershell -Command "cd tensorrt_llm\cortex.tensorrt-llm\; cp -Force .\build_deps\_install\bin\zlib.dll cortex.tensorrt-llm\;" else cd ./tensorrt_llm/cortex.tensorrt-llm && \ mkdir -p cortex.tensorrt-llm && \ - cp ../../build/tensorrt_llm/cortex.tensorrt-llm/libengine.$(shell uname | tr '[:upper:]' '[:lower:]' | sed 's/darwin/dylib/;s/linux/so/') cortex.tensorrt-llm && \ + cp ../../build/tensorrt_llm/cortex.tensorrt-llm/libengine.so cortex.tensorrt-llm && \ cp /usr/local/tensorrt/targets/x86_64-linux-gnu/lib/libnvinfer.so.9 cortex.tensorrt-llm && \ cp /usr/lib/x86_64-linux-gnu/libcudnn.so.8 cortex.tensorrt-llm && \ cp /usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8 cortex.tensorrt-llm && \ cp /home/runner/actions-runner/_work/cortex.tensorrt-llm/cortex.tensorrt-llm/cpp/build/tensorrt_llm/plugins/libnvinfer_plugin_tensorrt_llm.so.9 cortex.tensorrt-llm && \ cp /home/runner/actions-runner/_work/cortex.tensorrt-llm/cortex.tensorrt-llm/cpp/build/tensorrt_llm/libtensorrt_llm.so cortex.tensorrt-llm && \ - cp /opt/hpcx/ompi/lib/libmpi.so.40 cortex.tensorrt-llm && \ - cp /usr/lib/x86_64-linux-gnu/libnccl.so cortex.tensorrt-llm + cp /usr/lib/x86_64-linux-gnu/libnccl.so cortex.tensorrt-llm/libnccl.so.2 endif codesign: diff --git a/scripts/build_wheel.py b/scripts/build_wheel.py index d789f471038..53b1efdf549 100755 --- a/scripts/build_wheel.py +++ b/scripts/build_wheel.py @@ -88,7 +88,7 @@ def main(build_type: str = "Release", if cuda_architectures is not None else "") cmake_def_args = [] - cmake_generator = "" + cmake_generator = "-GNinja" hardware_arch = platform.machine()