Merge branch 'rel' into sync-0.10.0

hiento09 · hiento09 · commit c5aa6bcc12c8 · 2024-06-24T09:52:59.000+07:00
diff --git a/.github/runners/linux/Dockerfile.convertModel b/.github/runners/linux/Dockerfile.convertModel
@@ -0,0 +1,9 @@
+FROM nvidia/cuda:12.3.0-devel-ubuntu22.04
+
+RUN apt-get update && apt-get -y install python3.10 python3-pip openmpi-bin libopenmpi-dev git git-lfs
+
+RUN pip3 install tensorrt_llm==0.9.0 --extra-index-url https://pypi.nvidia.com
+
+RUN pip3 install numpy==1.26.4
+
+RUN git clone https://github.com/NVIDIA/TensorRT-LLM.git && cd TensorRT-LLM && git checkout v0.9.0
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -49,7 +49,7 @@ jobs:
             runs-on: "linux-tensorrt-llm-0-10-0-cuda-12-4"
             run-e2e: false
             s3-key-prefix: "linux-tensorrt-llm-tensorrt-llm-0-10-0-sccache"
-            ccache-dir: '/home/runner/.ccache'
+            ccache-dir: '/home/runner/.cache/ccache'
     permissions:
       contents: write
     steps:
@@ -169,4 +169,4 @@ jobs:
         #   config-name: my-config.yml
         #   disable-autolabeler: true
         env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/quality-gate.yml b/.github/workflows/quality-gate.yml
@@ -24,7 +24,7 @@ jobs:
             runs-on: "linux-tensorrt-llm-0-10-0-cuda-12-4"
             run-e2e: false
             s3-key-prefix: "linux-tensorrt-llm-tensorrt-llm-0-10-0-sccache"
-            ccache-dir: '/home/runner/.ccache'
+            ccache-dir: '/home/runner/.cache/ccache'
     permissions:
       contents: write
     steps:
@@ -111,5 +111,3 @@ jobs:
           AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
           AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
           AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
-
-          
diff --git a/cpp/Makefile b/cpp/Makefile
@@ -18,7 +18,7 @@ build-deps:
 ifeq ($(OS),Windows_NT)
 	@powershell -Command "cd tensorrt_llm/cortex.tensorrt-llm; cmake -S ./third-party -B ./build_deps/third-party -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE=Release -DCMAKE_OBJECT_PATH_MAX=500; cmake --build ./build_deps/third-party --config Release -j8;"
 else
-	@cd tensorrt_llm/cortex.tensorrt-llm && cmake -S ./third-party -B ./build_deps/third-party -DCMAKE_BUILD_TYPE=Release -DCMAKE_OBJECT_PATH_MAX=500 && make -C ./build_deps/third-party -j 10 && rm -rf ./build_deps/third-party;
+	@cd tensorrt_llm/cortex.tensorrt-llm && cmake -S ./third-party -B ./build_deps/third-party -DCMAKE_BUILD_TYPE=Release && make -C ./build_deps/third-party -j 10 && rm -rf ./build_deps/third-party;
 endif
 
 # Build the Cortex engine
@@ -29,7 +29,6 @@ ifeq ($(OS),Windows_NT)
 else
 	@cd .. && python3 ./scripts/build_wheel.py --trt_root /usr/local/tensorrt --cuda_architectures "80-real;86-real;89-real" --extra-cmake-vars "BUILD_CORTEX_TENSORRT-LLM=ON" --use_ccache
 	@cd build && cmake .. -DCMAKE_CUDA_ARCHITECTURES='80-real;86-real;89-real' -DTRT_LIB_DIR='/usr/local/tensorrt/lib' -DTRT_INCLUDE_DIR='/usr/local/tensorrt/include' -DBUILD_CORTEX_TENSORRT-LLM=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -G Ninja && cmake --build . --config Release;
-
 endif
 
 # Prepackage the Cortex engine