# See the License for the specific language governing permissions and
# limitations under the License.

# Triton release this backend is built against; override with
# --build-arg TRITON_VERSION=<tag> to target a different base image.
ARG TRITON_VERSION=23.05
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3
FROM ${BASE_IMAGE}
# Build/dev OS packages. `update`, `install`, and cleanup run in one layer so
# stale apt metadata and /var/lib/apt/lists are never baked into the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        autoconf \
        autogen \
        clangd \
        # NOTE(review): the diff hunk (@@ -35,34 +35,34 @@) elided the packages
        # originally listed between 'clangd' and 'unzip' — restore them from
        # the full file before building.
        unzip \
        zip \
        zsh \
        zstd && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
4641
# Python dependencies. --no-cache-dir keeps pip's download cache out of the
# layer; torch is pinned to a CUDA 11.8 wheel to match the Triton base image.
# NOTE(review): transformers/datasets/etc. are unpinned — consider pinning
# them for reproducible builds.
RUN pip3 install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cu118 torch==2.0.1+cu118 && \
    pip3 install --no-cache-dir --extra-index-url https://pypi.ngc.nvidia.com regex fire tritonclient[all] && \
    pip3 install --no-cache-dir accelerate transformers huggingface_hub tokenizers SentencePiece sacrebleu datasets tqdm omegaconf rouge_score && \
    pip3 install --no-cache-dir cmake==3.24.3

# backend build
# COPY (not ADD) for plain local files; ADD's extra tar/URL behaviors are not
# wanted here. WORKDIR creates the build directory itself, so no mkdir needed.
COPY . /workspace/build/fastertransformer_backend

WORKDIR /workspace/build/fastertransformer_backend/build
# Bump this build-arg to bust the layer cache and force a backend rebuild.
ARG FORCE_BACKEND_REBUILD=0
# Configure and build the backend. CUDAFLAGS="-include stdio.h" must have no
# space after '=': with a space, POSIX shells treat the assignment as empty
# and run "-include stdio.h" as the command name, with cmake/make as its
# argument — the build would fail immediately.
RUN CUDAFLAGS="-include stdio.h" cmake \
      -D CMAKE_EXPORT_COMPILE_COMMANDS=1 \
      -D CMAKE_BUILD_TYPE=Release \
      -D ENABLE_FP8=OFF \
      -D CMAKE_INSTALL_PREFIX=/opt/tritonserver \
      -D TRITON_COMMON_REPO_TAG="r${NVIDIA_TRITON_SERVER_VERSION}" \
      -D TRITON_CORE_REPO_TAG="r${NVIDIA_TRITON_SERVER_VERSION}" \
      -D TRITON_BACKEND_REPO_TAG="r${NVIDIA_TRITON_SERVER_VERSION}" \
      .. && \
    # Record which FasterTransformer commit was fetched, for traceability.
    cd _deps/repo-ft-src/ && \
    git log | head -n 3 2>&1 | tee /workspace/build/fastertransformer_backend/FT_version.txt && \
    cd /workspace/build/fastertransformer_backend/build && \
    CUDAFLAGS="-include stdio.h" make -O -j"$(grep -c ^processor /proc/cpuinfo)" install && \
    # Strip example binaries and backend shared libs that are not shipped.
    rm -rf /workspace/build/fastertransformer_backend/build/bin/*_example && \
    rm -rf /workspace/build/fastertransformer_backend/build/lib/lib*Backend.so