From f10acbaedf81ee0549c7a6c2db71227a8ec7a0ad Mon Sep 17 00:00:00 2001 From: ydcjeff Date: Thu, 5 Nov 2020 22:03:19 +0630 Subject: [PATCH 1/9] [dockers] install nvidia-dali-cuda100 --- dockers/base-conda/Dockerfile | 2 ++ dockers/base-cuda/Dockerfile | 2 ++ 2 files changed, 4 insertions(+) diff --git a/dockers/base-conda/Dockerfile b/dockers/base-conda/Dockerfile index d11e61d92edbd..46bd13b093d0d 100644 --- a/dockers/base-conda/Dockerfile +++ b/dockers/base-conda/Dockerfile @@ -30,6 +30,7 @@ ARG PYTHON_VERSION=3.7 ARG PYTORCH_VERSION=1.6 ARG PYTORCH_CHANNEL=pytorch ARG CONDA_VERSION=4.7.12 +ARG NVIDIA_DALI_VERSION=100 SHELL ["/bin/bash", "-c"] @@ -104,6 +105,7 @@ RUN \ # Install remaining requirements pip install -r requirements-extra.txt --upgrade-strategy only-if-needed && \ pip install -r requirements-test.txt --upgrade-strategy only-if-needed && \ + pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda${NVIDIA_DALI_VERSION} && \ rm requirements* RUN \ diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index e22b5a862a7d7..7767f486d5a53 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -28,6 +28,7 @@ FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu18.04 ARG PYTHON_VERSION=3.7 ARG PYTORCH_VERSION=1.6 +ARG NVIDIA_DALI_VERSION=100 SHELL ["/bin/bash", "-c"] # https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/ @@ -93,6 +94,7 @@ RUN \ # Install all requirements pip install -r requirements/devel.txt --upgrade-strategy only-if-needed --use-feature=2020-resolver && \ + pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda${NVIDIA_DALI_VERSION} && \ rm -rf requirements* RUN \ From ce23eb885ef241cc298e9f70b0323a9270b235ea Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 5 Nov 2020 19:38:25 +0100 Subject: [PATCH 2/9] Apply suggestions from code review --- dockers/base-conda/Dockerfile | 5 ++--- dockers/base-cuda/Dockerfile | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/dockers/base-conda/Dockerfile b/dockers/base-conda/Dockerfile index 46bd13b093d0d..0dbbf587889fc 100644 --- a/dockers/base-conda/Dockerfile +++ b/dockers/base-conda/Dockerfile @@ -30,7 +30,6 @@ ARG PYTHON_VERSION=3.7 ARG PYTORCH_VERSION=1.6 ARG PYTORCH_CHANNEL=pytorch ARG CONDA_VERSION=4.7.12 -ARG NVIDIA_DALI_VERSION=100 SHELL ["/bin/bash", "-c"] @@ -105,7 +104,7 @@ RUN \ # Install remaining requirements pip install -r requirements-extra.txt --upgrade-strategy only-if-needed && \ pip install -r requirements-test.txt --upgrade-strategy only-if-needed && \ - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda${NVIDIA_DALI_VERSION} && \ + pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda${CUDA_VERSION/./} && \ rm requirements* RUN \ @@ -120,4 +119,4 @@ RUN \ conda info && \ pip list && \ python -c "import sys; assert sys.version[:3] == '$PYTHON_VERSION', sys.version" && \ - python -c "import torch; assert torch.__version__[:3] == '$PYTORCH_VERSION', torch.__version__" \ No newline at end of file + python -c "import torch; assert torch.__version__[:3] == '$PYTORCH_VERSION', torch.__version__" diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index 7767f486d5a53..e2fa08230e73e 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -28,7 +28,6 @@ FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu18.04 ARG PYTHON_VERSION=3.7 ARG PYTORCH_VERSION=1.6 -ARG NVIDIA_DALI_VERSION=100 SHELL ["/bin/bash", "-c"] # https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/ @@ -94,7 +93,7 @@ RUN \ # Install all requirements pip install -r requirements/devel.txt --upgrade-strategy only-if-needed --use-feature=2020-resolver && \ - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda${NVIDIA_DALI_VERSION} && \ + pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda${CUDA_VERSION/./} && \ rm -rf requirements* RUN \ @@ -108,4 +107,4 @@ RUN \ pip --version && \ pip list && \ python -c "import sys; assert sys.version[:3] == '$PYTHON_VERSION', sys.version" && \ - python -c "import torch; assert torch.__version__[:3] == '$PYTORCH_VERSION', torch.__version__" \ No newline at end of file + python -c "import torch; assert torch.__version__[:3] == '$PYTORCH_VERSION', torch.__version__" From 684c4c652f31078e3279559ab27e3830dc73e543 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 5 Nov 2020 22:11:50 +0100 Subject: [PATCH 3/9] build DALI --- dockers/base-cuda/Dockerfile | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index e2fa08230e73e..30fae55592b94 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -102,6 +102,20 @@ RUN \ pip install --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex && \ rm -rf apex +RUN \ + # Get DALI source code: + git clone --recursive https://github.com/NVIDIA/DALI + cd DALI + # Create a directory for CMake-generated Makefiles. This will be the directory, that DALI’s built in. + mkdir build + cd build + # Run CMake. For additional options you can pass to CMake, refer to Optional CMake build parameters. + cmake -D CMAKE_BUILD_TYPE=Release .. + # Build. You can use -j option to execute it in several threads + make -j"$(nproc)" + # In order to run DALI using Python API, you need to install Python bindings + pip install dali/python + RUN \ # Show what we have pip --version && \ From 4b89bf7254dc4f0d95dd9dc7c97bd1c8f8327473 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 5 Nov 2020 22:14:53 +0100 Subject: [PATCH 4/9] build DALI --- dockers/base-cuda/Dockerfile | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index 30fae55592b94..0ea645b70170a 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -104,17 +104,20 @@ RUN \ RUN \ # Get DALI source code: - git clone --recursive https://github.com/NVIDIA/DALI - cd DALI + git clone --recursive https://github.com/NVIDIA/DALI && \ + cd DALI && \ # Create a directory for CMake-generated Makefiles. This will be the directory, that DALI’s built in. - mkdir build - cd build + mkdir build && \ + cd build && \ # Run CMake. For additional options you can pass to CMake, refer to Optional CMake build parameters. - cmake -D CMAKE_BUILD_TYPE=Release .. + cmake -D CMAKE_BUILD_TYPE=Release .. && \ # Build. You can use -j option to execute it in several threads - make -j"$(nproc)" + make -j"$(nproc)" && \ # In order to run DALI using Python API, you need to install Python bindings - pip install dali/python + pip install dali/python && \ + # claan-up + cd ../.. && \ + rm -rf DALI RUN \ # Show what we have From 05c31a19f1f06d9bffbf46be0e5090b39fbf03ba Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 6 Nov 2020 01:17:11 +0100 Subject: [PATCH 5/9] build DALI --- dockers/README.md | 6 +++--- dockers/base-conda/Dockerfile | 3 ++- dockers/base-cuda/Dockerfile | 37 +++++++++++++++++++++++++++++++---- 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/dockers/README.md b/dockers/README.md index 73c40635eb0a5..aab82a171641a 100644 --- a/dockers/README.md +++ b/dockers/README.md @@ -14,10 +14,10 @@ or with specific arguments ```bash git clone docker image build \ - -t pytorch-lightning:py3.8 \ - -f dockers/conda/Dockerfile \ + -t pytorch-lightning:py3.8-pt1.6 \ + -f dockers/base-cuda/Dockerfile \ --build-arg PYTHON_VERSION=3.8 \ - --build-arg PYTORCH_VERSION=1.4 \ + --build-arg PYTORCH_VERSION=1.6 \ . ``` diff --git a/dockers/base-conda/Dockerfile b/dockers/base-conda/Dockerfile index 0dbbf587889fc..060b2c66b0331 100644 --- a/dockers/base-conda/Dockerfile +++ b/dockers/base-conda/Dockerfile @@ -104,7 +104,6 @@ RUN \ # Install remaining requirements pip install -r requirements-extra.txt --upgrade-strategy only-if-needed && \ pip install -r requirements-test.txt --upgrade-strategy only-if-needed && \ - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda${CUDA_VERSION/./} && \ rm requirements* RUN \ @@ -113,6 +112,8 @@ RUN \ pip install --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex && \ rm -rf apex +# todo: add DALI + RUN \ # Show what we have pip --version && \ diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index 0ea645b70170a..0ace83a2bd6dc 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -37,7 +37,7 @@ ENV TZ=Europe/Prague ENV PATH="$PATH:/root/.local/bin" ENV CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" -RUN apt-get update && \ +RUN apt-get update -qq && \ apt-get install -y --no-install-recommends \ build-essential \ pkg-config \ @@ -93,7 +93,6 @@ RUN \ # Install all requirements pip install -r requirements/devel.txt --upgrade-strategy only-if-needed --use-feature=2020-resolver && \ - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda${CUDA_VERSION/./} && \ rm -rf requirements* RUN \ @@ -102,7 +101,25 @@ RUN \ pip install --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex && \ rm -rf apex +ENV CMAKE_VERSION=3.16.9 + +RUN \ + # need to install cmake > 3.13 for DALI build + apt-get remove cmake -y && \ + wget https://cmake.org/files/v${CMAKE_VERSION%.*}/cmake-${CMAKE_VERSION}-Linux-x86_64.sh --progress=bar:force:noscroll && \ + bash cmake-${CMAKE_VERSION}-Linux-x86_64.sh --skip-license && \ + rm cmake-${CMAKE_VERSION}-Linux-x86_64.sh && \ + cmake --version + RUN \ + apt-get update -qq && \ + apt-get install -y \ + libopencv-dev \ + libsndfile-dev \ + libavfilter-dev \ + libprotobuf-dev \ + protobuf-compiler \ + && \ # Get DALI source code: git clone --recursive https://github.com/NVIDIA/DALI && \ cd DALI && \ @@ -110,14 +127,26 @@ RUN \ mkdir build && \ cd build && \ # Run CMake. For additional options you can pass to CMake, refer to Optional CMake build parameters. - cmake -D CMAKE_BUILD_TYPE=Release .. && \ + cmake \ + -D CMAKE_BUILD_TYPE=Release \ + -D CMAKE_CXX_STANDARD=14 \ + .. && \ # Build. You can use -j option to execute it in several threads make -j"$(nproc)" && \ # In order to run DALI using Python API, you need to install Python bindings pip install dali/python && \ # claan-up cd ../.. && \ - rm -rf DALI + rm -rf DALI && \ + apt-get remove -y \ + libopencv-dev \ + libsndfile-dev \ + libavfilter-dev \ + libprotobuf-dev \ + protobuf-compiler \ + && \ + apt-get autoremove -y && \ + apt-get clean && \ RUN \ # Show what we have From a92b06904eef994d7d8cd56c87e18b2eb2eac7d8 Mon Sep 17 00:00:00 2001 From: ydcjeff Date: Fri, 6 Nov 2020 19:03:29 +0630 Subject: [PATCH 6/9] dali from source --- dockers/base-cuda/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index 0ace83a2bd6dc..90db2a64b8021 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -129,7 +129,6 @@ RUN \ # Run CMake. For additional options you can pass to CMake, refer to Optional CMake build parameters. cmake \ -D CMAKE_BUILD_TYPE=Release \ - -D CMAKE_CXX_STANDARD=14 \ .. && \ # Build. You can use -j option to execute it in several threads make -j"$(nproc)" && \ @@ -152,5 +151,6 @@ RUN \ # Show what we have pip --version && \ pip list && \ + python -c 'from nvidia.dali.pipeline import Pipeline' && \ python -c "import sys; assert sys.version[:3] == '$PYTHON_VERSION', sys.version" && \ python -c "import torch; assert torch.__version__[:3] == '$PYTORCH_VERSION', torch.__version__" From 57c5f107ad80ef51573a4e1ca23ab57078e5b61c Mon Sep 17 00:00:00 2001 From: ydcjeff Date: Fri, 6 Nov 2020 20:50:05 +0630 Subject: [PATCH 7/9] dali from source --- dockers/base-cuda/Dockerfile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index 90db2a64b8021..d0a359156a85b 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -28,6 +28,7 @@ FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu18.04 ARG PYTHON_VERSION=3.7 ARG PYTORCH_VERSION=1.6 +ARG CMAKE_VERSION=3.18.4 SHELL ["/bin/bash", "-c"] # https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/ @@ -126,10 +127,12 @@ RUN \ # Create a directory for CMake-generated Makefiles. This will be the directory, that DALI’s built in. mkdir build && \ cd build && \ + wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-Linux-x86_64.sh && \ + bash cmake-${CMAKE_VERSION}-Linux-x86_64.sh --skip-license && \ + rm cmake-${CMAKE_VERSION}-Linux-x86_64.sh && \ + bin/cmake --version && \ # Run CMake. For additional options you can pass to CMake, refer to Optional CMake build parameters. - cmake \ - -D CMAKE_BUILD_TYPE=Release \ - .. && \ + bin/cmake -D CMAKE_BUILD_TYPE=Release .. && \ # Build. You can use -j option to execute it in several threads make -j"$(nproc)" && \ # In order to run DALI using Python API, you need to install Python bindings From c160d9d306b22ff5b0dba1b6cb8c0946f0bedf76 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 6 Nov 2020 18:33:16 +0100 Subject: [PATCH 8/9] use binaries --- dockers/base-conda/Dockerfile | 3 +-- dockers/base-cuda/Dockerfile | 49 +---------------------------------- 2 files changed, 2 insertions(+), 50 deletions(-) diff --git a/dockers/base-conda/Dockerfile b/dockers/base-conda/Dockerfile index 060b2c66b0331..281cab911eb0d 100644 --- a/dockers/base-conda/Dockerfile +++ b/dockers/base-conda/Dockerfile @@ -104,6 +104,7 @@ RUN \ # Install remaining requirements pip install -r requirements-extra.txt --upgrade-strategy only-if-needed && \ pip install -r requirements-test.txt --upgrade-strategy only-if-needed && \ + pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda${CUDA_VERSION%%.*}0 && \ rm requirements* RUN \ @@ -112,8 +113,6 @@ RUN \ pip install --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex && \ rm -rf apex -# todo: add DALI - RUN \ # Show what we have pip --version && \ diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index d0a359156a85b..f886ccc30be7a 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -94,6 +94,7 @@ RUN \ # Install all requirements pip install -r requirements/devel.txt --upgrade-strategy only-if-needed --use-feature=2020-resolver && \ + pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda${CUDA_VERSION%%.*}0 && \ rm -rf requirements* RUN \ @@ -102,54 +103,6 @@ RUN \ pip install --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex && \ rm -rf apex -ENV CMAKE_VERSION=3.16.9 - -RUN \ - # need to install cmake > 3.13 for DALI build - apt-get remove cmake -y && \ - wget https://cmake.org/files/v${CMAKE_VERSION%.*}/cmake-${CMAKE_VERSION}-Linux-x86_64.sh --progress=bar:force:noscroll && \ - bash cmake-${CMAKE_VERSION}-Linux-x86_64.sh --skip-license && \ - rm cmake-${CMAKE_VERSION}-Linux-x86_64.sh && \ - cmake --version - -RUN \ - apt-get update -qq && \ - apt-get install -y \ - libopencv-dev \ - libsndfile-dev \ - libavfilter-dev \ - libprotobuf-dev \ - protobuf-compiler \ - && \ - # Get DALI source code: - git clone --recursive https://github.com/NVIDIA/DALI && \ - cd DALI && \ - # Create a directory for CMake-generated Makefiles. This will be the directory, that DALI’s built in. - mkdir build && \ - cd build && \ - wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-Linux-x86_64.sh && \ - bash cmake-${CMAKE_VERSION}-Linux-x86_64.sh --skip-license && \ - rm cmake-${CMAKE_VERSION}-Linux-x86_64.sh && \ - bin/cmake --version && \ - # Run CMake. For additional options you can pass to CMake, refer to Optional CMake build parameters. - bin/cmake -D CMAKE_BUILD_TYPE=Release .. && \ - # Build. You can use -j option to execute it in several threads - make -j"$(nproc)" && \ - # In order to run DALI using Python API, you need to install Python bindings - pip install dali/python && \ - # claan-up - cd ../.. && \ - rm -rf DALI && \ - apt-get remove -y \ - libopencv-dev \ - libsndfile-dev \ - libavfilter-dev \ - libprotobuf-dev \ - protobuf-compiler \ - && \ - apt-get autoremove -y && \ - apt-get clean && \ - RUN \ # Show what we have pip --version && \ From aaaa55fda76858d68f2a1a0625999bde94518e9a Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 6 Nov 2020 18:35:22 +0100 Subject: [PATCH 9/9] qq --- dockers/base-conda/Dockerfile | 3 ++- dockers/base-xla/Dockerfile | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dockers/base-conda/Dockerfile b/dockers/base-conda/Dockerfile index 281cab911eb0d..ea8c6bc5d001d 100644 --- a/dockers/base-conda/Dockerfile +++ b/dockers/base-conda/Dockerfile @@ -35,7 +35,8 @@ SHELL ["/bin/bash", "-c"] ENV PATH="$PATH:/root/.local/bin" -RUN apt-get update && apt-get install -y --no-install-recommends \ +RUN apt-get update -qq && \ + apt-get install -y --no-install-recommends \ build-essential \ cmake \ git \ diff --git a/dockers/base-xla/Dockerfile b/dockers/base-xla/Dockerfile index 3eaabade428e6..8eb093295c37b 100644 --- a/dockers/base-xla/Dockerfile +++ b/dockers/base-xla/Dockerfile @@ -31,7 +31,7 @@ ENV CONDA_ENV=lightning # show system inforation RUN lsb_release -a && cat /etc/*-release -RUN apt-get update && \ +RUN apt-get update -qq && \ apt-get install -y --no-install-recommends \ build-essential \ cmake \