diff --git a/dockers/base-conda/Dockerfile b/dockers/base-conda/Dockerfile index 85d5ed345af10..280ab0687ffed 100644 --- a/dockers/base-conda/Dockerfile +++ b/dockers/base-conda/Dockerfile @@ -123,7 +123,7 @@ ENV \ HOROVOD_WITHOUT_TENSORFLOW=1 \ HOROVOD_WITHOUT_MXNET=1 \ HOROVOD_WITH_GLOO=1 \ - HOROVOD_WITHOUT_MPI=1 + HOROVOD_WITH_MPI=1 RUN \ HOROVOD_BUILD_CUDA_CC_LIST=${TORCH_CUDA_ARCH_LIST//";"/","} && \ @@ -154,4 +154,5 @@ RUN \ pip list && \ python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" && \ python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__" && \ - python -c "import horovod.torch" + python -c "import horovod.torch" && \ + python -c "from horovod.torch import nccl_built; nccl_built()" diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index 739ff591eb062..bf57fb91025fc 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -83,21 +83,21 @@ RUN \ python ./requirements/adjust-versions.py requirements/extra.txt ${PYTORCH_VERSION} && \ python ./requirements/adjust-versions.py requirements/examples.txt ${PYTORCH_VERSION} && \ python -c "print(' '.join([ln for ln in open('requirements/extra.txt').readlines() if 'horovod' in ln]))" > ./requirements/horovod.txt && \ - python assistant.py requirements_prune_pkgs requirements/examples.txt "horovod" && \ + python assistant.py requirements_prune_pkgs requirements/extra.txt "horovod" && \ # Install all requirements \ pip install -r requirements/devel.txt --no-cache-dir --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html && \ rm -rf requirements.* && \ rm assistant.py RUN \ - apt-get purge -y cmake && \ - wget -q https://github.com/Kitware/CMake/releases/download/v3.20.2/cmake-3.20.2.tar.gz && \ - tar -zxvf cmake-3.20.2.tar.gz && \ - cd cmake-3.20.2 && \ - ./bootstrap -- -DCMAKE_USE_OPENSSL=OFF && \ - make && \ - make install && \ - cmake --version + apt-get purge -y cmake && \ + wget -q https://github.com/Kitware/CMake/releases/download/v3.20.2/cmake-3.20.2.tar.gz && \ + tar -zxvf cmake-3.20.2.tar.gz && \ + cd cmake-3.20.2 && \ + ./bootstrap -- -DCMAKE_USE_OPENSSL=OFF && \ + make && \ + make install && \ + cmake --version ENV \ HOROVOD_CUDA_HOME=$CUDA_TOOLKIT_ROOT_DIR \ @@ -106,15 +106,15 @@ ENV \ HOROVOD_WITHOUT_TENSORFLOW=1 \ HOROVOD_WITHOUT_MXNET=1 \ HOROVOD_WITH_GLOO=1 \ - HOROVOD_WITHOUT_MPI=1 + HOROVOD_WITH_MPI=1 RUN \ - HOROVOD_BUILD_CUDA_CC_LIST=${TORCH_CUDA_ARCH_LIST//";"/","} && \ - export HOROVOD_BUILD_CUDA_CC_LIST=${HOROVOD_BUILD_CUDA_CC_LIST//"."/""} && \ - cat ./requirements/horovod.txt && \ - cmake --version && \ - pip install --no-cache-dir -r ./requirements/horovod.txt && \ - rm -rf requirements/ + HOROVOD_BUILD_CUDA_CC_LIST=${TORCH_CUDA_ARCH_LIST//";"/","} && \ + export HOROVOD_BUILD_CUDA_CC_LIST=${HOROVOD_BUILD_CUDA_CC_LIST//"."/""} && \ + cat ./requirements/horovod.txt && \ + cmake --version && \ + pip install --no-cache-dir -r ./requirements/horovod.txt && \ + rm -rf requirements/ RUN \ CUDA_VERSION_MAJOR=$(python -c "import torch; print(torch.version.cuda.split('.')[0])") && \ @@ -147,4 +147,5 @@ RUN \ pip list && \ python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" && \ python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__" && \ - python -c "import horovod.torch" + python -c "import horovod.torch" && \ + python -c "from horovod.torch import nccl_built; nccl_built()"