Skip to content

Commit 9171e0a

Browse files
committed
Upgrade to PyTorch 1.8.1 & TensorFlow 2.5.0
And CUDA 11.2 http://b/181966788
1 parent bc02b80 commit 9171e0a

File tree

2 files changed

+30
-24
lines changed

2 files changed

+30
-24
lines changed

Dockerfile

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
ARG BASE_TAG=m75
2-
ARG TENSORFLOW_VERSION=2.4.1
2+
ARG TENSORFLOW_VERSION=2.5.0
33

44
FROM gcr.io/deeplearning-platform-release/base-cpu:${BASE_TAG}
55

@@ -40,16 +40,15 @@ ENV PROJ_LIB=/opt/conda/share/proj
4040
# Using the same global consistent ordered list of channels
4141
RUN conda config --add channels conda-forge && \
4242
conda config --add channels nvidia && \
43-
conda config --add channels pytorch && \
4443
conda config --add channels rapidsai && \
4544
# ^ rapidsai is the highest priority channel, default lowest, conda-forge 2nd lowest.
46-
# b/182405233 pyproj 3.x is not compatible with basemap 1.2.1
4745
# b/161473620#comment7 pin required to prevent the resolver from picking pysal 1.x; pysal 2.2.x also downloads data on import.
4846
conda install basemap cartopy imagemagick pyproj "pysal==2.1.0" && \
49-
conda install "pytorch=1.7" "torchvision=0.8" "torchaudio=0.7" "torchtext=0.8" cpuonly && \
5047
/tmp/clean-layer.sh
5148

52-
# The anaconda base image includes outdated versions of these packages. Update them to include the latest version.
49+
RUN pip install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 torchtext==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html && \
50+
/tmp/clean-layer.sh
51+
5352
RUN pip install seaborn python-dateutil dask python-igraph && \
5453
pip install pyyaml joblib husl geopy ml_metrics mne pyshp && \
5554
pip install pandas && \
@@ -60,8 +59,8 @@ RUN pip install seaborn python-dateutil dask python-igraph && \
6059
/tmp/clean-layer.sh
6160

6261
RUN pip install tensorflow==${TENSORFLOW_VERSION} && \
63-
pip install tensorflow-gcs-config==2.4.0 && \
64-
pip install tensorflow-addons==0.12.1 && \
62+
pip install tensorflow-gcs-config==${TENSORFLOW_VERSION} && \
63+
pip install tensorflow-addons==0.13.0 && \
6564
/tmp/clean-layer.sh
6665

6766
RUN apt-get install -y libfreetype6-dev && \
@@ -329,8 +328,7 @@ RUN pip install bleach && \
329328
pip install widgetsnbextension && \
330329
pip install pyarrow && \
331330
pip install feather-format && \
332-
# fastai >= 2.3.1 upgrades pytorch/torchvision. upgrade of pytorch will be handled in b/181966788
333-
pip install fastai==2.2.7 && \
331+
pip install fastai && \
334332
pip install allennlp && \
335333
# https://b.corp.google.com/issues/184685619#comment9: 3.9.0 is causing a major performance degradation with spacy 2.3.5
336334
pip install importlib-metadata==3.4.0 && \

gpu.Dockerfile

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
ARG BASE_TAG=staging
22

3-
FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 AS nvidia
3+
FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu18.04 AS nvidia
44
FROM gcr.io/kaggle-images/python:${BASE_TAG}
55

66
ADD clean-layer.sh /tmp/clean-layer.sh
@@ -13,7 +13,7 @@ COPY --from=nvidia /etc/apt/trusted.gpg /etc/apt/trusted.gpg.d/cuda.gpg
1313
RUN sed -i 's/deb https:\/\/developer.download.nvidia.com/deb http:\/\/developer.download.nvidia.com/' /etc/apt/sources.list.d/*.list
1414

1515
ENV CUDA_MAJOR_VERSION=11
16-
ENV CUDA_MINOR_VERSION=0
16+
ENV CUDA_MINOR_VERSION=2
1717
ENV CUDA_VERSION=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION
1818
LABEL com.nvidia.volumes.needed="nvidia_driver"
1919
LABEL com.nvidia.cuda.version="${CUDA_VERSION}"
@@ -27,7 +27,9 @@ ENV LD_LIBRARY_PATH_NO_STUBS="/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_
2727
ENV LD_LIBRARY_PATH="/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"
2828
ENV NVIDIA_VISIBLE_DEVICES=all
2929
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
30-
ENV NVIDIA_REQUIRE_CUDA="cuda>=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION"
30+
# With CUDA enhanced compatibility, applications that were compiled with CUDA 11.1 can be run on the driver associated with CUDA 11.0 (i.e. R450).
31+
# See: https://docs.nvidia.com/deploy/cuda-compatibility/index.html#existing-apps-minor-versions
32+
ENV NVIDIA_REQUIRE_CUDA="cuda>=$CUDA_MAJOR_VERSION"
3133
RUN apt-get update && apt-get install -y --no-install-recommends \
3234
cuda-cupti-$CUDA_VERSION \
3335
cuda-cudart-$CUDA_VERSION \
@@ -37,10 +39,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
3739
cuda-nvml-dev-$CUDA_VERSION \
3840
cuda-minimal-build-$CUDA_VERSION \
3941
cuda-command-line-tools-$CUDA_VERSION \
40-
libcudnn8=8.0.4.30-1+cuda$CUDA_VERSION \
41-
libcudnn8-dev=8.0.4.30-1+cuda$CUDA_VERSION \
42-
libnccl2=2.7.8-1+cuda$CUDA_VERSION \
43-
libnccl-dev=2.7.8-1+cuda$CUDA_VERSION && \
42+
libcudnn8=8.1.1.33-1+cuda$CUDA_VERSION \
43+
libcudnn8-dev=8.1.1.33-1+cuda$CUDA_VERSION \
44+
libnccl2=2.8.4-1+cuda$CUDA_VERSION \
45+
libnccl-dev=2.8.4-1+cuda$CUDA_VERSION && \
4446
ln -s /usr/local/cuda-$CUDA_VERSION /usr/local/cuda && \
4547
ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
4648
/tmp/clean-layer.sh
@@ -55,12 +57,18 @@ RUN apt-get install -y ocl-icd-libopencl1 clinfo libboost-all-dev && \
5557
# the remaining pip commands: https://www.anaconda.com/using-pip-in-a-conda-environment/
5658
# However, because this image is based on the CPU image, this isn't possible but better
5759
# to put them at the top of this file to minimize conflicts.
58-
RUN conda remove --force -y pytorch torchvision torchaudio torchtext cpuonly && \
59-
conda install "pytorch=1.7" "torchvision=0.8" "torchaudio=0.7" "torchtext=0.8" cudatoolkit=$CUDA_VERSION && \
60-
conda install "cudf=21.06" "cuml=21.06" && \
60+
RUN conda install "cudf=21.06" "cuml=21.06" && \
6161
/tmp/clean-layer.sh
6262

63-
# Install LightGBM with GPU
63+
# Install PyTorch and torchvision with GPU support.
64+
# Note: torchtext and torchaudio do not require a separate package.
65+
# Replace `cu111` with `cu$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION` once a build for CUDA 11.2 is released.
66+
# Introduced in CUDA 11.1, CUDA Enhanced Compatibility leverages semantic versioning across components in the CUDA Toolkit: an application can be built for one CUDA minor release (such as 11.1) and work across all future minor releases within the major family (such as 11.x).
67+
# See: https://docs.nvidia.com/deploy/cuda-compatibility/index.html#overview
68+
RUN pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html && \
69+
/tmp/clean-layer.sh
70+
71+
# Install LightGBM with GPU support
6472
RUN pip uninstall -y lightgbm && \
6573
cd /usr/local/src && \
6674
git clone --recursive https://github.com/microsoft/LightGBM && \
@@ -76,7 +84,8 @@ RUN pip uninstall -y lightgbm && \
7684
/tmp/clean-layer.sh
7785

7886
# Install JAX (Keep JAX version in sync with CPU image)
79-
RUN pip install jax==0.2.16 jaxlib==0.1.68+cuda$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION -f https://storage.googleapis.com/jax-releases/jax_releases.html && \
87+
# TODO(b/181966788) Replace `cuda111` with `cuda$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION` once new version is out.
88+
RUN pip install jax==0.2.16 jaxlib==0.1.68+cuda111 -f https://storage.googleapis.com/jax-releases/jax_releases.html && \
8089
/tmp/clean-layer.sh
8190

8291
# Reinstall packages with a separate version for GPU support.
@@ -87,9 +96,8 @@ RUN pip uninstall -y mxnet && \
8796
# Install GPU-only packages
8897
RUN pip install pycuda && \
8998
pip install pynvrtc && \
90-
# b/190622765 latest version is causing issue. nnabla fixed it in https://github.com/sony/nnabla/issues/892, waiting for new release before we can remove this pin.
91-
pip install pynvml==8.0.4 && \
92-
pip install nnabla-ext-cuda$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION && \
99+
# TODO(b/181966788) Replace `110` with `$CUDA_MAJOR_VERSION$CUDA_MINOR_VERSION` once a new version of nnabla-ext-cuda is out.
100+
pip install nnabla-ext-cuda110 && \
93101
/tmp/clean-layer.sh
94102

95103
# Re-add TensorBoard Jupyter extension patch

0 commit comments

Comments
 (0)