From 02367736dd0287d8e6422ecc0a9ec86f4289a880 Mon Sep 17 00:00:00 2001 From: Vincent Roseberry Date: Tue, 17 May 2022 20:12:48 +0000 Subject: [PATCH 1/2] Upgrade TensorFlow to 2.6.4 & base image - Upgraded to `m92` base image. - Removed upgrade to `nbconvert` now that base image has the expected version. - Removed workaround for NVIDIA GPG key now that the base image has fixed it. http://b/232964717 --- Dockerfile.tmpl | 11 +---------- config.txt | 2 +- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl index ca751699..386e4ad2 100644 --- a/Dockerfile.tmpl +++ b/Dockerfile.tmpl @@ -26,7 +26,7 @@ RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/lib FROM ${BASE_IMAGE_REPO}/${CPU_BASE_IMAGE_NAME}:${BASE_IMAGE_TAG} {{ end }} # Keep these variables in sync if base image is updated. -ENV TENSORFLOW_VERSION=2.6.2 +ENV TENSORFLOW_VERSION=2.6.4 # We need to redefine the ARG here to get the ARG value defined above the FROM instruction. # See: https://docs.docker.com/engine/reference/builder/#understand-how-arg-and-from-interact @@ -53,13 +53,6 @@ RUN pip uninstall -y horovod && \ /tmp/clean-layer.sh {{ end }} -{{ if eq .Accelerator "gpu" }} -# b/230864778: Temporarily swap the NVIDIA GPG key. Remove once new base image with new GPG key is released. -RUN rm /etc/apt/sources.list.d/cuda.list && \ - apt-key del 7fa2af80 && \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub -{{ end }} - # Use a fixed apt-get repo to stop intermittent failures due to flaky httpredir connections, # as described by Lionel Chan at http://stackoverflow.com/a/37426929/5881346 RUN sed -i "s/httpredir.debian.org/debian.uchicago.edu/" /etc/apt/sources.list && \ @@ -405,8 +398,6 @@ RUN pip install bleach && \ pip install jupyterlab-lsp && \ pip install MarkupSafe && \ pip install mistune && \ - # b/227194111 install latest version of nbconvert until the base image includes nbconvert >= 6.4.5 - pip install --upgrade nbconvert Jinja2 && \ pip install nbformat && \ pip install notebook && \ pip install papermill && \ diff --git a/config.txt b/config.txt index 98078a9a..d736096c 100644 --- a/config.txt +++ b/config.txt @@ -1,5 +1,5 @@ BASE_IMAGE_REPO=gcr.io/deeplearning-platform-release -BASE_IMAGE_TAG=m91 +BASE_IMAGE_TAG=m92 CPU_BASE_IMAGE_NAME=tf2-cpu.2-6 GPU_BASE_IMAGE_NAME=tf2-gpu.2-6 LIGHTGBM_VERSION=3.3.1 From c8aa77f1b2e60012fe89c68624cdfd450f30e603 Mon Sep 17 00:00:00 2001 From: Vincent Roseberry Date: Wed, 18 May 2022 02:57:32 +0000 Subject: [PATCH 2/2] Increase cpu test timeout and fix pyarrow double installation issue --- Dockerfile.tmpl | 5 ++++- Jenkinsfile | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl index 386e4ad2..6a7bb5bf 100644 --- a/Dockerfile.tmpl +++ b/Dockerfile.tmpl @@ -78,7 +78,10 @@ RUN conda config --add channels nvidia && \ /tmp/clean-layer.sh {{ if eq .Accelerator "gpu" }} -RUN conda install cudf=21.10 cuml=21.10 cudatoolkit=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \ + +# b/232247930: uninstall pyarrow to avoid double installation with the GPU specific version. +RUN pip uninstall -y pyarrow && \ + conda install cudf=21.10 cuml=21.10 cudatoolkit=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \ /tmp/clean-layer.sh {{ end }} diff --git a/Jenkinsfile b/Jenkinsfile index d6708008..8bc9c907 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -93,7 +93,7 @@ pipeline { } stage('Test CPU Image') { options { - timeout(time: 5, unit: 'MINUTES') + timeout(time: 10, unit: 'MINUTES') } steps { sh '''#!/bin/bash