From ee5e03585b6e51986c70f3bb2923494654f62624 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 26 Mar 2021 12:23:01 +0100 Subject: [PATCH 01/10] use latest --- dockers/nvidia/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/nvidia/Dockerfile b/dockers/nvidia/Dockerfile index 4b04bc9426d4d..6a6e4eb31db1a 100644 --- a/dockers/nvidia/Dockerfile +++ b/dockers/nvidia/Dockerfile @@ -31,7 +31,7 @@ RUN \ mv pytorch-lightning-*/ pytorch-lightning ; \ rm *.zip ; \ fi && \ - pip install ./pytorch-lightning["extra"] --no-cache-dir && \ + pip install ./pytorch-lightning["extra"] -U --no-cache-dir && \ rm -rf pytorch-lightning RUN python --version && \ From 8b7f7321745d65b7891d927c2fb5a1f40e5b3731 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 26 Mar 2021 12:49:05 +0100 Subject: [PATCH 02/10] remake --- dockers/nvidia/Dockerfile | 51 +++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/dockers/nvidia/Dockerfile b/dockers/nvidia/Dockerfile index 6a6e4eb31db1a..c80c1fa3fb894 100644 --- a/dockers/nvidia/Dockerfile +++ b/dockers/nvidia/Dockerfile @@ -12,18 +12,54 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-FROM nvcr.io/nvidia/pytorch:21.02-py3 +FROM nvcr.io/nvidia/cuda:11.2.2-runtime-ubuntu20.04 MAINTAINER PyTorchLightning ARG LIGHTNING_VERSION="" +SHELL ["/bin/bash", "-c"] +# https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/ +ENV \ + DEBIAN_FRONTEND=noninteractive \ + TZ=Europe/Prague \ + PATH="$PATH:/root/.local/bin" \ + CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \ + MKL_THREADING_LAYER=GNU + +RUN apt-get update -qq && \ + apt-get install -y --no-install-recommends \ + build-essential \ + python3 \ + python3-distutils \ + python3-dev \ + pkg-config \ + cmake \ + git \ + wget \ + curl \ + unzip \ + ca-certificates \ + software-properties-common \ + libopenmpi-dev \ + && \ + +# Cleaning + apt-get autoremove -y && \ + apt-get clean && \ + rm -rf /root/.cache && \ + rm -rf /var/lib/apt/lists/* && \ + +# Setup PIP + update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \ + wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll --no-check-certificate && \ + python get-pip.py && \ + rm get-pip.py && \ + pip --version + COPY ./ ./pytorch-lightning/ -# install dependencies RUN \ - #conda install "pip>20.1" && \ - pip list | grep torch && \ if [ ! 
-z "$LIGHTNING_VERSION" ] ; then \ rm -rf pytorch-lightning ; \ wget https://github.com/PyTorchLightning/pytorch-lightning/archive/${LIGHTNING_VERSION}.zip --progress=bar:force:noscroll ; \ @@ -31,7 +67,12 @@ RUN \ mv pytorch-lightning-*/ pytorch-lightning ; \ rm *.zip ; \ fi && \ - pip install ./pytorch-lightning["extra"] -U --no-cache-dir && \ + +# Instalations + python -c "fname = './pytorch-lightning/requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \ + pip install -r ./pytorch-lightning/requirements/extra.txt -U --no-cache-dir && \ + pip install -r ./pytorch-lightning/requirements/examples.txt -U --no-cache-dir && \ + pip install ./pytorch-lightning --no-cache-dir && \ rm -rf pytorch-lightning RUN python --version && \ From 5ab91055562a0f9a6770c16daf0d226e0609e3da Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 26 Mar 2021 12:57:27 +0100 Subject: [PATCH 03/10] ... --- dockers/nvidia/Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/dockers/nvidia/Dockerfile b/dockers/nvidia/Dockerfile index c80c1fa3fb894..2d4782073b147 100644 --- a/dockers/nvidia/Dockerfile +++ b/dockers/nvidia/Dockerfile @@ -37,11 +37,8 @@ RUN apt-get update -qq && \ cmake \ git \ wget \ - curl \ unzip \ ca-certificates \ - software-properties-common \ - libopenmpi-dev \ && \ # Cleaning From 20f0a7b3e8c181c2871913776a792a053e0133ad Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 26 Mar 2021 14:33:24 +0100 Subject: [PATCH 04/10] examples --- azure-pipelines.yml | 5 +++-- dockers/release/Dockerfile | 2 ++ pl_examples/{run_ddp-example.sh => run_ddp-examples.sh} | 0 pl_examples/run_examples-args.sh | 9 +++++++++ 4 files changed, 14 insertions(+), 2 deletions(-) rename pl_examples/{run_ddp-example.sh => run_ddp-examples.sh} (100%) create mode 100644 pl_examples/run_examples-args.sh diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 
d88a31ae9775a..6bee088ec6d88 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -116,8 +116,9 @@ jobs: - script: | set -e python -m pytest pl_examples -v --maxfail=2 --durations=0 - python setup.py install --user --quiet - bash pl_examples/run_ddp-example.sh + pip install . --user --quiet + bash pl_examples/run_examples-args.sh --gpus 1 + bash pl_examples/run_ddp-examples.sh # cd pl_examples/basic_examples # bash submit_ddp_job.sh # bash submit_ddp2_job.sh diff --git a/dockers/release/Dockerfile b/dockers/release/Dockerfile index 0eec1e41a5a3f..09c136d01fa9c 100644 --- a/dockers/release/Dockerfile +++ b/dockers/release/Dockerfile @@ -25,6 +25,8 @@ COPY ./ ./pytorch-lightning/ # install dependencies RUN \ + mv pytorch-lightning/notebooks . && \ + mv pytorch-lightning/pl_examples . && \ #conda install "pip>20.1" && \ if [ ! -z "$LIGHTNING_VERSION" ] ; then \ rm -rf pytorch-lightning ; \ diff --git a/pl_examples/run_ddp-example.sh b/pl_examples/run_ddp-examples.sh similarity index 100% rename from pl_examples/run_ddp-example.sh rename to pl_examples/run_ddp-examples.sh diff --git a/pl_examples/run_examples-args.sh b/pl_examples/run_examples-args.sh new file mode 100644 index 0000000000000..f3f74e4f2dd98 --- /dev/null +++ b/pl_examples/run_examples-args.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +echo $@ + +ARGS_DEFAULT=" --default_root_dir %(tmpdir)s --max_epochs 1 --batch_size 32 --limit_train_batches 2 --limit_val_batches 2" + +python pl_examples/basic_examples/simple_image_classifier.py ${ARGS_DEFAULT} $@ +python pl_examples/basic_examples/backbone_image_classifier.py ${ARGS_DEFAULT} $@ +python pl_examples/basic_examples/autoencoder.py ${ARGS_DEFAULT} $@ From 547b6be56d7ffbf508b9a53d1f4e353ab244a7f5 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 26 Mar 2021 14:39:21 +0100 Subject: [PATCH 05/10] examples --- azure-pipelines.yml | 2 +- pl_examples/run_examples-args.sh | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git 
a/azure-pipelines.yml b/azure-pipelines.yml index 6bee088ec6d88..76c8b01c43f19 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -117,7 +117,7 @@ jobs: set -e python -m pytest pl_examples -v --maxfail=2 --durations=0 pip install . --user --quiet - bash pl_examples/run_examples-args.sh --gpus 1 + bash pl_examples/run_examples-args.sh --gpus 1 --max_epochs 1 --batch_size 32 bash pl_examples/run_ddp-examples.sh # cd pl_examples/basic_examples # bash submit_ddp_job.sh diff --git a/pl_examples/run_examples-args.sh b/pl_examples/run_examples-args.sh index f3f74e4f2dd98..5e0dfa3a2b602 100644 --- a/pl_examples/run_examples-args.sh +++ b/pl_examples/run_examples-args.sh @@ -2,8 +2,8 @@ echo $@ -ARGS_DEFAULT=" --default_root_dir %(tmpdir)s --max_epochs 1 --batch_size 32 --limit_train_batches 2 --limit_val_batches 2" +python pl_examples/basic_examples/simple_image_classifier.py $@ -python pl_examples/basic_examples/simple_image_classifier.py ${ARGS_DEFAULT} $@ -python pl_examples/basic_examples/backbone_image_classifier.py ${ARGS_DEFAULT} $@ -python pl_examples/basic_examples/autoencoder.py ${ARGS_DEFAULT} $@ +python pl_examples/basic_examples/backbone_image_classifier.py $@ + +python pl_examples/basic_examples/autoencoder.py $@ From a43636ee3f1092d8e98980ddd7bf2a46454ae17b Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 26 Mar 2021 14:42:13 +0100 Subject: [PATCH 06/10] examples --- dockers/nvidia/Dockerfile | 3 +++ dockers/release/Dockerfile | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dockers/nvidia/Dockerfile b/dockers/nvidia/Dockerfile index 2d4782073b147..064695f7c1a8e 100644 --- a/dockers/nvidia/Dockerfile +++ b/dockers/nvidia/Dockerfile @@ -57,6 +57,9 @@ RUN apt-get update -qq && \ COPY ./ ./pytorch-lightning/ RUN \ + mv pytorch-lightning/notebooks . && \ + mv pytorch-lightning/pl_examples . && \ + # replace by specific version if asked if [ ! 
-z "$LIGHTNING_VERSION" ] ; then \ rm -rf pytorch-lightning ; \ wget https://github.com/PyTorchLightning/pytorch-lightning/archive/${LIGHTNING_VERSION}.zip --progress=bar:force:noscroll ; \ diff --git a/dockers/release/Dockerfile b/dockers/release/Dockerfile index 09c136d01fa9c..4f8a1c82a41e1 100644 --- a/dockers/release/Dockerfile +++ b/dockers/release/Dockerfile @@ -27,7 +27,7 @@ COPY ./ ./pytorch-lightning/ RUN \ mv pytorch-lightning/notebooks . && \ mv pytorch-lightning/pl_examples . && \ - #conda install "pip>20.1" && \ + # replace by specific version if asked if [ ! -z "$LIGHTNING_VERSION" ] ; then \ rm -rf pytorch-lightning ; \ wget https://github.com/PyTorchLightning/pytorch-lightning/archive/${LIGHTNING_VERSION}.zip --progress=bar:force:noscroll ; \ From e58ebd1c7c18c9e5b47d6ec8ec9a40610d9609da Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 26 Mar 2021 14:57:34 +0100 Subject: [PATCH 07/10] examples --- azure-pipelines.yml | 4 ++-- pl_examples/run_ddp-examples.sh | 15 ++++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 76c8b01c43f19..ad203374979e6 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -117,8 +117,8 @@ jobs: set -e python -m pytest pl_examples -v --maxfail=2 --durations=0 pip install . 
--user --quiet - bash pl_examples/run_examples-args.sh --gpus 1 --max_epochs 1 --batch_size 32 - bash pl_examples/run_ddp-examples.sh + bash pl_examples/run_examples-args.sh --gpus 1 --max_epochs 1 --batch_size 64 --limit_train_batches 5 --limit_val_batches 3 + bash pl_examples/run_ddp-examples.sh --max_epochs 1 --batch_size 32 --limit_train_batches 2 --limit_val_batches 2 # cd pl_examples/basic_examples # bash submit_ddp_job.sh # bash submit_ddp2_job.sh diff --git a/pl_examples/run_ddp-examples.sh b/pl_examples/run_ddp-examples.sh index f0c7695e766f2..6cc36364e397d 100644 --- a/pl_examples/run_ddp-examples.sh +++ b/pl_examples/run_ddp-examples.sh @@ -1,12 +1,13 @@ #!/bin/bash -ARGS_DEFAULT=" --default_root_dir %(tmpdir)s --max_epochs 1 --batch_size 32 --limit_train_batches 2 --limit_val_batches 2" ARGS_EXTRA_DDP=" --gpus 2 --accelerator ddp" ARGS_EXTRA_AMP=" --precision 16" -python pl_examples/basic_examples/simple_image_classifier.py ${ARGS_DEFAULT} ${ARGS_EXTRA_DDP} -python pl_examples/basic_examples/simple_image_classifier.py ${ARGS_DEFAULT} ${ARGS_EXTRA_DDP} ${ARGS_EXTRA_AMP} -python pl_examples/basic_examples/backbone_image_classifier.py ${ARGS_DEFAULT} ${ARGS_EXTRA_DDP} -python pl_examples/basic_examples/backbone_image_classifier.py ${ARGS_DEFAULT} ${ARGS_EXTRA_DDP} ${ARGS_EXTRA_AMP} -python pl_examples/basic_examples/autoencoder.py ${ARGS_DEFAULT} ${ARGS_EXTRA_DDP} -python pl_examples/basic_examples/autoencoder.py ${ARGS_DEFAULT} ${ARGS_EXTRA_DDP} ${ARGS_EXTRA_AMP} +python pl_examples/basic_examples/simple_image_classifier.py $@ ${ARGS_EXTRA_DDP} +python pl_examples/basic_examples/simple_image_classifier.py $@ ${ARGS_EXTRA_DDP} ${ARGS_EXTRA_AMP} + +python pl_examples/basic_examples/backbone_image_classifier.py $@ ${ARGS_EXTRA_DDP} +python pl_examples/basic_examples/backbone_image_classifier.py $@ ${ARGS_EXTRA_DDP} ${ARGS_EXTRA_AMP} + +python pl_examples/basic_examples/autoencoder.py $@ ${ARGS_EXTRA_DDP} +python pl_examples/basic_examples/autoencoder.py $@ 
${ARGS_EXTRA_DDP} ${ARGS_EXTRA_AMP} From fc84c19820a1b6e073cca5d6bd2cfcd0664327b3 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 26 Mar 2021 16:01:36 +0100 Subject: [PATCH 08/10] examples --- dockers/nvidia/Dockerfile | 3 ++- dockers/release/Dockerfile | 3 ++- pl_examples/run_examples-args.sh | 12 +++++++++--- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/dockers/nvidia/Dockerfile b/dockers/nvidia/Dockerfile index 064695f7c1a8e..2d52d317f59b8 100644 --- a/dockers/nvidia/Dockerfile +++ b/dockers/nvidia/Dockerfile @@ -54,9 +54,10 @@ RUN apt-get update -qq && \ rm get-pip.py && \ pip --version -COPY ./ ./pytorch-lightning/ +COPY ./ /home/pytorch-lightning/ RUN \ + cd /home && \ mv pytorch-lightning/notebooks . && \ mv pytorch-lightning/pl_examples . && \ # replace by specific version if asked diff --git a/dockers/release/Dockerfile b/dockers/release/Dockerfile index 4f8a1c82a41e1..5cd53385f660b 100644 --- a/dockers/release/Dockerfile +++ b/dockers/release/Dockerfile @@ -21,10 +21,11 @@ MAINTAINER PyTorchLightning ARG LIGHTNING_VERSION="" -COPY ./ ./pytorch-lightning/ +COPY ./ /home/pytorch-lightning/ # install dependencies RUN \ + cd /home && \ mv pytorch-lightning/notebooks . && \ mv pytorch-lightning/pl_examples . 
&& \ # replace by specific version if asked diff --git a/pl_examples/run_examples-args.sh b/pl_examples/run_examples-args.sh index 5e0dfa3a2b602..352869538cb18 100644 --- a/pl_examples/run_examples-args.sh +++ b/pl_examples/run_examples-args.sh @@ -2,8 +2,14 @@ echo $@ -python pl_examples/basic_examples/simple_image_classifier.py $@ +full_path=$(realpath $0) +echo $full_path -python pl_examples/basic_examples/backbone_image_classifier.py $@ +dir_path=$(dirname $full_path) +echo $dir_path -python pl_examples/basic_examples/autoencoder.py $@ +python ${dir_path}/basic_examples/simple_image_classifier.py $@ + +python ${dir_path}/basic_examples/backbone_image_classifier.py $@ + +python ${dir_path}/basic_examples/autoencoder.py $@ From 53279ec4011375b776d39800b577647172406773 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 26 Mar 2021 16:41:46 +0100 Subject: [PATCH 09/10] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Mocholí --- dockers/nvidia/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/nvidia/Dockerfile b/dockers/nvidia/Dockerfile index 2d52d317f59b8..b3a18fcb29c3e 100644 --- a/dockers/nvidia/Dockerfile +++ b/dockers/nvidia/Dockerfile @@ -69,7 +69,7 @@ RUN \ rm *.zip ; \ fi && \ -# Instalations +# Installations python -c "fname = './pytorch-lightning/requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \ pip install -r ./pytorch-lightning/requirements/extra.txt -U --no-cache-dir && \ pip install -r ./pytorch-lightning/requirements/examples.txt -U --no-cache-dir && \ From f55b4ddf14f9b3575222e0ddd9a11216620579d4 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 26 Mar 2021 18:11:21 +0100 Subject: [PATCH 10/10] Apply suggestions from code review Co-authored-by: Sean Naren --- dockers/nvidia/Dockerfile | 2 +- 1 file
changed, 1 insertion(+), 1 deletion(-) diff --git a/dockers/nvidia/Dockerfile b/dockers/nvidia/Dockerfile index b3a18fcb29c3e..ad1169c4450dd 100644 --- a/dockers/nvidia/Dockerfile +++ b/dockers/nvidia/Dockerfile @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -FROM nvcr.io/nvidia/cuda:11.2.2-runtime-ubuntu20.04 +FROM nvcr.io/nvidia/cuda:11.1.1-runtime-ubuntu20.04 MAINTAINER PyTorchLightning