From 29bdc12464db7dd252d501759b4d10ac711585f1 Mon Sep 17 00:00:00 2001 From: Indrayana Rustandi Date: Tue, 29 Sep 2020 06:46:48 -0400 Subject: [PATCH 01/26] add MNIST DALI example, update README.md --- pl_examples/basic_examples/README.md | 9 +- pl_examples/basic_examples/mnist_dali.py | 202 +++++++++++++++++++++++ 2 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 pl_examples/basic_examples/mnist_dali.py diff --git a/pl_examples/basic_examples/README.md b/pl_examples/basic_examples/README.md index 4dcf06a74bf92..4f168240dbebf 100644 --- a/pl_examples/basic_examples/README.md +++ b/pl_examples/basic_examples/README.md @@ -14,7 +14,14 @@ python mnist.py python mnist.py --gpus 2 --distributed_backend 'dp' ``` ---- +--- +#### MNIST with DALI +The MNIST example above using [NVIDIA DALI](https://developer.nvidia.com/DALI). +```bash +python mnist_dali.py +``` + +--- #### Image classifier Generic image classifier with an arbitrary backbone (ie: a simple system) ```bash diff --git a/pl_examples/basic_examples/mnist_dali.py b/pl_examples/basic_examples/mnist_dali.py new file mode 100644 index 0000000000000..d1b326e3a0713 --- /dev/null +++ b/pl_examples/basic_examples/mnist_dali.py @@ -0,0 +1,202 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from argparse import ArgumentParser + +import numpy as np +from random import shuffle + +import torch +import pytorch_lightning as pl +from torch.nn import functional as F +from torch.utils.data import DataLoader, random_split + +try: + from torchvision.datasets.mnist import MNIST + from torchvision import transforms +except Exception as e: + from tests.base.datasets import MNIST + +from nvidia.dali.pipeline import Pipeline +import nvidia.dali.ops as ops +import nvidia.dali.types as types +from nvidia.dali.plugin.pytorch import DALIClassificationIterator + + +class ExternalMNISTInputIterator(object): + def __init__(self, mnist_ds, batch_size): + self.batch_size = batch_size + self.mnist_ds = mnist_ds + self.indices = list(range(len(self.mnist_ds))) + shuffle(self.indices) + + def __iter__(self): + self.i = 0 + self.n = len(self.mnist_ds) + return self + + def __next__(self): + batch = [] + labels = [] + for _ in range(self.batch_size): + index = self.indices[self.i] + img, label = self.mnist_ds[index] + batch.append(img.numpy()) + labels.append(np.array([label], dtype = np.uint8)) + self.i = (self.i + 1) % self.n + return (batch, labels) + + +class ExternalSourcePipeline(Pipeline): + def __init__(self, batch_size, eii, num_threads, device_id): + super(ExternalSourcePipeline, self).__init__(batch_size, + num_threads, + device_id, + seed=12) + self.source = ops.ExternalSource(source = eii, num_outputs = 2) + + def define_graph(self): + images, labels = self.source() + return images, labels + + +# we extend DALIClassificationIterator with the __len__() function so that we can call len() on it +class DALIClassificationLoader(DALIClassificationIterator): + def __init__( + self, + pipelines, + size=-1, + reader_name=None, + auto_reset=False, + fill_last_batch=True, + dynamic_shape=False, + last_batch_padded=False, + ): + super().__init__(pipelines, + size, + reader_name, + auto_reset, + fill_last_batch, + dynamic_shape, + last_batch_padded) + + def __len__(self): + 
batch_count = self._size // (self._num_gpus * self.batch_size) + last_batch = 1 if self._fill_last_batch else 0 + return batch_count + last_batch + + +class LitClassifier(pl.LightningModule): + def __init__(self, hidden_dim=128, learning_rate=1e-3): + super().__init__() + self.save_hyperparameters() + + self.l1 = torch.nn.Linear(28 * 28, self.hparams.hidden_dim) + self.l2 = torch.nn.Linear(self.hparams.hidden_dim, 10) + + def forward(self, x): + x = x.view(x.size(0), -1) + x = torch.relu(self.l1(x)) + x = torch.relu(self.l2(x)) + return x + + def split_batch(self, batch): + return batch[0]['data'], batch[0]['label'].squeeze().long() + + def training_step(self, batch, batch_idx): + x, y = self.split_batch(batch) + y_hat = self(x) + loss = F.cross_entropy(y_hat, y) + return loss + + def validation_step(self, batch, batch_idx): + x, y = self.split_batch(batch) + y_hat = self(x) + loss = F.cross_entropy(y_hat, y) + result = pl.EvalResult(checkpoint_on=loss) + result.log('valid_loss', loss) + return result + + def test_step(self, batch, batch_idx): + x, y = self.split_batch(batch) + y_hat = self(x) + loss = F.cross_entropy(y_hat, y) + result = pl.EvalResult(checkpoint_on=loss) + result.log('test_loss', loss) + return result + + def configure_optimizers(self): + return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate) + + @staticmethod + def add_model_specific_args(parent_parser): + parser = ArgumentParser(parents=[parent_parser], add_help=False) + parser.add_argument('--hidden_dim', type=int, default=128) + parser.add_argument('--learning_rate', type=float, default=0.0001) + return parser + + +def cli_main(): + pl.seed_everything(1234) + + # ------------ + # args + # ------------ + parser = ArgumentParser() + parser.add_argument('--batch_size', default=32, type=int) + parser = pl.Trainer.add_argparse_args(parser) + parser = LitClassifier.add_model_specific_args(parser) + args = parser.parse_args() + + # ------------ + # data + # ------------ + dataset = 
MNIST('', train=True, download=True, transform=transforms.ToTensor()) + mnist_test = MNIST('', train=False, download=True, transform=transforms.ToTensor()) + mnist_train, mnist_val = random_split(dataset, [55000, 5000]) + + eii_train = ExternalMNISTInputIterator(mnist_train, args.batch_size) + eii_val = ExternalMNISTInputIterator(mnist_val, args.batch_size) + eii_test = ExternalMNISTInputIterator(mnist_test, args.batch_size) + + pipe_train = ExternalSourcePipeline(batch_size=args.batch_size, eii=eii_train, num_threads=2, device_id=0) + pipe_train.build() + train_loader = DALIClassificationLoader(pipe_train, size=len(mnist_train), auto_reset=True, fill_last_batch=False) + + pipe_val = ExternalSourcePipeline(batch_size=args.batch_size, eii=eii_val, num_threads=2, device_id=0) + pipe_val.build() + val_loader = DALIClassificationLoader(pipe_val, size=len(mnist_val), auto_reset=True, fill_last_batch=False) + + pipe_test = ExternalSourcePipeline(batch_size=args.batch_size, eii=eii_test, num_threads=2, device_id=0) + pipe_test.build() + test_loader = DALIClassificationLoader(pipe_test, size=len(mnist_test), auto_reset=True, fill_last_batch=False) + + # ------------ + # model + # ------------ + model = LitClassifier(args.hidden_dim, args.learning_rate) + + # ------------ + # training + # ------------ + trainer = pl.Trainer.from_argparse_args(args) + trainer.fit(model, train_loader, val_loader) + + # ------------ + # testing + # ------------ + trainer.test(test_dataloaders=test_loader) + + +if __name__ == '__main__': + cli_main() From 743afb7b96da17a8b940b8600373b62b552281e4 Mon Sep 17 00:00:00 2001 From: Indrayana Rustandi Date: Tue, 29 Sep 2020 08:42:36 -0400 Subject: [PATCH 02/26] Fix PEP8 warnings --- pl_examples/basic_examples/mnist_dali.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pl_examples/basic_examples/mnist_dali.py b/pl_examples/basic_examples/mnist_dali.py index d1b326e3a0713..7ce184d30cbbb 100644 --- 
a/pl_examples/basic_examples/mnist_dali.py +++ b/pl_examples/basic_examples/mnist_dali.py @@ -52,7 +52,7 @@ def __next__(self): index = self.indices[self.i] img, label = self.mnist_ds[index] batch.append(img.numpy()) - labels.append(np.array([label], dtype = np.uint8)) + labels.append(np.array([label], dtype=np.uint8)) self.i = (self.i + 1) % self.n return (batch, labels) @@ -60,10 +60,10 @@ def __next__(self): class ExternalSourcePipeline(Pipeline): def __init__(self, batch_size, eii, num_threads, device_id): super(ExternalSourcePipeline, self).__init__(batch_size, - num_threads, - device_id, - seed=12) - self.source = ops.ExternalSource(source = eii, num_outputs = 2) + num_threads, + device_id, + seed=12) + self.source = ops.ExternalSource(source=eii, num_outputs=2) def define_graph(self): images, labels = self.source() @@ -80,14 +80,14 @@ def __init__( auto_reset=False, fill_last_batch=True, dynamic_shape=False, - last_batch_padded=False, + last_batch_padded=False, ): - super().__init__(pipelines, - size, - reader_name, - auto_reset, - fill_last_batch, - dynamic_shape, + super().__init__(pipelines, + size, + reader_name, + auto_reset, + fill_last_batch, + dynamic_shape, last_batch_padded) def __len__(self): From cd9c892a9c8f13ec46dbdaec8d1bd91446fe394d Mon Sep 17 00:00:00 2001 From: Indrayana Rustandi Date: Tue, 29 Sep 2020 11:06:28 -0400 Subject: [PATCH 03/26] reformatted using black --- pl_examples/basic_examples/mnist_dali.py | 31 +++++++++--------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/pl_examples/basic_examples/mnist_dali.py b/pl_examples/basic_examples/mnist_dali.py index 7ce184d30cbbb..a6d740a30f79b 100644 --- a/pl_examples/basic_examples/mnist_dali.py +++ b/pl_examples/basic_examples/mnist_dali.py @@ -59,10 +59,7 @@ def __next__(self): class ExternalSourcePipeline(Pipeline): def __init__(self, batch_size, eii, num_threads, device_id): - super(ExternalSourcePipeline, self).__init__(batch_size, - num_threads, - device_id, - 
seed=12) + super(ExternalSourcePipeline, self).__init__(batch_size, num_threads, device_id, seed=12) self.source = ops.ExternalSource(source=eii, num_outputs=2) def define_graph(self): @@ -82,13 +79,7 @@ def __init__( dynamic_shape=False, last_batch_padded=False, ): - super().__init__(pipelines, - size, - reader_name, - auto_reset, - fill_last_batch, - dynamic_shape, - last_batch_padded) + super().__init__(pipelines, size, reader_name, auto_reset, fill_last_batch, dynamic_shape, last_batch_padded) def __len__(self): batch_count = self._size // (self._num_gpus * self.batch_size) @@ -111,7 +102,7 @@ def forward(self, x): return x def split_batch(self, batch): - return batch[0]['data'], batch[0]['label'].squeeze().long() + return batch[0]["data"], batch[0]["label"].squeeze().long() def training_step(self, batch, batch_idx): x, y = self.split_batch(batch) @@ -124,7 +115,7 @@ def validation_step(self, batch, batch_idx): y_hat = self(x) loss = F.cross_entropy(y_hat, y) result = pl.EvalResult(checkpoint_on=loss) - result.log('valid_loss', loss) + result.log("valid_loss", loss) return result def test_step(self, batch, batch_idx): @@ -132,7 +123,7 @@ def test_step(self, batch, batch_idx): y_hat = self(x) loss = F.cross_entropy(y_hat, y) result = pl.EvalResult(checkpoint_on=loss) - result.log('test_loss', loss) + result.log("test_loss", loss) return result def configure_optimizers(self): @@ -141,8 +132,8 @@ def configure_optimizers(self): @staticmethod def add_model_specific_args(parent_parser): parser = ArgumentParser(parents=[parent_parser], add_help=False) - parser.add_argument('--hidden_dim', type=int, default=128) - parser.add_argument('--learning_rate', type=float, default=0.0001) + parser.add_argument("--hidden_dim", type=int, default=128) + parser.add_argument("--learning_rate", type=float, default=0.0001) return parser @@ -153,7 +144,7 @@ def cli_main(): # args # ------------ parser = ArgumentParser() - parser.add_argument('--batch_size', default=32, type=int) + 
parser.add_argument("--batch_size", default=32, type=int) parser = pl.Trainer.add_argparse_args(parser) parser = LitClassifier.add_model_specific_args(parser) args = parser.parse_args() @@ -161,8 +152,8 @@ def cli_main(): # ------------ # data # ------------ - dataset = MNIST('', train=True, download=True, transform=transforms.ToTensor()) - mnist_test = MNIST('', train=False, download=True, transform=transforms.ToTensor()) + dataset = MNIST("", train=True, download=True, transform=transforms.ToTensor()) + mnist_test = MNIST("", train=False, download=True, transform=transforms.ToTensor()) mnist_train, mnist_val = random_split(dataset, [55000, 5000]) eii_train = ExternalMNISTInputIterator(mnist_train, args.batch_size) @@ -198,5 +189,5 @@ def cli_main(): trainer.test(test_dataloaders=test_loader) -if __name__ == '__main__': +if __name__ == "__main__": cli_main() From 221fe9b78b70f4bd89e85ab1db7b8a68104203d3 Mon Sep 17 00:00:00 2001 From: Indrayana Rustandi Date: Tue, 29 Sep 2020 11:08:53 -0400 Subject: [PATCH 04/26] add mnist_dali to test_examples.py --- pl_examples/test_examples.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pl_examples/test_examples.py b/pl_examples/test_examples.py index 7fe5d4ed604dc..051264dc2c238 100644 --- a/pl_examples/test_examples.py +++ b/pl_examples/test_examples.py @@ -86,9 +86,10 @@ @pytest.mark.parametrize('cli_args', [cpu_args]) def test_examples_cpu(cli_args): from pl_examples.basic_examples.mnist import cli_main as mnist_cli + from pl_examples.basic_examples.mnist_dali import cli_main as mnist_dali_cli from pl_examples.basic_examples.image_classifier import cli_main as ic_cli from pl_examples.basic_examples.autoencoder import cli_main as ae_cli - for cli_cmd in [mnist_cli, ic_cli, ae_cli]: + for cli_cmd in [mnist_cli, mnist_dali_cli, ic_cli, ae_cli]: with mock.patch("argparse._sys.argv", ["any.py"] + cli_args.strip().split()): cli_cmd() From 4b4ebe9f53b1a28b722df196f9b6e4113f96053c Mon Sep 17 00:00:00 2001 
From: Indrayana Rustandi Date: Tue, 29 Sep 2020 12:19:21 -0400 Subject: [PATCH 05/26] Add documentation as docstrings --- pl_examples/basic_examples/mnist_dali.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pl_examples/basic_examples/mnist_dali.py b/pl_examples/basic_examples/mnist_dali.py index a6d740a30f79b..4a461c0a13366 100644 --- a/pl_examples/basic_examples/mnist_dali.py +++ b/pl_examples/basic_examples/mnist_dali.py @@ -34,6 +34,9 @@ class ExternalMNISTInputIterator(object): + """ + This iterator class wraps torchvision's MNIST dataset and returns the images and labels in batches + """ def __init__(self, mnist_ds, batch_size): self.batch_size = batch_size self.mnist_ds = mnist_ds @@ -58,6 +61,9 @@ def __next__(self): class ExternalSourcePipeline(Pipeline): + """ + This DALI pipeline class just contains the MNIST iterator + """ def __init__(self, batch_size, eii, num_threads, device_id): super(ExternalSourcePipeline, self).__init__(batch_size, num_threads, device_id, seed=12) self.source = ops.ExternalSource(source=eii, num_outputs=2) @@ -67,8 +73,10 @@ def define_graph(self): return images, labels -# we extend DALIClassificationIterator with the __len__() function so that we can call len() on it class DALIClassificationLoader(DALIClassificationIterator): + """ + This class extends DALI's original DALIClassificationIterator with the __len__() function so that we can call len() on it + """ def __init__( self, pipelines, From 4cb797eb542a851404d996d055204df0a1e6f0e7 Mon Sep 17 00:00:00 2001 From: Indrayana Rustandi Date: Wed, 30 Sep 2020 07:35:18 -0400 Subject: [PATCH 06/26] add nvidia-pyindex and nvidia-dali-cuda100 --- requirements/examples.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/requirements/examples.txt b/requirements/examples.txt index c87d10a39346f..0cd14a5f3ccfc 100644 --- a/requirements/examples.txt +++ b/requirements/examples.txt @@ -1,2 +1,4 @@ torchvision>=0.4.1 -gym>=0.17.0 \ No 
newline at end of file +gym>=0.17.0 +nvidia-pyindex +nvidia-dali-cuda100 From 3b3a5ddf41722a457f9c7ec9ee5d0ebbfbf3f6fb Mon Sep 17 00:00:00 2001 From: Indrayana Rustandi Date: Wed, 30 Sep 2020 07:39:06 -0400 Subject: [PATCH 07/26] replace nvidia-pyindex with --extra-index-url --- requirements/examples.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/examples.txt b/requirements/examples.txt index 0cd14a5f3ccfc..1492feab94f7f 100644 --- a/requirements/examples.txt +++ b/requirements/examples.txt @@ -1,4 +1,4 @@ +--extra-index-url https://developer.download.nvidia.com/compute/redist torchvision>=0.4.1 gym>=0.17.0 -nvidia-pyindex nvidia-dali-cuda100 From 31fa2a9b3321f9cb225db5dc156566b1c0be368b Mon Sep 17 00:00:00 2001 From: Indrayana Rustandi Date: Wed, 30 Sep 2020 09:25:33 -0400 Subject: [PATCH 08/26] mark mnist_dali test as Linux and GPU only --- pl_examples/test_examples.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pl_examples/test_examples.py b/pl_examples/test_examples.py index 051264dc2c238..110b5cdcb3c87 100644 --- a/pl_examples/test_examples.py +++ b/pl_examples/test_examples.py @@ -86,10 +86,19 @@ @pytest.mark.parametrize('cli_args', [cpu_args]) def test_examples_cpu(cli_args): from pl_examples.basic_examples.mnist import cli_main as mnist_cli - from pl_examples.basic_examples.mnist_dali import cli_main as mnist_dali_cli from pl_examples.basic_examples.image_classifier import cli_main as ic_cli from pl_examples.basic_examples.autoencoder import cli_main as ae_cli - for cli_cmd in [mnist_cli, mnist_dali_cli, ic_cli, ae_cli]: + for cli_cmd in [mnist_cli, ic_cli, ae_cli]: with mock.patch("argparse._sys.argv", ["any.py"] + cli_args.strip().split()): cli_cmd() + + +@pytest.mark.skipif(torch.cuda.device_count() < 1, reason="test requires GPU machine") +@pytest.mark.skipif(platform.system() != 'Linux', reason='Only applies to Linux platform.') +@pytest.mark.parametrize('cli_args', [cpu_args]) 
+def test_examples_mnist_dali(cli_args): + from pl_examples.basic_examples.mnist_dali import cli_main + + with mock.patch("argparse._sys.argv", ["any.py"] + cli_args.strip().split()): + cli_main() From daa9a4b83e86707395a2696624739a372824abf7 Mon Sep 17 00:00:00 2001 From: Indrayana Rustandi Date: Wed, 30 Sep 2020 09:38:57 -0400 Subject: [PATCH 09/26] adjust CUDA docker and examples.txt, fix import error in test_examples.py --- dockers/base-cuda/Dockerfile | 2 ++ pl_examples/test_examples.py | 1 + requirements/examples.txt | 2 -- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index 75dec1906ab3e..95a48307d2351 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -7,6 +7,7 @@ ARG CUDNN_VERSION=7 ARG CUDA_VERSION=10.1 +ARG CUDA_DALI_VERSION=100 FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel # FROM nvidia/cuda:${CUDA_VERSION}-devel @@ -94,6 +95,7 @@ RUN \ MAKEFLAGS="-j$(nproc)" ; pip install -r requirements-extra.txt && \ pip install -r requirements-tests.txt --upgrade-strategy only-if-needed --no-cache-dir && \ pip install -r requirements-examples.txt --upgrade-strategy only-if-needed --no-cache-dir && \ + pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda-${CUDA_DALI_VERSION} \ rm requirements* && \ # Show what we have pip --version && \ diff --git a/pl_examples/test_examples.py b/pl_examples/test_examples.py index 110b5cdcb3c87..72f8084ff6e93 100644 --- a/pl_examples/test_examples.py +++ b/pl_examples/test_examples.py @@ -1,4 +1,5 @@ from unittest import mock +import platform import torch import pytest diff --git a/requirements/examples.txt b/requirements/examples.txt index 1492feab94f7f..6e48778cb222a 100644 --- a/requirements/examples.txt +++ b/requirements/examples.txt @@ -1,4 +1,2 @@ ---extra-index-url https://developer.download.nvidia.com/compute/redist torchvision>=0.4.1 gym>=0.17.0 
-nvidia-dali-cuda100 From 780d5189e78eb306ca6c27791884e41ec4e6345b Mon Sep 17 00:00:00 2001 From: Indrayana Rustandi Date: Wed, 30 Sep 2020 10:37:13 -0400 Subject: [PATCH 10/26] adjust the GPU check --- pl_examples/test_examples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pl_examples/test_examples.py b/pl_examples/test_examples.py index 72f8084ff6e93..ddcedd602f78f 100644 --- a/pl_examples/test_examples.py +++ b/pl_examples/test_examples.py @@ -95,7 +95,7 @@ def test_examples_cpu(cli_args): cli_cmd() -@pytest.mark.skipif(torch.cuda.device_count() < 1, reason="test requires GPU machine") +@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine") @pytest.mark.skipif(platform.system() != 'Linux', reason='Only applies to Linux platform.') @pytest.mark.parametrize('cli_args', [cpu_args]) def test_examples_mnist_dali(cli_args): From 095011118fc74cac2cb1f63792a0aeae0904fa74 Mon Sep 17 00:00:00 2001 From: Indrayana Rustandi Date: Thu, 22 Oct 2020 05:38:25 -0400 Subject: [PATCH 11/26] Exit when DALI is not available --- pl_examples/basic_examples/mnist_dali.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pl_examples/basic_examples/mnist_dali.py b/pl_examples/basic_examples/mnist_dali.py index 4a461c0a13366..b1390c0993d7b 100644 --- a/pl_examples/basic_examples/mnist_dali.py +++ b/pl_examples/basic_examples/mnist_dali.py @@ -15,6 +15,7 @@ import numpy as np from random import shuffle +import sys import torch import pytorch_lightning as pl @@ -27,10 +28,13 @@ except Exception as e: from tests.base.datasets import MNIST -from nvidia.dali.pipeline import Pipeline -import nvidia.dali.ops as ops -import nvidia.dali.types as types -from nvidia.dali.plugin.pytorch import DALIClassificationIterator +try: + from nvidia.dali.pipeline import Pipeline + import nvidia.dali.ops as ops + import nvidia.dali.types as types + from nvidia.dali.plugin.pytorch import DALIClassificationIterator +except 
(ImportError, ModuleNotFoundError): + sys.exit('NVIDIA DALI is not available, exiting') class ExternalMNISTInputIterator(object): From d5e5779c5590d0b28739e9ea33c3874e29640b82 Mon Sep 17 00:00:00 2001 From: Indrayana Rustandi Date: Thu, 22 Oct 2020 09:52:00 -0400 Subject: [PATCH 12/26] remove requirements-examples.txt and DALI pip install --- dockers/base-cuda/Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index d303ca53c2fa8..b106836d38acf 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -110,8 +110,6 @@ RUN \ # Install all requirements MAKEFLAGS="-j$(nproc)" ; pip install -r requirements-extra.txt && \ pip install -r requirements-tests.txt --upgrade-strategy only-if-needed && \ - pip install -r requirements-examples.txt --upgrade-strategy only-if-needed && \ - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda-${CUDA_DALI_VERSION} \ rm requirements* RUN \ From 9575a04157f8ae367de21d11fab6c9b4413ba582 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 4 Nov 2020 20:07:39 +0000 Subject: [PATCH 13/26] Refactored example, moved to new logging api, added runtime check for test and dali script --- pl_examples/basic_examples/mnist_dali.py | 43 ++++++++++++------------ pl_examples/test_examples.py | 16 +++++++-- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/pl_examples/basic_examples/mnist_dali.py b/pl_examples/basic_examples/mnist_dali.py index b1390c0993d7b..2e558872cb225 100644 --- a/pl_examples/basic_examples/mnist_dali.py +++ b/pl_examples/basic_examples/mnist_dali.py @@ -12,15 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from argparse import ArgumentParser - -import numpy as np from random import shuffle -import sys +import numpy as np import torch -import pytorch_lightning as pl from torch.nn import functional as F -from torch.utils.data import DataLoader, random_split +from torch.utils.data import random_split + +import pytorch_lightning as pl try: from torchvision.datasets.mnist import MNIST @@ -34,13 +33,14 @@ import nvidia.dali.types as types from nvidia.dali.plugin.pytorch import DALIClassificationIterator except (ImportError, ModuleNotFoundError): - sys.exit('NVIDIA DALI is not available, exiting') + raise RuntimeError('NVIDIA DALI is not available') class ExternalMNISTInputIterator(object): """ This iterator class wraps torchvision's MNIST dataset and returns the images and labels in batches """ + def __init__(self, mnist_ds, batch_size): self.batch_size = batch_size self.mnist_ds = mnist_ds @@ -68,6 +68,7 @@ class ExternalSourcePipeline(Pipeline): """ This DALI pipeline class just contains the MNIST iterator """ + def __init__(self, batch_size, eii, num_threads, device_id): super(ExternalSourcePipeline, self).__init__(batch_size, num_threads, device_id, seed=12) self.source = ops.ExternalSource(source=eii, num_outputs=2) @@ -81,15 +82,16 @@ class DALIClassificationLoader(DALIClassificationIterator): """ This class extends DALI's original DALIClassificationIterator with the __len__() function so that we can call len() on it """ + def __init__( - self, - pipelines, - size=-1, - reader_name=None, - auto_reset=False, - fill_last_batch=True, - dynamic_shape=False, - last_batch_padded=False, + self, + pipelines, + size=-1, + reader_name=None, + auto_reset=False, + fill_last_batch=True, + dynamic_shape=False, + last_batch_padded=False, ): super().__init__(pipelines, size, reader_name, auto_reset, fill_last_batch, dynamic_shape, last_batch_padded) @@ -104,8 +106,8 @@ def __init__(self, hidden_dim=128, learning_rate=1e-3): super().__init__() self.save_hyperparameters() - self.l1 = 
torch.nn.Linear(28 * 28, self.hparams.hidden_dim) - self.l2 = torch.nn.Linear(self.hparams.hidden_dim, 10) + self.l1 = torch.nn.Linear(28 * 28, hidden_dim) + self.l2 = torch.nn.Linear(hidden_dim, 10) def forward(self, x): x = x.view(x.size(0), -1) @@ -120,23 +122,20 @@ def training_step(self, batch, batch_idx): x, y = self.split_batch(batch) y_hat = self(x) loss = F.cross_entropy(y_hat, y) + self.log('loss', loss) return loss def validation_step(self, batch, batch_idx): x, y = self.split_batch(batch) y_hat = self(x) loss = F.cross_entropy(y_hat, y) - result = pl.EvalResult(checkpoint_on=loss) - result.log("valid_loss", loss) - return result + self.log("valid_loss", loss) def test_step(self, batch, batch_idx): x, y = self.split_batch(batch) y_hat = self(x) loss = F.cross_entropy(y_hat, y) - result = pl.EvalResult(checkpoint_on=loss) - result.log("test_loss", loss) - return result + self.log("test_loss", loss) def configure_optimizers(self): return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate) diff --git a/pl_examples/test_examples.py b/pl_examples/test_examples.py index ddcedd602f78f..f2366c721002c 100644 --- a/pl_examples/test_examples.py +++ b/pl_examples/test_examples.py @@ -1,7 +1,18 @@ -from unittest import mock import platform -import torch +from unittest import mock + import pytest +import torch + +try: + from nvidia.dali.pipeline import Pipeline + import nvidia.dali.ops as ops + import nvidia.dali.types as types + from nvidia.dali.plugin.pytorch import DALIClassificationIterator + + DALI_AVAILABLE = True +except (ImportError, ModuleNotFoundError): + DALI_AVAILABLE = False dp_16_args = """ --max_epochs 1 \ @@ -95,6 +106,7 @@ def test_examples_cpu(cli_args): cli_cmd() +@pytest.mark.skipif(not DALI_AVAILABLE, reason="Nvidia DALI required") @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine") @pytest.mark.skipif(platform.system() != 'Linux', reason='Only applies to Linux platform.') 
@pytest.mark.parametrize('cli_args', [cpu_args]) From 8d911286b270b303f99eca004e225e876f470b90 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 4 Nov 2020 20:15:03 +0000 Subject: [PATCH 14/26] Patch to reflect the mnist example module --- pl_examples/basic_examples/mnist_dali.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pl_examples/basic_examples/mnist_dali.py b/pl_examples/basic_examples/mnist_dali.py index 2e558872cb225..823005fda5ce2 100644 --- a/pl_examples/basic_examples/mnist_dali.py +++ b/pl_examples/basic_examples/mnist_dali.py @@ -106,8 +106,8 @@ def __init__(self, hidden_dim=128, learning_rate=1e-3): super().__init__() self.save_hyperparameters() - self.l1 = torch.nn.Linear(28 * 28, hidden_dim) - self.l2 = torch.nn.Linear(hidden_dim, 10) + self.l1 = torch.nn.Linear(28 * 28, self.hparams.hidden_dim) + self.l2 = torch.nn.Linear(self.hparams.hidden_dim, 10) def forward(self, x): x = x.view(x.size(0), -1) @@ -122,20 +122,19 @@ def training_step(self, batch, batch_idx): x, y = self.split_batch(batch) y_hat = self(x) loss = F.cross_entropy(y_hat, y) - self.log('loss', loss) return loss def validation_step(self, batch, batch_idx): x, y = self.split_batch(batch) y_hat = self(x) loss = F.cross_entropy(y_hat, y) - self.log("valid_loss", loss) + self.log('valid_loss', loss) def test_step(self, batch, batch_idx): x, y = self.split_batch(batch) y_hat = self(x) loss = F.cross_entropy(y_hat, y) - self.log("test_loss", loss) + self.log('test_loss', loss) def configure_optimizers(self): return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate) @@ -143,8 +142,8 @@ def configure_optimizers(self): @staticmethod def add_model_specific_args(parent_parser): parser = ArgumentParser(parents=[parent_parser], add_help=False) - parser.add_argument("--hidden_dim", type=int, default=128) - parser.add_argument("--learning_rate", type=float, default=0.0001) + parser.add_argument('--hidden_dim', type=int, default=128) + 
parser.add_argument('--learning_rate', type=float, default=0.0001) return parser @@ -155,7 +154,7 @@ def cli_main(): # args # ------------ parser = ArgumentParser() - parser.add_argument("--batch_size", default=32, type=int) + parser.add_argument('--batch_size', default=32, type=int) parser = pl.Trainer.add_argparse_args(parser) parser = LitClassifier.add_model_specific_args(parser) args = parser.parse_args() @@ -163,8 +162,8 @@ def cli_main(): # ------------ # data # ------------ - dataset = MNIST("", train=True, download=True, transform=transforms.ToTensor()) - mnist_test = MNIST("", train=False, download=True, transform=transforms.ToTensor()) + dataset = MNIST('', train=True, download=True, transform=transforms.ToTensor()) + mnist_test = MNIST('', train=False, download=True, transform=transforms.ToTensor()) mnist_train, mnist_val = random_split(dataset, [55000, 5000]) eii_train = ExternalMNISTInputIterator(mnist_train, args.batch_size) From 3c6998dab68dc80ebc320bb7099b6a6fc2496c0e Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Wed, 4 Nov 2020 21:48:27 +0100 Subject: [PATCH 15/26] add req. 
--- requirements/examples.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/examples.txt b/requirements/examples.txt index 0afa62f9ffa95..c0e068def10b7 100644 --- a/requirements/examples.txt +++ b/requirements/examples.txt @@ -1,2 +1,3 @@ torchvision>=0.4.1,<0.9.0 gym>=0.17.0 +nvidia-dali --extra-index-url https://developer.download.nvidia.com/compute/redist \ No newline at end of file From d256e6dd2ea344490d7ea688f28497c003bf6931 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Wed, 4 Nov 2020 21:52:13 +0100 Subject: [PATCH 16/26] Apply suggestions from code review --- pl_examples/basic_examples/mnist_dali.py | 4 ++-- pl_examples/test_examples.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pl_examples/basic_examples/mnist_dali.py b/pl_examples/basic_examples/mnist_dali.py index 823005fda5ce2..0138e9044be48 100644 --- a/pl_examples/basic_examples/mnist_dali.py +++ b/pl_examples/basic_examples/mnist_dali.py @@ -24,7 +24,7 @@ try: from torchvision.datasets.mnist import MNIST from torchvision import transforms -except Exception as e: +except Exception: from tests.base.datasets import MNIST try: @@ -33,7 +33,7 @@ import nvidia.dali.types as types from nvidia.dali.plugin.pytorch import DALIClassificationIterator except (ImportError, ModuleNotFoundError): - raise RuntimeError('NVIDIA DALI is not available') + raise ImportError('NVIDIA DALI is not available') class ExternalMNISTInputIterator(object): diff --git a/pl_examples/test_examples.py b/pl_examples/test_examples.py index f2366c721002c..c0b4bff23373c 100644 --- a/pl_examples/test_examples.py +++ b/pl_examples/test_examples.py @@ -9,10 +9,10 @@ import nvidia.dali.ops as ops import nvidia.dali.types as types from nvidia.dali.plugin.pytorch import DALIClassificationIterator - - DALI_AVAILABLE = True except (ImportError, ModuleNotFoundError): DALI_AVAILABLE = False +else: + DALI_AVAILABLE = True dp_16_args = """ --max_epochs 1 \ From 832b5e02c2c32c457d6542990df05590849b6c89 
Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 4 Nov 2020 21:02:06 +0000 Subject: [PATCH 17/26] Removed requirement as it breaks CPU install, added note in README to install DALI --- pl_examples/basic_examples/README.md | 1 + requirements/examples.txt | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/pl_examples/basic_examples/README.md b/pl_examples/basic_examples/README.md index 4f168240dbebf..18ae204396290 100644 --- a/pl_examples/basic_examples/README.md +++ b/pl_examples/basic_examples/README.md @@ -17,6 +17,7 @@ python mnist.py --gpus 2 --distributed_backend 'dp' --- #### MNIST with DALI The MNIST example above using [NVIDIA DALI](https://developer.nvidia.com/DALI). +Requires NVIDIA DALI to be installed based on your CUDA version, see [here](https://docs.nvidia.com/deeplearning/dali/user-guide/docs/installation.html). ```bash python mnist_dali.py ``` diff --git a/requirements/examples.txt b/requirements/examples.txt index c0e068def10b7..0afa62f9ffa95 100644 --- a/requirements/examples.txt +++ b/requirements/examples.txt @@ -1,3 +1,2 @@ torchvision>=0.4.1,<0.9.0 gym>=0.17.0 -nvidia-dali --extra-index-url https://developer.download.nvidia.com/compute/redist \ No newline at end of file From 7751cbd667f283defe7f6c2cc938aad790b49393 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 5 Nov 2020 16:59:06 +0100 Subject: [PATCH 18/26] add DALI to Drone --- .drone.yml | 1 + dockers/base-cuda/Dockerfile | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.drone.yml b/.drone.yml index 5e6c08f7a8256..bf6fb130dce0b 100644 --- a/.drone.yml +++ b/.drone.yml @@ -32,6 +32,7 @@ steps: - pip --version - nvidia-smi - pip install -r ./requirements/devel.txt --upgrade-strategy only-if-needed -v --no-cache-dir + - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda100 - pip list - coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --color=yes --durations=25 # --flake8 - 
python -m pytest benchmarks pl_examples -v --color=yes --maxfail=2 --durations=0 # --flake8 diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index f2db952efde64..e22b5a862a7d7 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -21,7 +21,6 @@ ARG CUDNN_VERSION=8 ARG CUDA_VERSION=10.2 -ARG CUDA_DALI_VERSION=100 # FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu18.04 From 6472e7fd94d93104de315886a07f8af0b5c01990 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 5 Nov 2020 17:40:40 +0100 Subject: [PATCH 19/26] test examples --- pl_examples/test_examples.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/pl_examples/test_examples.py b/pl_examples/test_examples.py index c0b4bff23373c..58b49006447aa 100644 --- a/pl_examples/test_examples.py +++ b/pl_examples/test_examples.py @@ -5,9 +5,9 @@ import torch try: - from nvidia.dali.pipeline import Pipeline import nvidia.dali.ops as ops import nvidia.dali.types as types + from nvidia.dali.pipeline import Pipeline from nvidia.dali.plugin.pytorch import DALIClassificationIterator except (ImportError, ModuleNotFoundError): DALI_AVAILABLE = False @@ -40,7 +40,7 @@ --precision 16 \ """ - +# TODO # @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") # @pytest.mark.parametrize('cli_args', [dp_16_args]) # def test_examples_dp_mnist(cli_args): @@ -50,6 +50,7 @@ # cli_main() +# TODO # @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") # @pytest.mark.parametrize('cli_args', [dp_16_args]) # def test_examples_dp_image_classifier(cli_args): @@ -57,8 +58,9 @@ # # with mock.patch("argparse._sys.argv", ["any.py"] + cli_args.strip().split()): # cli_main() -# -# + + +# TODO # @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") # 
@pytest.mark.parametrize('cli_args', [dp_16_args]) # def test_examples_dp_autoencoder(cli_args): @@ -68,6 +70,7 @@ # cli_main() +# TODO # @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") # @pytest.mark.parametrize('cli_args', [ddp_args]) # def test_examples_ddp_mnist(cli_args): @@ -75,8 +78,9 @@ # # with mock.patch("argparse._sys.argv", ["any.py"] + cli_args.strip().split()): # cli_main() -# -# + + +# TODO # @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") # @pytest.mark.parametrize('cli_args', [ddp_args]) # def test_examples_ddp_image_classifier(cli_args): @@ -84,8 +88,9 @@ # # with mock.patch("argparse._sys.argv", ["any.py"] + cli_args.strip().split()): # cli_main() -# -# + + +# TODO # @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") # @pytest.mark.parametrize('cli_args', [ddp_args]) # def test_examples_ddp_autoencoder(cli_args): From abb1d6b1e16facb5e89acc0a232ae662ce6cd2f6 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 5 Nov 2020 18:36:52 +0100 Subject: [PATCH 20/26] Apply suggestions from code review --- pl_examples/basic_examples/mnist_dali.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pl_examples/basic_examples/mnist_dali.py b/pl_examples/basic_examples/mnist_dali.py index 0138e9044be48..1e8f05b18de98 100644 --- a/pl_examples/basic_examples/mnist_dali.py +++ b/pl_examples/basic_examples/mnist_dali.py @@ -13,6 +13,7 @@ # limitations under the License. 
from argparse import ArgumentParser from random import shuffle +from warnings import warn import numpy as np import torch @@ -28,12 +29,12 @@ from tests.base.datasets import MNIST try: - from nvidia.dali.pipeline import Pipeline import nvidia.dali.ops as ops import nvidia.dali.types as types + from nvidia.dali.pipeline import Pipeline from nvidia.dali.plugin.pytorch import DALIClassificationIterator except (ImportError, ModuleNotFoundError): - raise ImportError('NVIDIA DALI is not available') + warn('NVIDIA DALI is not available') class ExternalMNISTInputIterator(object): From a6223aaa085cd6f04e42825c95d7833c3569541a Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 5 Nov 2020 18:46:41 +0100 Subject: [PATCH 21/26] imports --- pl_examples/basic_examples/mnist_dali.py | 1 + pl_examples/test_examples.py | 5 +---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pl_examples/basic_examples/mnist_dali.py b/pl_examples/basic_examples/mnist_dali.py index 1e8f05b18de98..b14ffa15550ed 100644 --- a/pl_examples/basic_examples/mnist_dali.py +++ b/pl_examples/basic_examples/mnist_dali.py @@ -35,6 +35,7 @@ from nvidia.dali.plugin.pytorch import DALIClassificationIterator except (ImportError, ModuleNotFoundError): warn('NVIDIA DALI is not available') + ops, types, Pipeline, DALIClassificationIterator = ..., ..., ..., ... 
class ExternalMNISTInputIterator(object): diff --git a/pl_examples/test_examples.py b/pl_examples/test_examples.py index 58b49006447aa..60f10a637e583 100644 --- a/pl_examples/test_examples.py +++ b/pl_examples/test_examples.py @@ -5,10 +5,7 @@ import torch try: - import nvidia.dali.ops as ops - import nvidia.dali.types as types - from nvidia.dali.pipeline import Pipeline - from nvidia.dali.plugin.pytorch import DALIClassificationIterator + from nvidia.dali import ops, types, pipeline, plugin except (ImportError, ModuleNotFoundError): DALI_AVAILABLE = False else: From 3d3e75f1ee8c94fdd183720eb7088ad548442935 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 5 Nov 2020 18:55:44 +0100 Subject: [PATCH 22/26] ABC --- pl_examples/basic_examples/mnist_dali.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pl_examples/basic_examples/mnist_dali.py b/pl_examples/basic_examples/mnist_dali.py index b14ffa15550ed..b127fff9c25f7 100644 --- a/pl_examples/basic_examples/mnist_dali.py +++ b/pl_examples/basic_examples/mnist_dali.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from abc import ABC from argparse import ArgumentParser from random import shuffle from warnings import warn @@ -35,7 +36,7 @@ from nvidia.dali.plugin.pytorch import DALIClassificationIterator except (ImportError, ModuleNotFoundError): warn('NVIDIA DALI is not available') - ops, types, Pipeline, DALIClassificationIterator = ..., ..., ..., ... 
+ ops, types, Pipeline, DALIClassificationIterator = ..., ..., ABC, ABC class ExternalMNISTInputIterator(object): From 61da5e168c7534ec2a97044d630496c08f62aa5c Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 5 Nov 2020 19:33:10 +0100 Subject: [PATCH 23/26] cuda --- .drone.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.drone.yml b/.drone.yml index bf6fb130dce0b..3d9b4db343079 100644 --- a/.drone.yml +++ b/.drone.yml @@ -32,7 +32,7 @@ steps: - pip --version - nvidia-smi - pip install -r ./requirements/devel.txt --upgrade-strategy only-if-needed -v --no-cache-dir - - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda100 + - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda${CUDA_VERSION/./} --upgrade-strategy only-if-needed - pip list - coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --color=yes --durations=25 # --flake8 - python -m pytest benchmarks pl_examples -v --color=yes --maxfail=2 --durations=0 # --flake8 From c5cb5498d347b92571f84f15f487301371647a56 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Thu, 5 Nov 2020 19:57:15 +0100 Subject: [PATCH 24/26] cuda --- .drone.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.drone.yml b/.drone.yml index 3d9b4db343079..156dcc60a9489 100644 --- a/.drone.yml +++ b/.drone.yml @@ -32,7 +32,7 @@ steps: - pip --version - nvidia-smi - pip install -r ./requirements/devel.txt --upgrade-strategy only-if-needed -v --no-cache-dir - - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda${CUDA_VERSION/./} --upgrade-strategy only-if-needed + - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda${CUDA_VERSION%%.*}0 --upgrade-strategy only-if-needed - pip list - coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --color=yes --durations=25 
# --flake8 - python -m pytest benchmarks pl_examples -v --color=yes --maxfail=2 --durations=0 # --flake8 From 8c092984c0ba799e0003f5b6d8a460f6e6e22a89 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 6 Nov 2020 01:26:22 +0100 Subject: [PATCH 25/26] pip DALI --- .drone.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.drone.yml b/.drone.yml index 156dcc60a9489..9774ffaaaecc7 100644 --- a/.drone.yml +++ b/.drone.yml @@ -32,7 +32,8 @@ steps: - pip --version - nvidia-smi - pip install -r ./requirements/devel.txt --upgrade-strategy only-if-needed -v --no-cache-dir - - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda${CUDA_VERSION%%.*}0 --upgrade-strategy only-if-needed + # once the image defines its CUDA version, we can switch to this package spec "nvidia-dali-cuda${CUDA_VERSION%%.*}0" + - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda100 --upgrade-strategy only-if-needed - pip list - coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --color=yes --durations=25 # --flake8 - python -m pytest benchmarks pl_examples -v --color=yes --maxfail=2 --durations=0 # --flake8 From f7afb45818b884069429328fd576c607892bf6b8 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 6 Nov 2020 13:45:36 +0000 Subject: [PATCH 26/26] Move build into init function --- pl_examples/basic_examples/mnist_dali.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pl_examples/basic_examples/mnist_dali.py b/pl_examples/basic_examples/mnist_dali.py index b127fff9c25f7..649198053a01b 100644 --- a/pl_examples/basic_examples/mnist_dali.py +++ b/pl_examples/basic_examples/mnist_dali.py @@ -75,6 +75,7 @@ class ExternalSourcePipeline(Pipeline): def __init__(self, batch_size, eii, num_threads, device_id): super(ExternalSourcePipeline, self).__init__(batch_size, num_threads, device_id, seed=12) self.source = ops.ExternalSource(source=eii, 
num_outputs=2) + self.build() def define_graph(self): images, labels = self.source() @@ -174,15 +175,12 @@ def cli_main(): eii_test = ExternalMNISTInputIterator(mnist_test, args.batch_size) pipe_train = ExternalSourcePipeline(batch_size=args.batch_size, eii=eii_train, num_threads=2, device_id=0) - pipe_train.build() train_loader = DALIClassificationLoader(pipe_train, size=len(mnist_train), auto_reset=True, fill_last_batch=False) pipe_val = ExternalSourcePipeline(batch_size=args.batch_size, eii=eii_val, num_threads=2, device_id=0) - pipe_val.build() val_loader = DALIClassificationLoader(pipe_val, size=len(mnist_val), auto_reset=True, fill_last_batch=False) pipe_test = ExternalSourcePipeline(batch_size=args.batch_size, eii=eii_test, num_threads=2, device_id=0) - pipe_test.build() test_loader = DALIClassificationLoader(pipe_test, size=len(mnist_test), auto_reset=True, fill_last_batch=False) # ------------