From f75f445d165492c23cc53a43c2a11d6c77713140 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 22 Feb 2021 17:10:21 +0000 Subject: [PATCH 01/60] Initial changes --- pytorch_lightning/accelerators/__init__.py | 1 + pytorch_lightning/accelerators/ipu.py | 32 ++++ .../plugins/training_type/ipu.py | 154 ++++++++++++++++++ .../connectors/accelerator_connector.py | 20 ++- pytorch_lightning/utilities/__init__.py | 1 + pytorch_lightning/utilities/enums.py | 1 + pytorch_lightning/utilities/imports.py | 1 + 7 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 pytorch_lightning/accelerators/ipu.py create mode 100644 pytorch_lightning/plugins/training_type/ipu.py diff --git a/pytorch_lightning/accelerators/__init__.py b/pytorch_lightning/accelerators/__init__.py index 05e15fe1f1767..2a460a27e373a 100644 --- a/pytorch_lightning/accelerators/__init__.py +++ b/pytorch_lightning/accelerators/__init__.py @@ -13,4 +13,5 @@ from pytorch_lightning.accelerators.accelerator import Accelerator # noqa F401 from pytorch_lightning.accelerators.cpu import CPUAccelerator # noqa F401 from pytorch_lightning.accelerators.gpu import GPUAccelerator # noqa F401 +from pytorch_lightning.accelerators.ipu import IPUAccelerator # noqa F401 from pytorch_lightning.accelerators.tpu import TPUAccelerator # noqa F401 diff --git a/pytorch_lightning/accelerators/ipu.py b/pytorch_lightning/accelerators/ipu.py new file mode 100644 index 0000000000000..8374bc1bc1554 --- /dev/null +++ b/pytorch_lightning/accelerators/ipu.py @@ -0,0 +1,32 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from collections import Callable + +from torch.optim import Optimizer + +from pytorch_lightning.accelerators.accelerator import Accelerator +from pytorch_lightning.utilities.exceptions import MisconfigurationException + + +class IPUAccelerator(Accelerator): + + def setup_optimizers(self, trainer): + super().setup_optimizers(trainer) + + if len(self.optimizers) > 1: + raise MisconfigurationException("IPUs currently only support one optimizer.") + + def optimizer_step(self, optimizer: Optimizer, opt_idx: int, lambda_closure: Callable, **kwargs): + # Optimizer step is handled by the IPU accelerator. 
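+ # The closure is still invoked so that Lightning's host-side training-step bookkeeping runs;
+ # the parameter update itself is applied on device by the compiled poptorch training
+ # executor, so no host-side optimizer.step() call is issued here.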
+ lambda_closure() diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py new file mode 100644 index 0000000000000..95d78fb8f815c --- /dev/null +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -0,0 +1,154 @@ +import json +import os +from typing import Any, Iterable, Optional, Union + +import torch +from torch.utils.data import DataLoader + +from pytorch_lightning import _logger as log +from pytorch_lightning import LightningModule +from pytorch_lightning.overrides.base import _LightningModuleWrapperBase +from pytorch_lightning.plugins.training_type.training_type_plugin import TrainingTypePlugin +from pytorch_lightning.utilities import _POPTORCH_AVAILABLE +from pytorch_lightning.utilities.apply_func import apply_to_collection +from pytorch_lightning.utilities.exceptions import MisconfigurationException + +if _POPTORCH_AVAILABLE: + import poptorch + + if not poptorch.ipuHardwareIsAvailable(): + raise MisconfigurationException("IPU Accelerator requires IPUs to run.") + +# todo: No idea what's happening with grad accumulation, need to check since IPUs handle grad accum. +# todo: or even lr scheduling... + + +class LightningIPUModule(_LightningModuleWrapperBase): + + def __init__(self, pl_module: LightningModule, precision: int): + super().__init__(pl_module) + self.precision = precision + + def forward(self, *inputs, **kwargs): + if self.precision == 16: + inputs = self._move_float_tensors_to_half(inputs) + + return super().forward(*inputs, **kwargs) + + @staticmethod + def batch_to(data): + return data.half() + + def _move_float_tensors_to_half(self, batch: Any): + batch = apply_to_collection(batch, (torch.FloatTensor, torch.cuda.FloatTensor), function=self.batch_to) + return batch + + +class IPUPlugin(TrainingTypePlugin): + + def __init__( + self, + mixed_precision: bool, + half: bool = False, + device_iterations: int = 1, + replication_factor: int = 1, + autoround_num_ipus: bool = True, + autoreport: bool = True, + autoreport_dir: Optional[str] = None + ): + super().__init__() + self.half = half + self.mixed_precision = mixed_precision + self.device_iterations = device_iterations + self.replication_factor = replication_factor + self.autoround_num_ipus = autoround_num_ipus + self.autoreport = autoreport + self.autoreport_dir = autoreport_dir + + if self.autoreport: + options = {"autoReport.all": self.autoreport} + if self.autoreport_dir: + if not os.path.exists(self.autoreport_dir): + os.makedirs(self.autoreport_dir) + options["autoReport.directory"] = self.autoreport_dir + os.environ["POPLAR_ENGINE_OPTIONS"] = json.dumps(options) + + @property + def on_gpu(self) -> bool: + return False + + @property + def root_device(self) -> torch.device: + pass + + def model_to_device(self) -> None: + pass + + @property + def is_global_zero(self) -> bool: + return True + + def reduce(self, tensor: Union[torch.Tensor, Any], *args: Any, **kwargs: Any) -> Union[torch.Tensor, Any]: + return tensor + + def barrier(self, name: Optional[str] = None) -> None: + pass + + def broadcast(self, obj: object, src: int = 0) -> object: + return object + + @property + def lightning_module(self) -> Optional[LightningModule]: + return self.model.module if isinstance(self.model, LightningIPUModule) else self.model + + def pre_dispatch(self) -> None: + if self.half: + log.info('Using 16bit precision, converting model to FP16.') + self.model = self.model.half() + precision = 16 if self.half or self.mixed_precision else 32 + + # Separate models are instantiated for 
different stages, but they share the same weights on host. + # When validation/test models are run, they sync weights first. + # Create model for training which will run training. + + optimizer = self.lightning_module.trainer.optimizers[0] + self.model = poptorch.trainingModel( + model=LightningIPUModule(self.lightning_module, precision), + options=self._create_opts(is_train_model=True), + optimizer=optimizer + ) + + # Create model for training which will run validation. + self.validation_model = LightningIPUModule(self.lightning_module, precision) + self.validation_model = poptorch.inferenceModel( + model=self.validation_model, + options=self._create_opts(is_train_model=False), + ) + + def _create_opts(self, is_train_model): + opts = poptorch.Options() + opts.deviceIterations(self.device_iterations) + opts.replicationFactor(self.replication_factor) + gradient_accumulation = self.lightning_module.trainer.accumulate_grad_batches if is_train_model else 1 + opts.Training.gradientAccumulation(gradient_accumulation) + opts.autoRoundNumIPUs(self.autoround_num_ipus) + return opts + + def process_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: + dataloader = self._convert_to_poptorch_loader( + dataloader=dataloader, opts=self._create_opts(is_train_model=self.lightning_module.training) + ) + return dataloader + + def _convert_to_poptorch_loader(self, dataloader, opts): + skip_keys = ['dataset_kind'] + if dataloader.batch_size: + # re-create batch sampler in new poptorch loader + skip_keys += ['batch_sampler'] + + dl_args = {k: v for k, v in dataloader.__dict__.items() if not k.startswith('_') and k not in skip_keys} + dl_args["options"] = opts + multiprocessing_context = dataloader.multiprocessing_context + dataloader = poptorch.DataLoader(**dl_args) + dataloader.multiprocessing_context = multiprocessing_context + return dataloader diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 7021081d6cc90..8308555307874 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -21,6 +21,7 @@ from pytorch_lightning.accelerators.accelerator import Accelerator from pytorch_lightning.accelerators.cpu import CPUAccelerator from pytorch_lightning.accelerators.gpu import GPUAccelerator +from pytorch_lightning.accelerators.ipu import IPUAccelerator from pytorch_lightning.accelerators.tpu import TPUAccelerator from pytorch_lightning.plugins import ( ApexMixedPrecisionPlugin, @@ -43,6 +44,7 @@ TrainingTypePlugin, ) from pytorch_lightning.plugins.environments import ClusterEnvironment, SLURMEnvironment, TorchElasticEnvironment +from pytorch_lightning.plugins.training_type.ipu import IPUPlugin from pytorch_lightning.tuner.auto_gpu_select import pick_multiple_gpus from pytorch_lightning.utilities import ( _APEX_AVAILABLE, @@ -229,6 +231,10 @@ def on_cpu(self) -> bool: def on_tpu(self) -> bool: return self.tpu_cores is not None + @property + def on_ipu(self) -> bool: + return self._device_type == DeviceType.IPU + @property def tpu_id(self) -> Optional[int]: if self.on_tpu and isinstance(self.tpu_cores, list): @@ -292,7 +298,9 @@ def parallel_devices(self) -> Union[List[torch.device], int]: @property def root_gpu(self) -> Optional[int]: - return self.accelerator.root_device.index if not isinstance(self.accelerator, TPUAccelerator) else None + return self.accelerator.root_device.index if not isinstance( 
+ self.accelerator, (IPUAccelerator, TPUAccelerator) + ) else None @property def is_using_torchelastic(self) -> bool: @@ -303,6 +311,9 @@ def select_precision_plugin(self) -> PrecisionPlugin: # set precision type self.amp_type = AMPType.from_str(self.amp_type) + if self._device_type == DeviceType.IPU: + return IPUPrecisionPlugin(self.precision) + if self._distrib_type == DistributedType.DEEPSPEED or isinstance(self._training_type_plugin, DeepSpeedPlugin): return DeepSpeedPrecisionPlugin(self.precision) @@ -401,6 +412,8 @@ def select_training_type_plugin(self) -> TrainingTypePlugin: plugin = SingleTPUPlugin(self.tpu_id) else: plugin = TPUSpawnPlugin(parallel_devices=list(range(self.tpu_cores))) + elif self.on_ipu: + plugin = IPUPlugin(mixed_precision=self.precision == 32) else: single_gpu_ordinal = device_parser.determine_root_gpu_device(self.parallel_device_ids) plugin = SingleDevicePlugin(device=torch.device(f"cuda:{single_gpu_ordinal}" if self.on_gpu else "cpu")) @@ -436,6 +449,8 @@ def select_accelerator(self) -> Accelerator: acc_cls = GPUAccelerator elif self.on_tpu: acc_cls = TPUAccelerator + elif self.on_ipu: + acc_cls = IPUAccelerator else: acc_cls = CPUAccelerator @@ -496,6 +511,9 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None): # special case with TPUs elif self.distributed_backend == 'tpu': self._device_type = DeviceType.TPU + # special case with IPUs + elif self.distributed_backend == 'ipu': + self._device_type = DeviceType.IPU elif self.distributed_backend and self._distrib_type is None: self._distrib_type = DistributedType(self.distributed_backend) diff --git a/pytorch_lightning/utilities/__init__.py b/pytorch_lightning/utilities/__init__.py index cf3aa06f305b8..9b25838d8ab41 100644 --- a/pytorch_lightning/utilities/__init__.py +++ b/pytorch_lightning/utilities/__init__.py @@ -36,6 +36,7 @@ _module_available, _NATIVE_AMP_AVAILABLE, _OMEGACONF_AVAILABLE, + _POPTORCH_AVAILABLE, _RPC_AVAILABLE, _TORCH_GREATER_EQUAL_1_6, _TORCH_GREATER_EQUAL_1_7, diff --git a/pytorch_lightning/utilities/enums.py b/pytorch_lightning/utilities/enums.py index 3e4add4fb68d1..ae03beaf4fb42 100644 --- a/pytorch_lightning/utilities/enums.py +++ b/pytorch_lightning/utilities/enums.py @@ -83,4 +83,5 @@ class DeviceType(LightningEnum): """ CPU = 'CPU' GPU = 'GPU' + IPU = 'IPU' TPU = 'TPU' diff --git a/pytorch_lightning/utilities/imports.py b/pytorch_lightning/utilities/imports.py index 8024997382457..a3b88a3d13366 100644 --- a/pytorch_lightning/utilities/imports.py +++ b/pytorch_lightning/utilities/imports.py @@ -64,6 +64,7 @@ def _compare_version(package: str, op, version) -> bool: _HYDRA_EXPERIMENTAL_AVAILABLE = _module_available("hydra.experimental") _NATIVE_AMP_AVAILABLE = _module_available("torch.cuda.amp") and hasattr(torch.cuda.amp, "autocast") _OMEGACONF_AVAILABLE = _module_available("omegaconf") +_POPTORCH_AVAILABLE = _module_available('poptorch') _RPC_AVAILABLE = not _IS_WINDOWS and _module_available('torch.distributed.rpc') _TORCHTEXT_AVAILABLE = _module_available("torchtext") _TORCHVISION_AVAILABLE = _module_available('torchvision') From dc9744b00ceeb9c53054f2f1624963963f011289 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 24 Mar 2021 22:57:47 +0000 Subject: [PATCH 02/60] Add broken example for now --- pl_examples/ipu_examples/__init__.py | 0 pl_examples/ipu_examples/mnist.py | 118 ++++++++++++++++++ pytorch_lightning/plugins/__init__.py | 2 + .../plugins/precision/ipu_precision.py | 5 + 4 files changed, 125 insertions(+) create mode 100644 
pl_examples/ipu_examples/__init__.py create mode 100644 pl_examples/ipu_examples/mnist.py create mode 100644 pytorch_lightning/plugins/precision/ipu_precision.py diff --git a/pl_examples/ipu_examples/__init__.py b/pl_examples/ipu_examples/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pl_examples/ipu_examples/mnist.py b/pl_examples/ipu_examples/mnist.py new file mode 100644 index 0000000000000..db125d5157057 --- /dev/null +++ b/pl_examples/ipu_examples/mnist.py @@ -0,0 +1,118 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from argparse import ArgumentParser +from pprint import pprint + +import torch +import torch.nn as nn +from torch.nn import functional as F + +import pytorch_lightning as pl +from pl_examples import cli_lightning_logo +from pl_examples.basic_examples.mnist_datamodule import MNISTDataModule +from pytorch_lightning.accelerators import IPUAccelerator + + +class Block(nn.Module): + + def __init__(self, in_channels, num_filters, kernel_size, pool_size): + super(Block, self).__init__() + self.conv = nn.Conv2d(in_channels=in_channels, out_channels=num_filters, kernel_size=kernel_size) + self.pool = nn.MaxPool2d(kernel_size=pool_size) + self.relu = nn.ReLU() + + def forward(self, x): + x = self.conv(x) + x = self.pool(x) + x = self.relu(x) + return x + + +class LitClassifier(pl.LightningModule): + + def __init__(self, learning_rate=1e-3): + super().__init__() + self.save_hyperparameters() + + self.layer1 = Block(1, 32, 3, 2) + self.layer2 = Block(32, 64, 3, 2) + self.layer3 = nn.Linear(1600, 128) + self.layer3_act = nn.ReLU() + self.layer3_dropout = torch.nn.Dropout(0.5) + self.layer4 = nn.Linear(128, 10) + self.softmax = nn.Softmax(1) + + def forward(self, x): + x = self.layer1(x) + x = self.layer2(x) + # Flatten layer + x = x.view(-1, 1600) + x = self.layer3_act(self.layer3(x)) + x = self.layer4(self.layer3_dropout(x)) + x = self.softmax(x) + return x + + def training_step(self, batch): + x, y = batch + y_hat = self(x) + loss = F.cross_entropy(y_hat, y) + return loss + + def validation_step(self, batch): + x, y = batch + y_hat = self(x) + loss = F.cross_entropy(y_hat, y) + return loss + + def test_step(self, batch): + x, y = batch + y_hat = self(x) + loss = F.cross_entropy(y_hat, y) + return loss + + def configure_optimizers(self): + return torch.optim.AdamW(self.parameters(), lr=self.hparams.learning_rate) + + @staticmethod + def add_model_specific_args(parent_parser): + parser = ArgumentParser(parents=[parent_parser], add_help=False) + parser.add_argument('--learning_rate', type=float, default=0.0001) + return parser + + +def cli_main(): + parser = ArgumentParser() + parser = pl.Trainer.add_argparse_args(parser) + parser = LitClassifier.add_model_specific_args(parser) + parser = IPUAccelerator.add_argparse_args(parser) + parser = MNISTDataModule.add_argparse_args(parser) + args = parser.parse_args() + + dm = MNISTDataModule.from_argparse_args(args) + + model = LitClassifier(args.learning_rate) + + 
accelerator = IPUAccelerator.from_argparse_args(args) + trainer = pl.Trainer.from_argparse_args(args, accelerator=accelerator) + + trainer.fit(model, datamodule=dm) + + result = trainer.test(model, datamodule=dm) + pprint(result) + + +if __name__ == '__main__': + cli_lightning_logo() + cli_main() diff --git a/pytorch_lightning/plugins/__init__.py b/pytorch_lightning/plugins/__init__.py index a67235baa4767..66e80c1178d15 100644 --- a/pytorch_lightning/plugins/__init__.py +++ b/pytorch_lightning/plugins/__init__.py @@ -12,6 +12,7 @@ from pytorch_lightning.plugins.training_type.deepspeed import DeepSpeedPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.dp import DataParallelPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.horovod import HorovodPlugin # noqa: F401 +from pytorch_lightning.plugins.training_type.ipu import IPUPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.rpc import RPCPlugin # noqa: F401 from pytorch_lightning.plugins.training_type.rpc_sequential import RPCSequentialPlugin # noqa: F401 @@ -32,6 +33,7 @@ "DeepSpeedPrecisionPlugin", "DoublePrecisionPlugin", "HorovodPlugin", + "IPUPlugin", "NativeMixedPrecisionPlugin", "PrecisionPlugin", "ShardedNativeMixedPrecisionPlugin", diff --git a/pytorch_lightning/plugins/precision/ipu_precision.py b/pytorch_lightning/plugins/precision/ipu_precision.py new file mode 100644 index 0000000000000..744ac1bd5fb82 --- /dev/null +++ b/pytorch_lightning/plugins/precision/ipu_precision.py @@ -0,0 +1,5 @@ +from pytorch_lightning.plugins import PrecisionPlugin + + +class IPUPrecisionPlugin(PrecisionPlugin): + pass From 931bb74ad74d0b29d50937cf6fed1c930c0b07da Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 7 Apr 2021 23:27:24 +0100 Subject: [PATCH 03/60] Fix reference --- pytorch_lightning/plugins/__init__.py | 2 ++ pytorch_lightning/trainer/connectors/accelerator_connector.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/plugins/__init__.py b/pytorch_lightning/plugins/__init__.py index 66e80c1178d15..a9e6fa1bce619 100644 --- a/pytorch_lightning/plugins/__init__.py +++ b/pytorch_lightning/plugins/__init__.py @@ -2,6 +2,7 @@ from pytorch_lightning.plugins.precision.apex_amp import ApexMixedPrecisionPlugin # noqa: F401 from pytorch_lightning.plugins.precision.deepspeed_precision import DeepSpeedPrecisionPlugin # noqa: F401 from pytorch_lightning.plugins.precision.double import DoublePrecisionPlugin # noqa: F401 +from pytorch_lightning.plugins.precision.ipu_precision import IPUPrecisionPlugin # noqa: F401 from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin # noqa: F401 from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin # noqa: F401 from pytorch_lightning.plugins.precision.sharded_native_amp import ShardedNativeMixedPrecisionPlugin # noqa: F401 @@ -34,6 +35,7 @@ "DoublePrecisionPlugin", "HorovodPlugin", "IPUPlugin", + "IPUPrecisionPlugin", "NativeMixedPrecisionPlugin", "PrecisionPlugin", "ShardedNativeMixedPrecisionPlugin", diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 808df52a91f10..61d7917dfec41 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -36,6 +36,7 @@ DoublePrecisionPlugin, HorovodPlugin, IPUPlugin, + 
IPUPrecisionPlugin, NativeMixedPrecisionPlugin, PrecisionPlugin, ShardedNativeMixedPrecisionPlugin, @@ -324,7 +325,7 @@ def select_precision_plugin(self) -> PrecisionPlugin: self.amp_type = AMPType.from_str(self.amp_type) if self._device_type == DeviceType.IPU: - return IPUPrecisionPlugin(self.precision) + return IPUPrecisionPlugin() if self._distrib_type == DistributedType.DEEPSPEED or isinstance(self._training_type_plugin, DeepSpeedPlugin): return DeepSpeedPrecisionPlugin(self.precision) From c617f02abe15a553e3f5a8176ffac409661dbcf9 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 11 May 2021 12:50:02 +0100 Subject: [PATCH 04/60] Fix format --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 56670ecd68e93..3437d2b0bcff3 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -550,7 +550,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None): if isinstance(self.tpu_cores, int): self._distrib_type = DistributedType.TPU_SPAWN elif self.distributed_backend == 'ipu': - self._device_type = DeviceType.IPU + self._device_type = DeviceType.IPU elif self.distributed_backend and self._distrib_type is None: self._distrib_type = DistributedType(self.distributed_backend) From 522a81fe75dfb6e5c3503a00d28585e662978421 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 11 May 2021 17:14:20 +0100 Subject: [PATCH 05/60] Code runs --- pl_examples/ipu_examples/mnist.py | 25 +--- .../plugins/precision/ipu_precision.py | 23 +++- .../plugins/training_type/ipu.py | 114 +++++++++++++----- .../connectors/accelerator_connector.py | 4 +- 4 files changed, 112 insertions(+), 54 deletions(-) diff --git a/pl_examples/ipu_examples/mnist.py b/pl_examples/ipu_examples/mnist.py index db125d5157057..2f084d41b4124 100644 --- a/pl_examples/ipu_examples/mnist.py +++ b/pl_examples/ipu_examples/mnist.py @@ -20,9 +20,7 @@ from torch.nn import functional as F import pytorch_lightning as pl -from pl_examples import cli_lightning_logo from pl_examples.basic_examples.mnist_datamodule import MNISTDataModule -from pytorch_lightning.accelerators import IPUAccelerator class Block(nn.Module): @@ -64,20 +62,17 @@ def forward(self, x): x = self.softmax(x) return x - def training_step(self, batch): - x, y = batch + def training_step(self, x, y, batch_idx): y_hat = self(x) loss = F.cross_entropy(y_hat, y) return loss - def validation_step(self, batch): - x, y = batch + def validation_step(self, x, y): y_hat = self(x) loss = F.cross_entropy(y_hat, y) return loss - def test_step(self, batch): - x, y = batch + def test_step(self, x, y): y_hat = self(x) loss = F.cross_entropy(y_hat, y) return loss @@ -86,17 +81,15 @@ def configure_optimizers(self): return torch.optim.AdamW(self.parameters(), lr=self.hparams.learning_rate) @staticmethod - def add_model_specific_args(parent_parser): - parser = ArgumentParser(parents=[parent_parser], add_help=False) + def add_model_specific_args(parser): parser.add_argument('--learning_rate', type=float, default=0.0001) return parser -def cli_main(): +if __name__ == '__main__': parser = ArgumentParser() parser = pl.Trainer.add_argparse_args(parser) parser = LitClassifier.add_model_specific_args(parser) - parser = IPUAccelerator.add_argparse_args(parser) parser = 
MNISTDataModule.add_argparse_args(parser) args = parser.parse_args() @@ -104,15 +97,9 @@ def cli_main(): model = LitClassifier(args.learning_rate) - accelerator = IPUAccelerator.from_argparse_args(args) - trainer = pl.Trainer.from_argparse_args(args, accelerator=accelerator) + trainer = pl.Trainer.from_argparse_args(args, max_epochs=10, accelerator='ipu') trainer.fit(model, datamodule=dm) result = trainer.test(model, datamodule=dm) pprint(result) - - -if __name__ == '__main__': - cli_lightning_logo() - cli_main() diff --git a/pytorch_lightning/plugins/precision/ipu_precision.py b/pytorch_lightning/plugins/precision/ipu_precision.py index 744ac1bd5fb82..4e88a6cf73fe1 100644 --- a/pytorch_lightning/plugins/precision/ipu_precision.py +++ b/pytorch_lightning/plugins/precision/ipu_precision.py @@ -1,5 +1,24 @@ -from pytorch_lightning.plugins import PrecisionPlugin +from typing import Any + +from torch import Tensor + +from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin class IPUPrecisionPlugin(PrecisionPlugin): - pass + + def __init__(self, precision: int) -> None: + super().__init__() + self.precision = precision + + def backward( + self, + closure_loss: Tensor, + *args: Any, + **kwargs: Any, + ) -> Tensor: + # IPU internally manages bwd step. + return closure_loss + + def clip_gradients(self, *args, **kwargs) -> None: + pass diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 95d78fb8f815c..385bc09edd6b8 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -1,14 +1,17 @@ +import inspect import json import os -from typing import Any, Iterable, Optional, Union +from typing import Any, Iterable, List, Optional, Union import torch from torch.utils.data import DataLoader from pytorch_lightning import _logger as log -from pytorch_lightning import LightningModule +from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.overrides.base import _LightningModuleWrapperBase -from pytorch_lightning.plugins.training_type.training_type_plugin import TrainingTypePlugin +from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment +from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin +from pytorch_lightning.trainer.supporters import CombinedLoader from pytorch_lightning.utilities import _POPTORCH_AVAILABLE from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -44,21 +47,21 @@ def _move_float_tensors_to_half(self, batch: Any): return batch -class IPUPlugin(TrainingTypePlugin): +class IPUPlugin(ParallelPlugin): def __init__( self, - mixed_precision: bool, half: bool = False, device_iterations: int = 1, replication_factor: int = 1, autoround_num_ipus: bool = True, autoreport: bool = True, - autoreport_dir: Optional[str] = None + autoreport_dir: Optional[str] = None, + parallel_devices: Optional[List[torch.device]] = None, + cluster_environment: Optional[ClusterEnvironment] = None, ): - super().__init__() + super().__init__(parallel_devices, cluster_environment) self.half = half - self.mixed_precision = mixed_precision self.device_iterations = device_iterations self.replication_factor = replication_factor self.autoround_num_ipus = autoround_num_ipus @@ -94,23 +97,25 @@ def reduce(self, tensor: Union[torch.Tensor, Any], *args: Any, **kwargs: Any) -> def barrier(self, name: Optional[str] = 
None) -> None: pass + def all_gather(self, tensor: torch.Tensor, group: Optional[Any] = None, sync_grads: bool = False) -> torch.Tensor: + return tensor + def broadcast(self, obj: object, src: int = 0) -> object: - return object + return obj @property def lightning_module(self) -> Optional[LightningModule]: - return self.model.module if isinstance(self.model, LightningIPUModule) else self.model + model = self.model.module if isinstance(self.model, poptorch.PoplarExecutor) else self.model + return model.module if isinstance(model, LightningIPUModule) else model def pre_dispatch(self) -> None: if self.half: log.info('Using 16bit precision, converting model to FP16.') self.model = self.model.half() - precision = 16 if self.half or self.mixed_precision else 32 + precision = self.lightning_module.trainer.accelerator.precision_plugin.precision + precision = 16 if self.half else precision - # Separate models are instantiated for different stages, but they share the same weights on host. - # When validation/test models are run, they sync weights first. # Create model for training which will run training. - optimizer = self.lightning_module.trainer.optimizers[0] self.model = poptorch.trainingModel( model=LightningIPUModule(self.lightning_module, precision), @@ -118,12 +123,16 @@ def pre_dispatch(self) -> None: optimizer=optimizer ) - # Create model for training which will run validation. - self.validation_model = LightningIPUModule(self.lightning_module, precision) - self.validation_model = poptorch.inferenceModel( - model=self.validation_model, - options=self._create_opts(is_train_model=False), - ) + # Separate models are instantiated for different stages, but they share the same weights on host. + # When validation/test models are run, they sync weights first. + + # todo: not sure this is the cleanest way to do this... 
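+ # poptorch compiles each wrapper lazily on its first call, so creating a stage-specific
+ # inference wrapper for each of val/test/predict up front is cheap; the compile cost is
+ # only paid the first time a given stage actually runs.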
+ self.inference_models = {} + for x in ('val', 'test', 'predict'): + self.inference_models[x] = poptorch.inferenceModel( + model=LightningIPUModule(self.lightning_module, precision), + options=self._create_opts(is_train_model=False), + ) def _create_opts(self, is_train_model): opts = poptorch.Options() @@ -135,20 +144,63 @@ def _create_opts(self, is_train_model): return opts def process_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: - dataloader = self._convert_to_poptorch_loader( - dataloader=dataloader, opts=self._create_opts(is_train_model=self.lightning_module.training) - ) + if isinstance(dataloader, CombinedLoader): + dataloader.loaders = apply_to_collection( + dataloader.loaders, + DataLoader, + self.process_dataloader, + ) + return dataloader + + if not isinstance(dataloader, poptorch.DataLoader): + dataloader = self._convert_to_poptorch_loader( + dataloader=dataloader, opts=self._create_opts(is_train_model=self.lightning_module.training) + ) return dataloader - def _convert_to_poptorch_loader(self, dataloader, opts): - skip_keys = ['dataset_kind'] - if dataloader.batch_size: - # re-create batch sampler in new poptorch loader - skip_keys += ['batch_sampler'] + def _convert_to_poptorch_loader(self, dataloader: Union[Iterable, DataLoader], + opts: poptorch.Options) -> Union[Iterable, DataLoader]: + skip_keys = ('sampler', 'batch_sampler', 'dataset_kind') + + attrs = {k: v for k, v in vars(dataloader).items() if not k.startswith("_")} + + params = set(inspect.signature(dataloader.__init__).parameters) + contains_dataset = True + + if type(dataloader) is not DataLoader: + contains_dataset = "dataset" in params + params.update(inspect.signature(DataLoader.__init__).parameters) + + dl_args = {name: attrs[name] for name in params if name in attrs and name not in skip_keys} - dl_args = {k: v for k, v in dataloader.__dict__.items() if not k.startswith('_') and k not in skip_keys} - dl_args["options"] = opts multiprocessing_context = dataloader.multiprocessing_context - dataloader = poptorch.DataLoader(**dl_args) + dl_args['multiprocessing_context'] = multiprocessing_context + if not contains_dataset: + dl_args.pop('dataset') + + dataloader = poptorch.DataLoader(**dl_args, options=opts) dataloader.multiprocessing_context = multiprocessing_context return dataloader + + def training_step(self, *args, **kwargs): + # todo: we shouldn't need to drop the batch idx here + # also the args are now being passed as individual args which is different, i.e + # def training_step(batch, batch_idx): + # becomes + # def training_step(x, y): + # where x and y are the batch arguments... 
+ args = args[0] # Drop the batch idx + return self.model(*args, **kwargs) + + def validation_step(self, *args, **kwargs): + batch_idx = torch.tensor(args[1], dtype=torch.int) + args = args[0] # Drop the batch idx + return self.inference_models['val'](*args, batch_idx, **kwargs) + + def test_step(self, *args, **kwargs): + args = args[0] # Drop the batch idx + return self.inference_models['test'](*args, **kwargs) + + def predict_step(self, *args, **kwargs): + args = args[0] # Drop the batch idx + return self.inference_models['predict'](*args, **kwargs) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 3437d2b0bcff3..eb58cf061a0d9 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -347,7 +347,7 @@ def select_precision_plugin(self) -> PrecisionPlugin: self.amp_type = AMPType.from_str(self.amp_type) if self._device_type == DeviceType.IPU: - return IPUPrecisionPlugin() + return IPUPrecisionPlugin(self.precision) if self._distrib_type == DistributedType.DEEPSPEED or isinstance(self._training_type_plugin, DeepSpeedPlugin): return DeepSpeedPrecisionPlugin(self.precision) @@ -446,7 +446,7 @@ def select_training_type_plugin(self) -> TrainingTypePlugin: elif self.on_tpu and isinstance(self.tpu_cores, list): plugin = SingleTPUPlugin(self.tpu_id) elif self.on_ipu: - plugin = IPUPlugin(mixed_precision=self.precision == 32) + plugin = IPUPlugin() else: single_gpu_ordinal = device_parser.determine_root_gpu_device(self.parallel_device_ids) plugin = SingleDevicePlugin(device=torch.device(f"cuda:{single_gpu_ordinal}" if self.on_gpu else "cpu")) From 0c003608503f940a777bd7754adab867e734c05c Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 26 May 2021 11:31:14 +0100 Subject: [PATCH 06/60] Fixes --- pl_examples/ipu_examples/mnist.py | 75 +++++++------------ .../plugins/training_type/ipu.py | 75 ++++++++++++------- .../training_type/training_type_plugin.py | 16 ++++ .../connectors/accelerator_connector.py | 9 ++- pytorch_lightning/trainer/data_loading.py | 7 ++ pytorch_lightning/trainer/trainer.py | 5 +- 6 files changed, 106 insertions(+), 81 deletions(-) diff --git a/pl_examples/ipu_examples/mnist.py b/pl_examples/ipu_examples/mnist.py index 2f084d41b4124..8db1a6dfde949 100644 --- a/pl_examples/ipu_examples/mnist.py +++ b/pl_examples/ipu_examples/mnist.py @@ -16,88 +16,65 @@ from pprint import pprint import torch -import torch.nn as nn from torch.nn import functional as F import pytorch_lightning as pl from pl_examples.basic_examples.mnist_datamodule import MNISTDataModule -class Block(nn.Module): - - def __init__(self, in_channels, num_filters, kernel_size, pool_size): - super(Block, self).__init__() - self.conv = nn.Conv2d(in_channels=in_channels, out_channels=num_filters, kernel_size=kernel_size) - self.pool = nn.MaxPool2d(kernel_size=pool_size) - self.relu = nn.ReLU() - - def forward(self, x): - x = self.conv(x) - x = self.pool(x) - x = self.relu(x) - return x - - class LitClassifier(pl.LightningModule): - def __init__(self, learning_rate=1e-3): + def __init__( + self, + hidden_dim: int = 128, + learning_rate: float = 0.0001, + ): super().__init__() self.save_hyperparameters() - self.layer1 = Block(1, 32, 3, 2) - self.layer2 = Block(32, 64, 3, 2) - self.layer3 = nn.Linear(1600, 128) - self.layer3_act = nn.ReLU() - self.layer3_dropout = torch.nn.Dropout(0.5) - self.layer4 = nn.Linear(128, 10) - self.softmax = 
nn.Softmax(1) + self.l1 = torch.nn.Linear(28 * 28, self.hparams.hidden_dim) + self.l2 = torch.nn.Linear(self.hparams.hidden_dim, 10) def forward(self, x): - x = self.layer1(x) - x = self.layer2(x) - # Flatten layer - x = x.view(-1, 1600) - x = self.layer3_act(self.layer3(x)) - x = self.layer4(self.layer3_dropout(x)) - x = self.softmax(x) + x = x.view(x.size(0), -1) + x = torch.relu(self.l1(x)) + x = torch.relu(self.l2(x)) return x - def training_step(self, x, y, batch_idx): + def training_step(self, batch, batch_idx): + x, y = batch y_hat = self(x) loss = F.cross_entropy(y_hat, y) return loss - def validation_step(self, x, y): + def validation_step(self, batch, batch_idx): + x, y = batch y_hat = self(x) loss = F.cross_entropy(y_hat, y) return loss - def test_step(self, x, y): + def test_step(self, batch, batch_idx): + x, y = batch y_hat = self(x) loss = F.cross_entropy(y_hat, y) return loss - def configure_optimizers(self): - return torch.optim.AdamW(self.parameters(), lr=self.hparams.learning_rate) + def on_validation_batch_end(self, outputs, batch, batch_idx: int, dataloader_idx: int) -> None: + self.log('val_loss', outputs.mean(), prog_bar=True) - @staticmethod - def add_model_specific_args(parser): - parser.add_argument('--learning_rate', type=float, default=0.0001) - return parser + def on_test_batch_end(self, outputs, batch, batch_idx: int, dataloader_idx: int) -> None: + self.log('test_loss', outputs.mean(), prog_bar=True) + def configure_optimizers(self): + return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate) -if __name__ == '__main__': - parser = ArgumentParser() - parser = pl.Trainer.add_argparse_args(parser) - parser = LitClassifier.add_model_specific_args(parser) - parser = MNISTDataModule.add_argparse_args(parser) - args = parser.parse_args() - dm = MNISTDataModule.from_argparse_args(args) +if __name__ == '__main__': + dm = MNISTDataModule(batch_size=32) - model = LitClassifier(args.learning_rate) + model = LitClassifier() - trainer = pl.Trainer.from_argparse_args(args, max_epochs=10, accelerator='ipu') + trainer = pl.Trainer(max_epochs=10, accelerator='ipu', ipu_cores=8) trainer.fit(model, datamodule=dm) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 385bc09edd6b8..dbdc7fc931942 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -63,7 +63,6 @@ def __init__( super().__init__(parallel_devices, cluster_environment) self.half = half self.device_iterations = device_iterations - self.replication_factor = replication_factor self.autoround_num_ipus = autoround_num_ipus self.autoreport = autoreport self.autoreport_dir = autoreport_dir @@ -117,11 +116,9 @@ def pre_dispatch(self) -> None: # Create model for training which will run training. optimizer = self.lightning_module.trainer.optimizers[0] - self.model = poptorch.trainingModel( - model=LightningIPUModule(self.lightning_module, precision), - options=self._create_opts(is_train_model=True), - optimizer=optimizer - ) + model = LightningIPUModule(self.lightning_module, precision) + + self.model = poptorch.trainingModel(model=model, options=self._create_opts(training=True), optimizer=optimizer) # Separate models are instantiated for different stages, but they share the same weights on host. # When validation/test models are run, they sync weights first. 
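The weight-sharing behaviour described in the comment above comes from PopTorch itself rather than from this plugin. A rough standalone sketch of the trainingModel/inferenceModel pattern the plugin builds on (assuming the Poplar SDK with an attached IPU; TinyModel and the option values are illustrative and not part of the patch):

    import torch
    import poptorch


    class TinyModel(torch.nn.Module):
        # PopTorch convention: the training graph computes and returns its own loss.
        def __init__(self):
            super().__init__()
            self.linear = torch.nn.Linear(4, 2)
            self.loss = torch.nn.CrossEntropyLoss()

        def forward(self, x, y=None):
            logits = self.linear(x)
            if y is not None:
                return logits, self.loss(logits, y)
            return logits


    model = TinyModel()
    opts = poptorch.Options()
    opts.deviceIterations(1)   # batches consumed per on-device invocation
    opts.replicationFactor(1)  # data-parallel replicas across IPUs

    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    # forward, backward and the optimizer update all run on the IPU in one call
    train_model = poptorch.trainingModel(model, options=opts, optimizer=optimizer)
    # wraps the same host weights; they are synced before inference runs
    infer_model = poptorch.inferenceModel(model, options=opts)

    x = torch.randn(8, 4)
    y = torch.randint(0, 2, (8,))
    _, loss = train_model(x, y)  # one compiled training step
    preds = infer_model(x)       # uses the freshly trained weights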
@@ -130,19 +127,35 @@ def pre_dispatch(self) -> None: self.inference_models = {} for x in ('val', 'test', 'predict'): self.inference_models[x] = poptorch.inferenceModel( - model=LightningIPUModule(self.lightning_module, precision), - options=self._create_opts(is_train_model=False), + model=model, + options=self._create_opts(training=False), ) - def _create_opts(self, is_train_model): + @property + def replication_factor(self): + return len(self.parallel_devices) + + def _create_opts(self, training): opts = poptorch.Options() opts.deviceIterations(self.device_iterations) opts.replicationFactor(self.replication_factor) - gradient_accumulation = self.lightning_module.trainer.accumulate_grad_batches if is_train_model else 1 + gradient_accumulation = self.lightning_module.trainer.accumulate_grad_batches if training else 1 opts.Training.gradientAccumulation(gradient_accumulation) opts.autoRoundNumIPUs(self.autoround_num_ipus) return opts + def on_reset_train_dataloader(self, dataloader) -> Union[Iterable, DataLoader]: + return self.process_dataloader(dataloader) + + def on_reset_val_dataloader(self, dataloader) -> Union[Iterable, DataLoader]: + return self.process_dataloader(dataloader) + + def on_reset_test_dataloader(self, dataloader) -> Union[Iterable, DataLoader]: + return self.process_dataloader(dataloader) + + def on_reset_predict_dataloader(self, dataloader) -> Union[Iterable, DataLoader]: + return self.process_dataloader(dataloader) + def process_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: if isinstance(dataloader, CombinedLoader): dataloader.loaders = apply_to_collection( @@ -151,15 +164,17 @@ def process_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[I self.process_dataloader, ) return dataloader - + elif isinstance(dataloader, list): + dataloader = apply_to_collection(dataloader, DataLoader, self.process_dataloader) + return dataloader if not isinstance(dataloader, poptorch.DataLoader): dataloader = self._convert_to_poptorch_loader( - dataloader=dataloader, opts=self._create_opts(is_train_model=self.lightning_module.training) + dataloader=dataloader, opts=self._create_opts(training=self.lightning_module.training) ) return dataloader def _convert_to_poptorch_loader(self, dataloader: Union[Iterable, DataLoader], - opts: poptorch.Options) -> Union[Iterable, DataLoader]: + opts: 'poptorch.Options') -> Union[Iterable, DataLoader]: skip_keys = ('sampler', 'batch_sampler', 'dataset_kind') attrs = {k: v for k, v in vars(dataloader).items() if not k.startswith("_")} @@ -183,24 +198,28 @@ def _convert_to_poptorch_loader(self, dataloader: Union[Iterable, DataLoader], return dataloader def training_step(self, *args, **kwargs): - # todo: we shouldn't need to drop the batch idx here - # also the args are now being passed as individual args which is different, i.e - # def training_step(batch, batch_idx): - # becomes - # def training_step(x, y): - # where x and y are the batch arguments... - args = args[0] # Drop the batch idx - return self.model(*args, **kwargs) + args, batch_idx = self._prepare_input(args) + return self.model(args, batch_idx, **kwargs) + + def _prepare_input(self, args): + args, batch_idx = args + # explicit conversion to tuple as Lists are not supported in jit as they are mutable + # todo: we probably want to apply this to all lists in the object + # todo: do we need to do additional checks for dicts? 
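+ # PopTorch splits the leading dimension of every positional input across replicas, device
+ # iterations and gradient-accumulation steps, so scalar inputs (the batch index below) are
+ # expanded into a tensor with one entry per split before reaching the compiled model.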
+ args = tuple(args) + accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches + num_repeat = self.replication_factor * self.device_iterations * accumulate_grad_batches + batch_idx = torch.tensor(batch_idx, dtype=torch.int).unsqueeze(0).repeat(num_repeat) + return args, batch_idx def validation_step(self, *args, **kwargs): - batch_idx = torch.tensor(args[1], dtype=torch.int) - args = args[0] # Drop the batch idx - return self.inference_models['val'](*args, batch_idx, **kwargs) + args, batch_idx = self._prepare_input(args) + return self.inference_models['val'](args, batch_idx, **kwargs) def test_step(self, *args, **kwargs): - args = args[0] # Drop the batch idx - return self.inference_models['test'](*args, **kwargs) + args, batch_idx = self._prepare_input(args) + return self.inference_models['test'](args, batch_idx, **kwargs) def predict_step(self, *args, **kwargs): - args = args[0] # Drop the batch idx - return self.inference_models['predict'](*args, **kwargs) + args, batch_idx = self._prepare_input(args) + return self.inference_models['predict'](args, batch_idx, **kwargs) diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index ede5717258040..b440b5685229f 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -186,6 +186,22 @@ def process_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[I """ return dataloader + def on_reset_train_dataloader(self, dataloader) -> Union[Iterable, DataLoader]: + """Called before resetting the train dataloader.""" + return dataloader + + def on_reset_val_dataloader(self, dataloader) -> Union[Iterable, DataLoader]: + """Called before resetting the val dataloader.""" + return dataloader + + def on_reset_test_dataloader(self, dataloader) -> Union[Iterable, DataLoader]: + """Called before resetting the test dataloader.""" + return dataloader + + def on_reset_predict_dataloader(self, dataloader) -> Union[Iterable, DataLoader]: + """Called before resetting the predict dataloader.""" + return dataloader + def init_optimizers(self, trainer: 'pl.Trainer', model: 'pl.LightningModule'): return trainer.init_optimizers(model) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index eb58cf061a0d9..c5f3e7d656f7d 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -79,6 +79,7 @@ def __init__( self, num_processes, tpu_cores, + ipu_cores, distributed_backend, auto_select_gpus, gpus, @@ -98,6 +99,7 @@ def __init__( self.num_processes = num_processes self.tpu_cores = device_parser.parse_tpu_cores(tpu_cores) + self.ipu_cores = ipu_cores self.distributed_backend = distributed_backend self.auto_select_gpus = auto_select_gpus self.gpus = gpus @@ -250,7 +252,7 @@ def on_tpu(self) -> bool: @property def on_ipu(self) -> bool: - return self._device_type == DeviceType.IPU + return self.ipu_cores is not None @property def tpu_id(self) -> Optional[int]: @@ -314,6 +316,9 @@ def parallel_devices(self) -> List[Union[torch.device, int]]: # https://github.com/PyTorchLightning/pytorch-lightning/issues/3169 if isinstance(self.tpu_cores, int): devices = list(range(self.tpu_cores)) + elif self.on_ipu: + if isinstance(self.ipu_cores, int): + devices = list(range(self.ipu_cores)) else: devices = 
[torch.device("cpu")] * self.num_processes return devices @@ -446,7 +451,7 @@ def select_training_type_plugin(self) -> TrainingTypePlugin: elif self.on_tpu and isinstance(self.tpu_cores, list): plugin = SingleTPUPlugin(self.tpu_id) elif self.on_ipu: - plugin = IPUPlugin() + plugin = IPUPlugin(parallel_devices=self.parallel_devices) else: single_gpu_ordinal = device_parser.determine_root_gpu_device(self.parallel_device_ids) plugin = SingleDevicePlugin(device=torch.device(f"cuda:{single_gpu_ordinal}" if self.on_gpu else "cpu")) diff --git a/pytorch_lightning/trainer/data_loading.py b/pytorch_lightning/trainer/data_loading.py index 29711b23d8546..42d7a8d4e2328 100644 --- a/pytorch_lightning/trainer/data_loading.py +++ b/pytorch_lightning/trainer/data_loading.py @@ -261,6 +261,9 @@ def reset_train_dataloader(self, model: LightningModule) -> None: # wrap the sequence of train loaders to a CombinedLoader object for computing the num_training_batches self.train_dataloader = CombinedLoader(self.train_dataloader, self._multiple_trainloader_mode) + # todo (sean): should be the accelerator, not the training type plugin + self.train_dataloader = self.accelerator.training_type_plugin.on_reset_train_dataloader(self.train_dataloader) + self.num_training_batches = len(self.train_dataloader) if has_len(self.train_dataloader) else float('inf') if isinstance(self.limit_train_batches, int) or self.limit_train_batches == 0.0: @@ -361,6 +364,10 @@ def _reset_eval_dataloader( # add worker_init_fn for correct seeding in worker processes apply_to_collection(dataloaders, dtype=DataLoader, function=self.auto_add_worker_init_fn) + hook_name = f"on_reset_{mode}_dataloader" + # todo (sean): should be the accelerator, not the training type plugin + dataloaders = getattr(self.accelerator.training_type_plugin, hook_name)(dataloaders) + loader_num_batches = [] # determine number of batches diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 8732d8c33dce7..9b84a761d9fa4 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -102,6 +102,7 @@ def __init__( gpus: Optional[Union[List[int], str, int]] = None, auto_select_gpus: bool = False, tpu_cores: Optional[Union[List[int], str, int]] = None, + ipu_cores: Optional[int] = None, log_gpu_memory: Optional[str] = None, progress_bar_refresh_rate: Optional[int] = None, overfit_batches: Union[int, float] = 0.0, @@ -318,8 +319,8 @@ def __init__( self.optimizer_connector = OptimizerConnector(self) self.accelerator_connector = AcceleratorConnector( - num_processes, tpu_cores, distributed_backend, auto_select_gpus, gpus, num_nodes, sync_batchnorm, benchmark, - replace_sampler_ddp, deterministic, precision, amp_backend, amp_level, plugins + num_processes, tpu_cores, ipu_cores, distributed_backend, auto_select_gpus, gpus, num_nodes, sync_batchnorm, + benchmark, replace_sampler_ddp, deterministic, precision, amp_backend, amp_level, plugins ) self.logger_connector = LoggerConnector(self, log_gpu_memory) self.model_connector = ModelConnector(self) From adbdb2a023e83b861b58ddbd37ffa50dc1809b42 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 26 May 2021 11:38:16 +0100 Subject: [PATCH 07/60] Clear up files --- pl_examples/ipu_examples/mnist.py | 1 - pytorch_lightning/plugins/training_type/ipu.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pl_examples/ipu_examples/mnist.py b/pl_examples/ipu_examples/mnist.py index 8db1a6dfde949..bc535daf94542 100644 --- a/pl_examples/ipu_examples/mnist.py +++ 
b/pl_examples/ipu_examples/mnist.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from argparse import ArgumentParser from pprint import pprint import torch diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index dbdc7fc931942..68be0ba1451bd 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -53,7 +53,6 @@ def __init__( self, half: bool = False, device_iterations: int = 1, - replication_factor: int = 1, autoround_num_ipus: bool = True, autoreport: bool = True, autoreport_dir: Optional[str] = None, From 3e733af990f137273f645608a45bcb9c30282017 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 27 May 2021 11:10:59 +0100 Subject: [PATCH 08/60] Add tests, helpers, fixes --- pl_examples/ipu_examples/mnist.py | 29 +-- .../plugins/training_type/ipu.py | 61 +++--- .../connectors/accelerator_connector.py | 10 +- pytorch_lightning/utilities/__init__.py | 1 + pytorch_lightning/utilities/imports.py | 6 + tests/accelerators/test_ipu.py | 176 ++++++++++++++++++ tests/helpers/runif.py | 7 + 7 files changed, 253 insertions(+), 37 deletions(-) create mode 100644 tests/accelerators/test_ipu.py diff --git a/pl_examples/ipu_examples/mnist.py b/pl_examples/ipu_examples/mnist.py index bc535daf94542..980dd3430446c 100644 --- a/pl_examples/ipu_examples/mnist.py +++ b/pl_examples/ipu_examples/mnist.py @@ -48,21 +48,28 @@ def training_step(self, batch, batch_idx): def validation_step(self, batch, batch_idx): x, y = batch - y_hat = self(x) - loss = F.cross_entropy(y_hat, y) - return loss + logits = self(x) + acc = self.accuracy(logits, y) + return acc def test_step(self, batch, batch_idx): x, y = batch - y_hat = self(x) - loss = F.cross_entropy(y_hat, y) - return loss + logits = self(x) + acc = self.accuracy(logits, y) + return acc + + def accuracy(self, logits, y): + # todo (sean): currently IPU poptorch doesn't implicit convert bools to tensor + # hence we use an explicit calculation for accuracy here. Once fixed in poptorch + # we can use the accuracy metric. 
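+ # The explicit form below is equivalent to (logits.argmax(-1) == y).float().mean().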
+ acc = torch.sum(torch.eq(torch.argmax(logits, -1), y).to(torch.float32)) / len(y) + return acc - def on_validation_batch_end(self, outputs, batch, batch_idx: int, dataloader_idx: int) -> None: - self.log('val_loss', outputs.mean(), prog_bar=True) + def validation_epoch_end(self, outputs) -> None: + self.log('val_acc', torch.stack(outputs).mean(), prog_bar=True) - def on_test_batch_end(self, outputs, batch, batch_idx: int, dataloader_idx: int) -> None: - self.log('test_loss', outputs.mean(), prog_bar=True) + def test_epoch_end(self, outputs) -> None: + self.log('test_acc', torch.stack(outputs).mean()) def configure_optimizers(self): return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate) @@ -73,7 +80,7 @@ def configure_optimizers(self): model = LitClassifier() - trainer = pl.Trainer(max_epochs=10, accelerator='ipu', ipu_cores=8) + trainer = pl.Trainer(max_epochs=2, ipu_cores=8) trainer.fit(model, datamodule=dm) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 68be0ba1451bd..4b729b6c8c034 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -107,25 +107,32 @@ def lightning_module(self) -> Optional[LightningModule]: return model.module if isinstance(model, LightningIPUModule) else model def pre_dispatch(self) -> None: + ''' + The issue here is we assume we're training. + What if we're not training? + I say + ''' if self.half: - log.info('Using 16bit precision, converting model to FP16.') + log.info('Using full 16bit precision, converting LightningModule weights to FP16.') self.model = self.model.half() precision = self.lightning_module.trainer.accelerator.precision_plugin.precision precision = 16 if self.half else precision - # Create model for training which will run training. - optimizer = self.lightning_module.trainer.optimizers[0] model = LightningIPUModule(self.lightning_module, precision) - - self.model = poptorch.trainingModel(model=model, options=self._create_opts(training=True), optimizer=optimizer) + self.model = model # Separate models are instantiated for different stages, but they share the same weights on host. # When validation/test models are run, they sync weights first. - # todo: not sure this is the cleanest way to do this... - self.inference_models = {} + self.poptorch_wrapped_models = {} + if self.lightning_module.trainer.training: + # Create model for training which will run training. + optimizer = self.lightning_module.trainer.optimizers[0] + self.poptorch_wrapped_models['train'] = poptorch.trainingModel( + model=model, options=self._create_opts(training=True), optimizer=optimizer + ) for x in ('val', 'test', 'predict'): - self.inference_models[x] = poptorch.inferenceModel( + self.poptorch_wrapped_models[x] = poptorch.inferenceModel( model=model, options=self._create_opts(training=False), ) @@ -141,6 +148,10 @@ def _create_opts(self, training): gradient_accumulation = self.lightning_module.trainer.accumulate_grad_batches if training else 1 opts.Training.gradientAccumulation(gradient_accumulation) opts.autoRoundNumIPUs(self.autoround_num_ipus) + + # todo (sean): unsure if this is necessary but to be safe. 
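+ # PL_GLOBAL_SEED is exported by pytorch_lightning's seed_everything; forwarding it here
+ # ties on-device randomness (e.g. dropout) to the host seed for reproducible runs.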
+ if os.environ.get("PL_GLOBAL_SEED"): + opts.randomSeed(int(os.environ["PL_GLOBAL_SEED"])) return opts def on_reset_train_dataloader(self, dataloader) -> Union[Iterable, DataLoader]: @@ -197,28 +208,32 @@ def _convert_to_poptorch_loader(self, dataloader: Union[Iterable, DataLoader], return dataloader def training_step(self, *args, **kwargs): - args, batch_idx = self._prepare_input(args) - return self.model(args, batch_idx, **kwargs) + args = self._prepare_input(args) + return self.poptorch_wrapped_models['train'](*args, **kwargs) def _prepare_input(self, args): - args, batch_idx = args - # explicit conversion to tuple as Lists are not supported in jit as they are mutable - # todo: we probably want to apply this to all lists in the object - # todo: do we need to do additional checks for dicts? - args = tuple(args) + # Ensure we replicate primitives values to have enough dimensions to split across devices accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches num_repeat = self.replication_factor * self.device_iterations * accumulate_grad_batches - batch_idx = torch.tensor(batch_idx, dtype=torch.int).unsqueeze(0).repeat(num_repeat) - return args, batch_idx + + def to_tuple(x): + return tuple(x) + + def to_tensor(x): + return torch.tensor(x).unsqueeze(0).repeat(num_repeat) + + args = apply_to_collection(args, dtype=list, function=to_tuple) + args = apply_to_collection(args, dtype=(int, float), function=to_tensor) + return args def validation_step(self, *args, **kwargs): - args, batch_idx = self._prepare_input(args) - return self.inference_models['val'](args, batch_idx, **kwargs) + args = self._prepare_input(args) + return self.poptorch_wrapped_models['val'](*args, **kwargs) def test_step(self, *args, **kwargs): - args, batch_idx = self._prepare_input(args) - return self.inference_models['test'](args, batch_idx, **kwargs) + args = self._prepare_input(args) + return self.poptorch_wrapped_models['test'](*args, **kwargs) def predict_step(self, *args, **kwargs): - args, batch_idx = self._prepare_input(args) - return self.inference_models['predict'](args, batch_idx, **kwargs) + args = self._prepare_input(args) + return self.poptorch_wrapped_models['predict'](*args, **kwargs) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index c91b2b71dc693..1d50a93b0b086 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -60,6 +60,7 @@ from pytorch_lightning.utilities import ( _APEX_AVAILABLE, _HOROVOD_AVAILABLE, + _IPU_AVAILABLE, _NATIVE_AMP_AVAILABLE, _TPU_AVAILABLE, AMPType, @@ -367,7 +368,7 @@ def select_precision_plugin(self) -> PrecisionPlugin: # set precision type self.amp_type = AMPType.from_str(self.amp_type) - if self._device_type == DeviceType.IPU: + if self.on_ipu: return IPUPrecisionPlugin(self.precision) if self._distrib_type == DistributedType.DEEPSPEED or isinstance(self._training_type_plugin, DeepSpeedPlugin): @@ -632,8 +633,11 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None): ) rank_zero_info(f'GPU available: {torch.cuda.is_available()}, used: {self._device_type == DeviceType.GPU}') - num_cores = self.tpu_cores if self.tpu_cores is not None else 0 - rank_zero_info(f'TPU available: {_TPU_AVAILABLE}, using: {num_cores} TPU cores') + num_tpu_cores = self.tpu_cores if self.tpu_cores is not None else 0 + rank_zero_info(f'TPU available: {_TPU_AVAILABLE}, using: 
{num_tpu_cores} TPU cores') + + num_ipu_cores = self.ipu_cores if self.ipu_cores is not None else 0 + rank_zero_info(f'IPU available: {_IPU_AVAILABLE}, using: {num_ipu_cores} IPU cores') if torch.cuda.is_available() and self._device_type != DeviceType.GPU: rank_zero_warn( diff --git a/pytorch_lightning/utilities/__init__.py b/pytorch_lightning/utilities/__init__.py index 9920c9e41cb8f..613a5013d5198 100644 --- a/pytorch_lightning/utilities/__init__.py +++ b/pytorch_lightning/utilities/__init__.py @@ -43,6 +43,7 @@ _HOROVOD_AVAILABLE, _HYDRA_AVAILABLE, _HYDRA_EXPERIMENTAL_AVAILABLE, + _IPU_AVAILABLE, _IS_INTERACTIVE, _module_available, _NATIVE_AMP_AVAILABLE, diff --git a/pytorch_lightning/utilities/imports.py b/pytorch_lightning/utilities/imports.py index a6f2b192a97f7..2a51b01404821 100644 --- a/pytorch_lightning/utilities/imports.py +++ b/pytorch_lightning/utilities/imports.py @@ -97,3 +97,9 @@ def _compare_version(package: str, op, version) -> bool: from pytorch_lightning.utilities.xla_device import XLADeviceUtils # noqa: E402 _TPU_AVAILABLE = XLADeviceUtils.tpu_device_exists() + +if _POPTORCH_AVAILABLE: + import poptorch + _IPU_AVAILABLE = poptorch.ipuHardwareIsAvailable() +else: + _IPU_AVAILABLE = False diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py new file mode 100644 index 0000000000000..6a585934e13fe --- /dev/null +++ b/tests/accelerators/test_ipu.py @@ -0,0 +1,176 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
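# --- editor's sketch, not part of the patch -----------------------------------
# The imports.py change above separates "poptorch is importable" from "IPU
# hardware is actually attached". A minimal self-contained version of that
# pattern (the patch defines _POPTORCH_AVAILABLE elsewhere in imports.py; this
# sketch folds both checks into a single try/except):
try:
    import poptorch
    _POPTORCH_AVAILABLE = True
    _IPU_AVAILABLE = poptorch.ipuHardwareIsAvailable()
except ImportError:
    _POPTORCH_AVAILABLE = False
    _IPU_AVAILABLE = False
# The tests below then use @RunIf(ipu=True) to skip when the requirement is not met.
# -------------------------------------------------------------------------------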
+import os + +import pytest +import torch +import torch.nn.functional as F +from torch.utils.data import DataLoader + +from pytorch_lightning import seed_everything, Trainer +from tests.helpers.boring_model import BoringModel +from tests.helpers.datamodules import ClassifDataModule +from tests.helpers.datasets import SklearnDataset +from tests.helpers.runif import RunIf +from tests.helpers.simple_models import ClassificationModel + + +class IPUModel(BoringModel): + + def training_step(self, batch, batch_idx): + output = self(batch) + loss = self.loss(batch, output) + return loss + + def validation_step(self, batch, batch_idx): + output = self(batch) + loss = self.loss(batch, output) + return loss + + def test_step(self, batch, batch_idx): + output = self(batch) + loss = self.loss(batch, output) + return loss + + def training_epoch_end(self, outputs) -> None: + pass + + def validation_epoch_end(self, outputs) -> None: + pass + + def test_epoch_end(self, outputs) -> None: + pass + + +class IPUClassificationModel(ClassificationModel): + + def training_step(self, batch, batch_idx): + x, y = batch + logits = self(x) + loss = F.cross_entropy(logits, y) + return loss + + def validation_step(self, batch, batch_idx): + x, y = batch + logits = self(x) + acc = self.accuracy(logits, y) + return acc + + def test_step(self, batch, batch_idx): + x, y = batch + logits = self(x) + acc = self.accuracy(logits, y) + return acc + + def accuracy(self, logits, y): + # todo (sean): currently IPU poptorch doesn't implicit convert bools to tensor + # hence we use an explicit calculation for accuracy here. Once fixed in poptorch + # we can use the accuracy metric. + acc = torch.sum(torch.eq(torch.argmax(logits, -1), y).to(torch.float32)) / len(y) + return acc + + def validation_epoch_end(self, outputs) -> None: + self.log('val_acc', torch.stack(outputs).mean()) + + def test_epoch_end(self, outputs) -> None: + self.log('test_acc', torch.stack(outputs).mean()) + + +@RunIf(ipu=True) +@pytest.mark.parametrize('ipu_cores', [1, 4]) +def test_all_stages(tmpdir, ipu_cores): + model = IPUModel() + trainer = Trainer(fast_dev_run=True, accelerator='ipu', ipu_cores=ipu_cores) + trainer.fit(model) + trainer.validate(model) + trainer.test(model) + trainer.predict(model, model.val_dataloader()) + + +@RunIf(ipu=True) +@pytest.mark.parametrize('ipu_cores', [1, 4]) +def test_inference_only(tmpdir, ipu_cores): + model = IPUModel() + + trainer = Trainer(fast_dev_run=True, accelerator='ipu', ipu_cores=ipu_cores) + trainer.validate(model) + trainer.test(model) + trainer.predict(model, model.val_dataloader()) + + +def test_optimization(tmpdir): + seed_everything(42) + + # Override to drop last uneven batch, as IPU poptorch does not support uneven inputs. 
+ class DataModule(ClassifDataModule): + + def train_dataloader(self): + return DataLoader( + SklearnDataset(self.x_train, self.y_train, self._x_type, self._y_type), + batch_size=self.batch_size, + drop_last=True + ) + + def val_dataloader(self): + return DataLoader( + SklearnDataset(self.x_valid, self.y_valid, self._x_type, self._y_type), + batch_size=self.batch_size, + drop_last=True + ) + + def test_dataloader(self): + return DataLoader( + SklearnDataset(self.x_test, self.y_test, self._x_type, self._y_type), + batch_size=self.batch_size, + drop_last=True + ) + + dm = DataModule(length=1024) + model = IPUClassificationModel() + + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + weights_summary=None, + deterministic=True, + ipu_cores=2, + ) + + # fit model + trainer.fit(model, dm) + assert trainer.state.finished, f"Training failed with {trainer.state}" + assert dm.trainer is not None + + # validate + result = trainer.validate(datamodule=dm) + assert dm.trainer is not None + assert result[0]['val_acc'] > 0.7 + + # test + result = trainer.test(datamodule=dm) + assert dm.trainer is not None + test_result = result[0]['test_acc'] + assert test_result > 0.6 + + # test saved model + model_path = os.path.join(tmpdir, 'model.pt') + trainer.save_checkpoint(model_path) + + model = IPUClassificationModel.load_from_checkpoint(model_path) + + trainer = Trainer(default_root_dir=tmpdir, deterministic=True) + + result = trainer.test(model, dm.test_dataloader()) + saved_result = result[0]['test_acc'] + assert saved_result > 0.6 and (saved_result == test_result) diff --git a/tests/helpers/runif.py b/tests/helpers/runif.py index 630a341ec2d30..2e528cbd6430d 100644 --- a/tests/helpers/runif.py +++ b/tests/helpers/runif.py @@ -28,6 +28,7 @@ _FAIRSCALE_PIPE_AVAILABLE, _HOROVOD_AVAILABLE, _NATIVE_AMP_AVAILABLE, + _POPTORCH_AVAILABLE, _RPC_AVAILABLE, _TORCH_QUANTIZE_AVAILABLE, _TPU_AVAILABLE, @@ -63,6 +64,7 @@ def __new__( amp_apex: bool = False, amp_native: bool = False, tpu: bool = False, + ipu: bool = False, horovod: bool = False, horovod_nccl: bool = False, skip_windows: bool = False, @@ -85,6 +87,7 @@ def __new__( amp_apex: NVIDIA Apex is installed amp_native: if native PyTorch native AMP is supported tpu: if TPU is available + ipu: if IPU is available horovod: if Horovod is installed horovod_nccl: if Horovod is installed with NCCL support skip_windows: skip test for Windows platform (typically fo some limited torch functionality) @@ -139,6 +142,10 @@ def __new__( conditions.append(not _TPU_AVAILABLE) reasons.append("TPU") + if ipu: + conditions.append(not _POPTORCH_AVAILABLE) + reasons.append("IPU") + if horovod: conditions.append(not _HOROVOD_AVAILABLE) reasons.append("Horovod") From a51f23ee5a0c0103c31f9462a082b76cea4d5d05 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 27 May 2021 12:07:22 +0100 Subject: [PATCH 09/60] Small cleanups --- .../plugins/training_type/ipu.py | 87 +++++++++---------- tests/helpers/runif.py | 4 +- 2 files changed, 44 insertions(+), 47 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 4b729b6c8c034..a3e0ef35141c9 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -4,6 +4,7 @@ from typing import Any, Iterable, List, Optional, Union import torch +from torch.nn import Module from torch.utils.data import DataLoader from pytorch_lightning import _logger as log @@ -19,11 +20,8 @@ if _POPTORCH_AVAILABLE: import poptorch - if 
not poptorch.ipuHardwareIsAvailable(): - raise MisconfigurationException("IPU Accelerator requires IPUs to run.") - -# todo: No idea what's happening with grad accumulation, need to check since IPUs handle grad accum. -# todo: or even lr scheduling... +# todo: Check gradient accumulation to ensure this works, similar to DeepSpeed IPUs manage this. +# todo: Check lr scheduling to ensure that when the LR is changed, we update the optimizer state. class LightningIPUModule(_LightningModuleWrapperBase): @@ -65,6 +63,7 @@ def __init__( self.autoround_num_ipus = autoround_num_ipus self.autoreport = autoreport self.autoreport_dir = autoreport_dir + self.poptorch_models = {} if self.autoreport: options = {"autoReport.all": self.autoreport} @@ -74,44 +73,16 @@ def __init__( options["autoReport.directory"] = self.autoreport_dir os.environ["POPLAR_ENGINE_OPTIONS"] = json.dumps(options) - @property - def on_gpu(self) -> bool: - return False - - @property - def root_device(self) -> torch.device: - pass - - def model_to_device(self) -> None: - pass - - @property - def is_global_zero(self) -> bool: - return True - - def reduce(self, tensor: Union[torch.Tensor, Any], *args: Any, **kwargs: Any) -> Union[torch.Tensor, Any]: - return tensor - - def barrier(self, name: Optional[str] = None) -> None: - pass - - def all_gather(self, tensor: torch.Tensor, group: Optional[Any] = None, sync_grads: bool = False) -> torch.Tensor: - return tensor - - def broadcast(self, obj: object, src: int = 0) -> object: - return obj + def setup(self, model: Module) -> None: + super().setup(model) + if not poptorch.ipuHardwareIsAvailable(): + raise MisconfigurationException("IPU Accelerator requires IPUs to run.") @property def lightning_module(self) -> Optional[LightningModule]: - model = self.model.module if isinstance(self.model, poptorch.PoplarExecutor) else self.model - return model.module if isinstance(model, LightningIPUModule) else model + return self.model.module if isinstance(self.model, LightningIPUModule) else self.model def pre_dispatch(self) -> None: - ''' - The issue here is we assume we're training. - What if we're not training? - I say - ''' if self.half: log.info('Using full 16bit precision, converting LightningModule weights to FP16.') self.model = self.model.half() @@ -124,15 +95,14 @@ def pre_dispatch(self) -> None: # Separate models are instantiated for different stages, but they share the same weights on host. # When validation/test models are run, they sync weights first. - self.poptorch_wrapped_models = {} if self.lightning_module.trainer.training: # Create model for training which will run training. 
optimizer = self.lightning_module.trainer.optimizers[0] - self.poptorch_wrapped_models['train'] = poptorch.trainingModel( + self.poptorch_models['train'] = poptorch.trainingModel( model=model, options=self._create_opts(training=True), optimizer=optimizer ) for x in ('val', 'test', 'predict'): - self.poptorch_wrapped_models[x] = poptorch.inferenceModel( + self.poptorch_models[x] = poptorch.inferenceModel( model=model, options=self._create_opts(training=False), ) @@ -209,7 +179,7 @@ def _convert_to_poptorch_loader(self, dataloader: Union[Iterable, DataLoader], def training_step(self, *args, **kwargs): args = self._prepare_input(args) - return self.poptorch_wrapped_models['train'](*args, **kwargs) + return self.poptorch_models['train'](*args, **kwargs) def _prepare_input(self, args): # Ensure we replicate primitives values to have enough dimensions to split across devices @@ -228,12 +198,39 @@ def to_tensor(x): def validation_step(self, *args, **kwargs): args = self._prepare_input(args) - return self.poptorch_wrapped_models['val'](*args, **kwargs) + return self.poptorch_models['val'](*args, **kwargs) def test_step(self, *args, **kwargs): args = self._prepare_input(args) - return self.poptorch_wrapped_models['test'](*args, **kwargs) + return self.poptorch_models['test'](*args, **kwargs) def predict_step(self, *args, **kwargs): args = self._prepare_input(args) - return self.poptorch_wrapped_models['predict'](*args, **kwargs) + return self.poptorch_models['predict'](*args, **kwargs) + + @property + def on_gpu(self) -> bool: + return False + + @property + def root_device(self) -> torch.device: + pass + + def model_to_device(self) -> None: + pass + + @property + def is_global_zero(self) -> bool: + return True + + def reduce(self, tensor: Union[torch.Tensor, Any], *args: Any, **kwargs: Any) -> Union[torch.Tensor, Any]: + return tensor + + def barrier(self, name: Optional[str] = None) -> None: + pass + + def all_gather(self, tensor: torch.Tensor, group: Optional[Any] = None, sync_grads: bool = False) -> torch.Tensor: + return tensor + + def broadcast(self, obj: object, src: int = 0) -> object: + return obj diff --git a/tests/helpers/runif.py b/tests/helpers/runif.py index 2e528cbd6430d..737ddd68dff17 100644 --- a/tests/helpers/runif.py +++ b/tests/helpers/runif.py @@ -27,8 +27,8 @@ _FAIRSCALE_FULLY_SHARDED_AVAILABLE, _FAIRSCALE_PIPE_AVAILABLE, _HOROVOD_AVAILABLE, + _IPU_AVAILABLE, _NATIVE_AMP_AVAILABLE, - _POPTORCH_AVAILABLE, _RPC_AVAILABLE, _TORCH_QUANTIZE_AVAILABLE, _TPU_AVAILABLE, @@ -143,7 +143,7 @@ def __new__( reasons.append("TPU") if ipu: - conditions.append(not _POPTORCH_AVAILABLE) + conditions.append(not _IPU_AVAILABLE) reasons.append("IPU") if horovod: From be7de87f4ec2024cdf3641f19661409e57d5d0ea Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 1 Jun 2021 12:17:24 +0100 Subject: [PATCH 10/60] Refactors based on review --- .../plugins/training_type/ipu.py | 81 +++++++++++++------ tests/accelerators/test_ipu.py | 3 + 2 files changed, 61 insertions(+), 23 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index a3e0ef35141c9..fd2a701464918 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -1,6 +1,7 @@ import inspect import json import os +from enum import Enum from typing import Any, Iterable, List, Optional, Union import torch @@ -20,9 +21,22 @@ if _POPTORCH_AVAILABLE: import poptorch -# todo: Check gradient accumulation to ensure this works, similar to 
DeepSpeed IPUs manage this. +# todo: Check gradient accumulation to ensure this works, similar to DeepSpeed, IPUs manage this. # todo: Check lr scheduling to ensure that when the LR is changed, we update the optimizer state. +# todo: does creating an inference model and a training model allocate double the IPU cores? +# todo: can we have one inference model for test/val/predict which takes a bool to choose a path? + + +class IPUStage(Enum): + training = torch.tensor([0]) + validation = torch.tensor([1]) + testing = torch.tensor([2]) + predicting = torch.tensor([3]) + + def __eq__(self, other): + return torch.equal(self.value, other) + class LightningIPUModule(_LightningModuleWrapperBase): @@ -30,11 +44,23 @@ def __init__(self, pl_module: LightningModule, precision: int): super().__init__(pl_module) self.precision = precision - def forward(self, *inputs, **kwargs): + def forward(self, stage, *inputs, **kwargs): if self.precision == 16: inputs = self._move_float_tensors_to_half(inputs) - return super().forward(*inputs, **kwargs) + trainer = self.module.trainer + if trainer and IPUStage.training == stage: + output = self.module.training_step(*inputs, **kwargs) + elif trainer and IPUStage.testing == stage: + output = self.module.test_step(*inputs, **kwargs) + elif trainer and IPUStage.validation == stage: + output = self.module.validation_step(*inputs, **kwargs) + elif trainer and IPUStage.predicting == stage: + output = self.module.predict_step(*inputs, **kwargs) + else: + output = self.module(*inputs, **kwargs) + + return output @staticmethod def batch_to(data): @@ -63,7 +89,8 @@ def __init__( self.autoround_num_ipus = autoround_num_ipus self.autoreport = autoreport self.autoreport_dir = autoreport_dir - self.poptorch_models = {} + self.train_model = None + self.inference_model = None if self.autoreport: options = {"autoReport.all": self.autoreport} @@ -89,23 +116,22 @@ def pre_dispatch(self) -> None: precision = self.lightning_module.trainer.accelerator.precision_plugin.precision precision = 16 if self.half else precision - model = LightningIPUModule(self.lightning_module, precision) - self.model = model - # Separate models are instantiated for different stages, but they share the same weights on host. # When validation/test models are run, they sync weights first. + model = LightningIPUModule(self.lightning_module, precision) + self.model = model if self.lightning_module.trainer.training: # Create model for training which will run training. 
optimizer = self.lightning_module.trainer.optimizers[0] - self.poptorch_models['train'] = poptorch.trainingModel( + self.train_model = poptorch.trainingModel( model=model, options=self._create_opts(training=True), optimizer=optimizer ) - for x in ('val', 'test', 'predict'): - self.poptorch_models[x] = poptorch.inferenceModel( - model=model, - options=self._create_opts(training=False), - ) + + self.inference_model = poptorch.inferenceModel( + model=model, + options=self._create_opts(training=False), + ) @property def replication_factor(self): @@ -177,36 +203,45 @@ def _convert_to_poptorch_loader(self, dataloader: Union[Iterable, DataLoader], dataloader.multiprocessing_context = multiprocessing_context return dataloader - def training_step(self, *args, **kwargs): - args = self._prepare_input(args) - return self.poptorch_models['train'](*args, **kwargs) + @property + def _n_replicate(self): + accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches + return self.replication_factor * self.device_iterations * accumulate_grad_batches def _prepare_input(self, args): - # Ensure we replicate primitives values to have enough dimensions to split across devices - accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches - num_repeat = self.replication_factor * self.device_iterations * accumulate_grad_batches def to_tuple(x): return tuple(x) def to_tensor(x): - return torch.tensor(x).unsqueeze(0).repeat(num_repeat) + return torch.tensor(x).unsqueeze(0).repeat(self._n_replicate) args = apply_to_collection(args, dtype=list, function=to_tuple) args = apply_to_collection(args, dtype=(int, float), function=to_tensor) return args + def _prepare_stage(self, stage: IPUStage): + return stage.value.repeat(self._n_replicate) + + def training_step(self, *args, **kwargs): + args = self._prepare_input(args) + stage = self._prepare_stage(IPUStage.training) + return self.train_model(stage, *args, **kwargs) + def validation_step(self, *args, **kwargs): args = self._prepare_input(args) - return self.poptorch_models['val'](*args, **kwargs) + stage = self._prepare_stage(IPUStage.validation) + return self.inference_model(stage, *args, **kwargs) def test_step(self, *args, **kwargs): args = self._prepare_input(args) - return self.poptorch_models['test'](*args, **kwargs) + stage = self._prepare_stage(IPUStage.testing) + return self.inference_model(stage, *args, **kwargs) def predict_step(self, *args, **kwargs): args = self._prepare_input(args) - return self.poptorch_models['predict'](*args, **kwargs) + stage = self._prepare_stage(IPUStage.predicting) + return self.inference_model(stage, *args, **kwargs) @property def on_gpu(self) -> bool: diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 6a585934e13fe..e8814435c0148 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -174,3 +174,6 @@ def test_dataloader(self): result = trainer.test(model, dm.test_dataloader()) saved_result = result[0]['test_acc'] assert saved_result > 0.6 and (saved_result == test_result) + + +# todo add test for precision 16 and fully half precision + device iterations From 83c8a79f63538b3c8e252f7625b5f4af63602b38 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 1 Jun 2021 15:30:30 +0100 Subject: [PATCH 11/60] Swap to special tests --- tests/accelerators/test_ipu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index e8814435c0148..c14e47e2cd7c6 100644 --- 
a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -87,7 +87,7 @@ def test_epoch_end(self, outputs) -> None: self.log('test_acc', torch.stack(outputs).mean()) -@RunIf(ipu=True) +@RunIf(ipu=True, special=True) @pytest.mark.parametrize('ipu_cores', [1, 4]) def test_all_stages(tmpdir, ipu_cores): model = IPUModel() @@ -98,7 +98,7 @@ def test_all_stages(tmpdir, ipu_cores): trainer.predict(model, model.val_dataloader()) -@RunIf(ipu=True) +@RunIf(ipu=True, special=True) @pytest.mark.parametrize('ipu_cores', [1, 4]) def test_inference_only(tmpdir, ipu_cores): model = IPUModel() From a6018e549d4d723adb01deaf6d58f1f34b6c5124 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 1 Jun 2021 15:56:48 +0100 Subject: [PATCH 12/60] Add special tests --- .azure-pipelines/ipu-tests.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.azure-pipelines/ipu-tests.yml b/.azure-pipelines/ipu-tests.yml index 763549e88200b..ffaf13dca9bd0 100644 --- a/.azure-pipelines/ipu-tests.yml +++ b/.azure-pipelines/ipu-tests.yml @@ -89,3 +89,9 @@ jobs: env: MKL_THREADING_LAYER: "GNU" displayName: 'Testing: standard' + + - bash: | + bash tests/special_tests.sh + env: + MKL_THREADING_LAYER: "GNU" + displayName: 'Testing: special' From 0e71bbef307f5b473454714f8e9d85c047bfb395 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 1 Jun 2021 16:32:52 +0100 Subject: [PATCH 13/60] Add source --- .azure-pipelines/ipu-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.azure-pipelines/ipu-tests.yml b/.azure-pipelines/ipu-tests.yml index ffaf13dca9bd0..5f45f96fb75d7 100644 --- a/.azure-pipelines/ipu-tests.yml +++ b/.azure-pipelines/ipu-tests.yml @@ -91,6 +91,9 @@ jobs: displayName: 'Testing: standard' - bash: | + source ${{ variables.poplar_sdk }}/poplar-ubuntu*/enable.sh + source ${{ variables.poplar_sdk }}/popart-ubuntu*/enable.sh + bash tests/special_tests.sh env: MKL_THREADING_LAYER: "GNU" From 6e38bd178b75867df0767d631a9e8d84fcdd53c3 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 1 Jun 2021 21:54:37 +0100 Subject: [PATCH 14/60] Cleanups --- .../plugins/training_type/ipu.py | 75 ++++++------------- 1 file changed, 22 insertions(+), 53 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index fd2a701464918..018e0a2311350 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -1,7 +1,6 @@ import inspect import json import os -from enum import Enum from typing import Any, Iterable, List, Optional, Union import torch @@ -21,22 +20,6 @@ if _POPTORCH_AVAILABLE: import poptorch -# todo: Check gradient accumulation to ensure this works, similar to DeepSpeed, IPUs manage this. -# todo: Check lr scheduling to ensure that when the LR is changed, we update the optimizer state. - -# todo: does creating an inference model and a training model allocate double the IPU cores? -# todo: can we have one inference model for test/val/predict which takes a bool to choose a path? 
- - -class IPUStage(Enum): - training = torch.tensor([0]) - validation = torch.tensor([1]) - testing = torch.tensor([2]) - predicting = torch.tensor([3]) - - def __eq__(self, other): - return torch.equal(self.value, other) - class LightningIPUModule(_LightningModuleWrapperBase): @@ -44,23 +27,11 @@ def __init__(self, pl_module: LightningModule, precision: int): super().__init__(pl_module) self.precision = precision - def forward(self, stage, *inputs, **kwargs): + def forward(self, *inputs, **kwargs): if self.precision == 16: inputs = self._move_float_tensors_to_half(inputs) - trainer = self.module.trainer - if trainer and IPUStage.training == stage: - output = self.module.training_step(*inputs, **kwargs) - elif trainer and IPUStage.testing == stage: - output = self.module.test_step(*inputs, **kwargs) - elif trainer and IPUStage.validation == stage: - output = self.module.validation_step(*inputs, **kwargs) - elif trainer and IPUStage.predicting == stage: - output = self.module.predict_step(*inputs, **kwargs) - else: - output = self.module(*inputs, **kwargs) - - return output + return super().forward(*inputs, **kwargs) @staticmethod def batch_to(data): @@ -89,8 +60,7 @@ def __init__( self.autoround_num_ipus = autoround_num_ipus self.autoreport = autoreport self.autoreport_dir = autoreport_dir - self.train_model = None - self.inference_model = None + self.poptorch_models = {} if self.autoreport: options = {"autoReport.all": self.autoreport} @@ -116,22 +86,23 @@ def pre_dispatch(self) -> None: precision = self.lightning_module.trainer.accelerator.precision_plugin.precision precision = 16 if self.half else precision + model = LightningIPUModule(self.lightning_module, precision) + self.model = model + # Separate models are instantiated for different stages, but they share the same weights on host. # When validation/test models are run, they sync weights first. - model = LightningIPUModule(self.lightning_module, precision) - self.model = model if self.lightning_module.trainer.training: # Create model for training which will run training. 
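# --- editor's sketch, not part of the patch -----------------------------------
# A simplified picture of what pre_dispatch sets up after this cleanup: one
# host-side module wrapped into a poptorch training executor plus inference
# executors, all sharing the same host weights. The helper name is
# hypothetical, the options are left at their defaults, and poptorch is
# assumed to be installed.
import torch
import poptorch


def wrap_for_ipu(module: torch.nn.Module, optimizer: torch.optim.Optimizer) -> dict:
    executors = {
        'train': poptorch.trainingModel(model=module, options=poptorch.Options(), optimizer=optimizer),
    }
    for stage in ('val', 'test', 'predict'):
        # inference executors reuse the host weights and sync them before running
        executors[stage] = poptorch.inferenceModel(model=module, options=poptorch.Options())
    return executors
# -------------------------------------------------------------------------------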
optimizer = self.lightning_module.trainer.optimizers[0] - self.train_model = poptorch.trainingModel( - model=model, options=self._create_opts(training=True), optimizer=optimizer + model = poptorch.trainingModel(model=model, options=self._create_opts(training=True), optimizer=optimizer) + self.poptorch_models['train'] = model + for x in ('val', 'test', 'predict'): + model = poptorch.inferenceModel( + model=model, + options=self._create_opts(training=False), ) - - self.inference_model = poptorch.inferenceModel( - model=model, - options=self._create_opts(training=False), - ) + self.poptorch_models[x] = model @property def replication_factor(self): @@ -205,6 +176,7 @@ def _convert_to_poptorch_loader(self, dataloader: Union[Iterable, DataLoader], @property def _n_replicate(self): + # Ensure we replicate primitives values to have enough dimensions to split across devices accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches return self.replication_factor * self.device_iterations * accumulate_grad_batches @@ -220,28 +192,21 @@ def to_tensor(x): args = apply_to_collection(args, dtype=(int, float), function=to_tensor) return args - def _prepare_stage(self, stage: IPUStage): - return stage.value.repeat(self._n_replicate) - def training_step(self, *args, **kwargs): args = self._prepare_input(args) - stage = self._prepare_stage(IPUStage.training) - return self.train_model(stage, *args, **kwargs) + return self.poptorch_models['train'](*args, **kwargs) def validation_step(self, *args, **kwargs): args = self._prepare_input(args) - stage = self._prepare_stage(IPUStage.validation) - return self.inference_model(stage, *args, **kwargs) + return self.poptorch_models['val'](*args, **kwargs) def test_step(self, *args, **kwargs): args = self._prepare_input(args) - stage = self._prepare_stage(IPUStage.testing) - return self.inference_model(stage, *args, **kwargs) + return self.poptorch_models['test'](*args, **kwargs) def predict_step(self, *args, **kwargs): args = self._prepare_input(args) - stage = self._prepare_stage(IPUStage.predicting) - return self.inference_model(stage, *args, **kwargs) + return self.poptorch_models['predict'](*args, **kwargs) @property def on_gpu(self) -> bool: @@ -269,3 +234,7 @@ def all_gather(self, tensor: torch.Tensor, group: Optional[Any] = None, sync_gra def broadcast(self, obj: object, src: int = 0) -> object: return obj + + def teardown(self) -> None: + for k, model in self.poptorch_models.items(): + model.destroy() From 526383fd8c7b0dd968839e2341dad96c1bd7b2d2 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 2 Jun 2021 12:08:06 +0100 Subject: [PATCH 15/60] Add logic to attach/detach model from devices --- pytorch_lightning/accelerators/accelerator.py | 48 ++++++++++++++----- .../plugins/training_type/ipu.py | 38 +++++++++++++++ .../training_type/training_type_plugin.py | 32 +++++++++++++ pytorch_lightning/trainer/evaluation_loop.py | 4 ++ pytorch_lightning/trainer/predict_loop.py | 2 + pytorch_lightning/trainer/training_loop.py | 1 + 6 files changed, 113 insertions(+), 12 deletions(-) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index 4ea017ae0c208..9c2b3dadf0961 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -179,10 +179,6 @@ def batch_to_device( return move_data_to_device(batch, device) - def on_train_start(self) -> None: - """Hook to do something upon the training start""" - pass - def training_step( self, step_kwargs: 
Dict[str, Union[Any, int]], @@ -348,14 +344,6 @@ def clip_gradients( model=self.model, ) - def on_train_epoch_end(self) -> None: - """Hook to do something on the end of an training epoch.""" - pass - - def on_train_end(self) -> None: - """Hook to do something at the end of the training""" - pass - def setup_optimizers(self, trainer: 'pl.Trainer') -> None: """ Creates optimizers and schedulers @@ -547,3 +535,39 @@ def setup_optimizers_in_pre_dispatch(self) -> bool: def update_global_step(self, total_batch_idx: int, current_global_step: int) -> int: return self.training_type_plugin.update_global_step(total_batch_idx, current_global_step) + + def on_train_epoch_end(self) -> None: + """Hook to do something on the end of an training epoch.""" + pass + + def on_train_start(self) -> None: + """Called when train begins.""" + return self.training_type_plugin.on_train_start() + + def on_validation_start(self) -> None: + """Called when validation begins.""" + return self.training_type_plugin.on_validation_start() + + def on_test_start(self) -> None: + """Called when test begins.""" + return self.training_type_plugin.on_test_start() + + def on_predict_start(self) -> None: + """Called when predict begins.""" + return self.training_type_plugin.on_predict_start() + + def on_validation_end(self) -> None: + """Called when validation ends.""" + return self.training_type_plugin.on_validation_end() + + def on_test_end(self) -> None: + """Called when test end.""" + return self.training_type_plugin.on_test_end() + + def on_predict_end(self) -> None: + """Called when predict ends.""" + return self.training_type_plugin.on_predict_end() + + def on_train_end(self) -> None: + """Called when train ends.""" + return self.training_type_plugin.on_train_end() diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 018e0a2311350..572dfdae0e492 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -238,3 +238,41 @@ def broadcast(self, obj: object, src: int = 0) -> object: def teardown(self) -> None: for k, model in self.poptorch_models.items(): model.destroy() + + def _compiled(self, model): + return model._executable is not None + + def detach_models(self): + for k, model in self.poptorch_models.items(): + if self._compiled(model) and model.isAttachedToDevice(): + model.detachFromDevice() + + def load_model(self, stage): + self.detach_models() + model = self.poptorch_models[stage] + if self._compiled(model): + model.attachToDevice() + + def on_train_start(self): + self.load_model('train') + + def on_validation_start(self): + self.load_model('val') + + def on_test_start(self): + self.load_model('test') + + def on_predict_start(self): + self.load_model('predict') + + def on_train_end(self): + self.detach_models() + + def on_validation_end(self): + self.detach_models() + + def on_test_end(self): + self.detach_models() + + def on_predict_end(self): + self.detach_models() diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index c80f00d345e8a..d35e02968e753 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -330,3 +330,35 @@ def register_plugins(cls, plugin_registry): def should_rank_save_checkpoint(self) -> bool: """Returns whether the checkpoint should be saved (rank based)""" return self.is_global_zero + + def 
on_train_start(self) -> None: + """Called when train begins.""" + pass + + def on_validation_start(self) -> None: + """Called when validation begins.""" + pass + + def on_test_start(self) -> None: + """Called when test begins.""" + pass + + def on_predict_start(self) -> None: + """Called when predict begins.""" + pass + + def on_train_end(self) -> None: + """Called when train ends.""" + pass + + def on_validation_end(self) -> None: + """Called when validation ends.""" + pass + + def on_test_end(self) -> None: + """Called when test end.""" + pass + + def on_predict_end(self): + """Called when predict ends.""" + pass diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py index f048297892533..d6d2f1af48599 100644 --- a/pytorch_lightning/trainer/evaluation_loop.py +++ b/pytorch_lightning/trainer/evaluation_loop.py @@ -79,8 +79,10 @@ def on_evaluation_start(self, *args: Any, **kwargs: Any) -> None: self.should_track_batch_outputs_for_epoch_end: bool = self._should_track_batch_outputs_for_epoch_end() if self.trainer.testing: self.trainer.call_hook('on_test_start', *args, **kwargs) + self.trainer.accelerator.on_test_start() else: self.trainer.call_hook('on_validation_start', *args, **kwargs) + self.trainer.accelerator.on_validation_start() def on_evaluation_model_eval(self) -> None: model_ref = self.trainer.lightning_module @@ -99,8 +101,10 @@ def on_evaluation_model_train(self) -> None: def on_evaluation_end(self, *args: Any, **kwargs: Any) -> None: if self.trainer.testing: self.trainer.call_hook('on_test_end', *args, **kwargs) + self.trainer.accelerator.on_test_end() else: self.trainer.call_hook('on_validation_end', *args, **kwargs) + self.trainer.accelerator.on_validation_end() if self.trainer.state.fn != TrainerFn.FITTING: # summarize profile results diff --git a/pytorch_lightning/trainer/predict_loop.py b/pytorch_lightning/trainer/predict_loop.py index c06ced6662d81..25d4fd83d8cc5 100644 --- a/pytorch_lightning/trainer/predict_loop.py +++ b/pytorch_lightning/trainer/predict_loop.py @@ -141,6 +141,7 @@ def on_predict_start(self) -> None: # hook self.trainer.call_hook("on_predict_start") self.trainer.call_hook("on_predict_epoch_start") + self.trainer.accelerator.on_predict_start() def on_predict_epoch_end(self) -> Optional[_PREDICT_OUTPUT]: self.trainer.profiler.describe() @@ -162,3 +163,4 @@ def on_predict_end(self): # hook self.trainer.call_hook("on_predict_end") + self.trainer.accelerator.on_predict_end() diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 62138790138ee..32aca773466a8 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -101,6 +101,7 @@ def should_skip_training(self) -> bool: def on_train_start(self): # hook self.trainer.call_hook("on_train_start") + self.trainer.accelerator.on_train_start() def on_train_end(self): if self._teardown_already_run: From e18039c1a2e06ee5ea8a4a412d469409dab419dc Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 2 Jun 2021 12:51:02 +0100 Subject: [PATCH 16/60] Fixes for tests --- pl_examples/ipu_examples/mnist.py | 7 +++---- pytorch_lightning/plugins/training_type/deepspeed.py | 2 +- pytorch_lightning/plugins/training_type/ipu.py | 10 +++++----- pytorch_lightning/utilities/device_dtype_mixin.py | 2 +- tests/callbacks/test_pruning.py | 2 +- tests/plugins/test_deepspeed_plugin.py | 3 ++- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pl_examples/ipu_examples/mnist.py 
b/pl_examples/ipu_examples/mnist.py index 980dd3430446c..aba24ccbeef34 100644 --- a/pl_examples/ipu_examples/mnist.py +++ b/pl_examples/ipu_examples/mnist.py @@ -19,6 +19,7 @@ import pytorch_lightning as pl from pl_examples.basic_examples.mnist_datamodule import MNISTDataModule +from pytorch_lightning.plugins import IPUPlugin class LitClassifier(pl.LightningModule): @@ -80,9 +81,7 @@ def configure_optimizers(self): model = LitClassifier() - trainer = pl.Trainer(max_epochs=2, ipu_cores=8) + trainer = pl.Trainer(max_epochs=2, ipu_cores=8, plugins=IPUPlugin(device_iterations=1), profiler='simple') trainer.fit(model, datamodule=dm) - - result = trainer.test(model, datamodule=dm) - pprint(result) + trainer.test(model, datamodule=dm) diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index 8dd04aafa6b86..33e66423624b5 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -63,7 +63,7 @@ def forward(self, *inputs, **kwargs): @staticmethod def batch_to(data): - return data.half() + return data.convert_model_to_half() def _move_float_tensors_to_half(self, batch: Any): batch = apply_to_collection(batch, (torch.FloatTensor, torch.cuda.FloatTensor), function=self.batch_to) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 572dfdae0e492..8cc43f22dd75f 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -46,16 +46,16 @@ class IPUPlugin(ParallelPlugin): def __init__( self, - half: bool = False, device_iterations: int = 1, autoround_num_ipus: bool = True, autoreport: bool = True, autoreport_dir: Optional[str] = None, + convert_model_to_half: bool = False, parallel_devices: Optional[List[torch.device]] = None, cluster_environment: Optional[ClusterEnvironment] = None, ): super().__init__(parallel_devices, cluster_environment) - self.half = half + self.convert_model_to_half = convert_model_to_half self.device_iterations = device_iterations self.autoround_num_ipus = autoround_num_ipus self.autoreport = autoreport @@ -80,11 +80,11 @@ def lightning_module(self) -> Optional[LightningModule]: return self.model.module if isinstance(self.model, LightningIPUModule) else self.model def pre_dispatch(self) -> None: - if self.half: + if self.convert_model_to_half: log.info('Using full 16bit precision, converting LightningModule weights to FP16.') self.model = self.model.half() precision = self.lightning_module.trainer.accelerator.precision_plugin.precision - precision = 16 if self.half else precision + precision = 16 if self.convert_model_to_half else precision model = LightningIPUModule(self.lightning_module, precision) self.model = model @@ -250,7 +250,7 @@ def detach_models(self): def load_model(self, stage): self.detach_models() model = self.poptorch_models[stage] - if self._compiled(model): + if self._compiled(model) and not model.isAttachedToDevice(): model.attachToDevice() def on_train_start(self): diff --git a/pytorch_lightning/utilities/device_dtype_mixin.py b/pytorch_lightning/utilities/device_dtype_mixin.py index 13f16d9b426ac..eeb44ed917faf 100644 --- a/pytorch_lightning/utilities/device_dtype_mixin.py +++ b/pytorch_lightning/utilities/device_dtype_mixin.py @@ -90,7 +90,7 @@ def to(self, *args, **kwargs) -> Module: >>> module.weight #doctest: +ELLIPSIS tensor([[...]], dtype=torch.float64) >>> cpu = torch.device('cpu') - >>> module.to(cpu, 
dtype=torch.half, non_blocking=True) + >>> module.to(cpu, dtype=torch.convert_model_to_half, non_blocking=True) ExampleModule() >>> module.weight #doctest: +ELLIPSIS tensor([[...]], dtype=torch.float16) diff --git a/tests/callbacks/test_pruning.py b/tests/callbacks/test_pruning.py index d4957905454d8..f198b29d24e84 100644 --- a/tests/callbacks/test_pruning.py +++ b/tests/callbacks/test_pruning.py @@ -161,7 +161,7 @@ def test_pruning_callback( ) -@RunIf(special=True) +@RunIf(special=True, min_gpus=2) @pytest.mark.parametrize("parameters_to_prune", [False, True]) @pytest.mark.parametrize("use_global_unstructured", [False, True]) def test_pruning_callback_ddp(tmpdir, use_global_unstructured: bool, parameters_to_prune: bool): diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index 85d069b90288d..7c8753094ce0c 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -618,7 +618,8 @@ def _assert_save_model_is_equal(model, tmpdir, trainer, cls=BoringModel): if trainer.global_rank == 0: saved_model = cls.load_from_checkpoint(checkpoint_path) if model.dtype == torch.half: - saved_model = saved_model.half() # model is loaded in float32 as default, move it to float16 + saved_model = saved_model.convert_model_to_half( + ) # model is loaded in float32 as default, move it to float16 model = model.cpu() # Assert model parameters are identical after loading for orig_param, trained_model_param in zip(model.parameters(), saved_model.parameters()): From 2e43fee1b1d360eea0efcfd60f720071ce82b40c Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 2 Jun 2021 14:06:46 +0100 Subject: [PATCH 17/60] Fixes for tests --- pl_examples/ipu_examples/mnist.py | 2 +- pytorch_lightning/plugins/training_type/deepspeed.py | 2 +- pytorch_lightning/utilities/device_dtype_mixin.py | 2 +- tests/plugins/test_deepspeed_plugin.py | 3 +-- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pl_examples/ipu_examples/mnist.py b/pl_examples/ipu_examples/mnist.py index aba24ccbeef34..6463b48f0c88b 100644 --- a/pl_examples/ipu_examples/mnist.py +++ b/pl_examples/ipu_examples/mnist.py @@ -81,7 +81,7 @@ def configure_optimizers(self): model = LitClassifier() - trainer = pl.Trainer(max_epochs=2, ipu_cores=8, plugins=IPUPlugin(device_iterations=1), profiler='simple') + trainer = pl.Trainer(max_epochs=2, ipu_cores=8) trainer.fit(model, datamodule=dm) trainer.test(model, datamodule=dm) diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index 33e66423624b5..8dd04aafa6b86 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -63,7 +63,7 @@ def forward(self, *inputs, **kwargs): @staticmethod def batch_to(data): - return data.convert_model_to_half() + return data.half() def _move_float_tensors_to_half(self, batch: Any): batch = apply_to_collection(batch, (torch.FloatTensor, torch.cuda.FloatTensor), function=self.batch_to) diff --git a/pytorch_lightning/utilities/device_dtype_mixin.py b/pytorch_lightning/utilities/device_dtype_mixin.py index eeb44ed917faf..13f16d9b426ac 100644 --- a/pytorch_lightning/utilities/device_dtype_mixin.py +++ b/pytorch_lightning/utilities/device_dtype_mixin.py @@ -90,7 +90,7 @@ def to(self, *args, **kwargs) -> Module: >>> module.weight #doctest: +ELLIPSIS tensor([[...]], dtype=torch.float64) >>> cpu = torch.device('cpu') - >>> module.to(cpu, dtype=torch.convert_model_to_half, 
non_blocking=True) + >>> module.to(cpu, dtype=torch.half, non_blocking=True) ExampleModule() >>> module.weight #doctest: +ELLIPSIS tensor([[...]], dtype=torch.float16) diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index 7c8753094ce0c..85d069b90288d 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -618,8 +618,7 @@ def _assert_save_model_is_equal(model, tmpdir, trainer, cls=BoringModel): if trainer.global_rank == 0: saved_model = cls.load_from_checkpoint(checkpoint_path) if model.dtype == torch.half: - saved_model = saved_model.convert_model_to_half( - ) # model is loaded in float32 as default, move it to float16 + saved_model = saved_model.half() # model is loaded in float32 as default, move it to float16 model = model.cpu() # Assert model parameters are identical after loading for orig_param, trained_model_param in zip(model.parameters(), saved_model.parameters()): From 53d31a0c7a9c283b7eb3f1b7f033c9d8c205136d Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 2 Jun 2021 14:07:59 +0100 Subject: [PATCH 18/60] Move earlier --- pytorch_lightning/plugins/training_type/ipu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 8cc43f22dd75f..1e34c079ab998 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -70,8 +70,8 @@ def __init__( options["autoReport.directory"] = self.autoreport_dir os.environ["POPLAR_ENGINE_OPTIONS"] = json.dumps(options) - def setup(self, model: Module) -> None: - super().setup(model) + def setup_environment(self) -> None: + super().setup_environment() if not poptorch.ipuHardwareIsAvailable(): raise MisconfigurationException("IPU Accelerator requires IPUs to run.") From 62414323bab1a1d60b57512a8c629dd9099e25c0 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 2 Jun 2021 14:51:36 +0100 Subject: [PATCH 19/60] Cleanups --- pl_examples/ipu_examples/mnist.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pl_examples/ipu_examples/mnist.py b/pl_examples/ipu_examples/mnist.py index 6463b48f0c88b..c907f4a15af48 100644 --- a/pl_examples/ipu_examples/mnist.py +++ b/pl_examples/ipu_examples/mnist.py @@ -12,14 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from pprint import pprint - import torch from torch.nn import functional as F import pytorch_lightning as pl from pl_examples.basic_examples.mnist_datamodule import MNISTDataModule -from pytorch_lightning.plugins import IPUPlugin class LitClassifier(pl.LightningModule): @@ -60,7 +57,7 @@ def test_step(self, batch, batch_idx): return acc def accuracy(self, logits, y): - # todo (sean): currently IPU poptorch doesn't implicit convert bools to tensor + # currently IPU poptorch doesn't implicit convert bools to tensor # hence we use an explicit calculation for accuracy here. Once fixed in poptorch # we can use the accuracy metric. 
acc = torch.sum(torch.eq(torch.argmax(logits, -1), y).to(torch.float32)) / len(y) From d249a131b06f8f5c36cd4c061bfb56202e7b1042 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 2 Jun 2021 16:49:26 +0100 Subject: [PATCH 20/60] Add check for nvcc --- tests/special_tests.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/special_tests.sh b/tests/special_tests.sh index cf81700291b8d..b6de1ca69ecef 100755 --- a/tests/special_tests.sh +++ b/tests/special_tests.sh @@ -68,7 +68,9 @@ for i in "${!files_arr[@]}"; do done < <(echo "$test_code") done -nvprof --profile-from-start off -o trace_name.prof -- python ${defaults} tests/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx +if nvcc --version; then + nvprof --profile-from-start off -o trace_name.prof -- python ${defaults} tests/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx +fi # echo test report printf '=%.s' {1..80} From d08cf39ac977dc6f79ef93579bbfe9e8593a82b2 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 2 Jun 2021 21:17:53 +0100 Subject: [PATCH 21/60] Add tests, cleanups --- pytorch_lightning/accelerators/accelerator.py | 13 ++++ .../plugins/training_type/ipu.py | 32 ++++++++- .../training_type/training_type_plugin.py | 13 ++++ pytorch_lightning/trainer/evaluation_loop.py | 4 -- pytorch_lightning/trainer/predict_loop.py | 2 - pytorch_lightning/trainer/trainer.py | 5 +- pytorch_lightning/trainer/training_loop.py | 9 +-- tests/accelerators/test_ipu.py | 67 +++++++++++++++++-- tests/plugins/test_sharded_plugin.py | 2 +- 9 files changed, 123 insertions(+), 24 deletions(-) diff --git a/pytorch_lightning/accelerators/accelerator.py b/pytorch_lightning/accelerators/accelerator.py index 9c2b3dadf0961..e4a8b1ff4cb08 100644 --- a/pytorch_lightning/accelerators/accelerator.py +++ b/pytorch_lightning/accelerators/accelerator.py @@ -571,3 +571,16 @@ def on_predict_end(self) -> None: def on_train_end(self) -> None: """Called when train ends.""" return self.training_type_plugin.on_train_end() + + def on_train_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None: + """ + Called in the training loop before anything happens for that batch. + + If you return -1 here, you will skip training for the rest of the current epoch. + + Args: + batch: The batched data as it is returned by the training DataLoader. 
+ batch_idx: the index of the batch + dataloader_idx: the index of the dataloader + """ + return self.training_type_plugin.on_train_batch_start(batch, batch_idx, dataloader_idx) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 1e34c079ab998..499fe48bca359 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -4,10 +4,10 @@ from typing import Any, Iterable, List, Optional, Union import torch -from torch.nn import Module from torch.utils.data import DataLoader from pytorch_lightning import _logger as log +from pytorch_lightning.callbacks import GradientAccumulationScheduler from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.overrides.base import _LightningModuleWrapperBase from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment @@ -61,6 +61,7 @@ def __init__( self.autoreport = autoreport self.autoreport_dir = autoreport_dir self.poptorch_models = {} + self._original_accumulate_grad_batches = None if self.autoreport: options = {"autoReport.all": self.autoreport} @@ -80,6 +81,7 @@ def lightning_module(self) -> Optional[LightningModule]: return self.model.module if isinstance(self.model, LightningIPUModule) else self.model def pre_dispatch(self) -> None: + self._handle_gradient_accumulation_steps() if self.convert_model_to_half: log.info('Using full 16bit precision, converting LightningModule weights to FP16.') self.model = self.model.half() @@ -174,10 +176,28 @@ def _convert_to_poptorch_loader(self, dataloader: Union[Iterable, DataLoader], dataloader.multiprocessing_context = multiprocessing_context return dataloader + def _handle_gradient_accumulation_steps(self): + """ + This functions overrides the trainer.accumulation_scheduler to generate + ``accumulate_grad_batches=1``. + Therefore, ``optimizer_step`` will be called on every batch, and the IPU will handle grad accumulation. + """ + self._original_accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches + if self._original_accumulate_grad_batches > 1: + # todo (tchaton) Add support for accumulate_grad_batches being a dictionary. + self.lightning_module.trainer.accumulation_scheduler = GradientAccumulationScheduler({0: 1}) + + def update_global_step(self, total_batch_idx: int, current_global_step: int) -> int: + if self._original_accumulate_grad_batches > 1: + if total_batch_idx % self._original_accumulate_grad_batches == 0: + current_global_step += 1 + return current_global_step + return super().update_global_step(total_batch_idx, current_global_step) + @property def _n_replicate(self): - # Ensure we replicate primitives values to have enough dimensions to split across devices - accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches + # Ensure we replicate values to have enough dimensions to split across devices + accumulate_grad_batches = self._original_accumulate_grad_batches return self.replication_factor * self.device_iterations * accumulate_grad_batches def _prepare_input(self, args): @@ -240,6 +260,7 @@ def teardown(self) -> None: model.destroy() def _compiled(self, model): + # Required to ensure we only attach compiled models, as they are compiled lazily. 
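# --- editor's sketch, not part of the patch -----------------------------------
# Because gradient accumulation is delegated to the IPU, the host performs an
# optimizer step on every batch; update_global_step above therefore only
# advances global_step once per original accumulation window. A standalone
# illustration (assumes the base plugin simply increments the step):
def host_global_step(total_batch_idx: int, current_global_step: int,
                     original_accumulate_grad_batches: int) -> int:
    if original_accumulate_grad_batches > 1:
        if total_batch_idx % original_accumulate_grad_batches == 0:
            current_global_step += 1
        return current_global_step
    return current_global_step + 1


assert host_global_step(total_batch_idx=4, current_global_step=1, original_accumulate_grad_batches=2) == 2
assert host_global_step(total_batch_idx=5, current_global_step=2, original_accumulate_grad_batches=2) == 2
# -------------------------------------------------------------------------------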
return model._executable is not None def detach_models(self): @@ -276,3 +297,8 @@ def on_test_end(self): def on_predict_end(self): self.detach_models() + + def on_train_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None: + # Update optimizer stats if LR scheduler modified the optimizer state + optimizer = self.lightning_module.trainer.optimizers[0] + self.poptorch_models['train'].setOptimizer(optimizer) diff --git a/pytorch_lightning/plugins/training_type/training_type_plugin.py b/pytorch_lightning/plugins/training_type/training_type_plugin.py index d35e02968e753..e19413c8c664f 100644 --- a/pytorch_lightning/plugins/training_type/training_type_plugin.py +++ b/pytorch_lightning/plugins/training_type/training_type_plugin.py @@ -362,3 +362,16 @@ def on_test_end(self) -> None: def on_predict_end(self): """Called when predict ends.""" pass + + def on_train_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None: + """ + Called in the training loop before anything happens for that batch. + + If you return -1 here, you will skip training for the rest of the current epoch. + + Args: + batch: The batched data as it is returned by the training DataLoader. + batch_idx: the index of the batch + dataloader_idx: the index of the dataloader + """ + pass diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py index d6d2f1af48599..f048297892533 100644 --- a/pytorch_lightning/trainer/evaluation_loop.py +++ b/pytorch_lightning/trainer/evaluation_loop.py @@ -79,10 +79,8 @@ def on_evaluation_start(self, *args: Any, **kwargs: Any) -> None: self.should_track_batch_outputs_for_epoch_end: bool = self._should_track_batch_outputs_for_epoch_end() if self.trainer.testing: self.trainer.call_hook('on_test_start', *args, **kwargs) - self.trainer.accelerator.on_test_start() else: self.trainer.call_hook('on_validation_start', *args, **kwargs) - self.trainer.accelerator.on_validation_start() def on_evaluation_model_eval(self) -> None: model_ref = self.trainer.lightning_module @@ -101,10 +99,8 @@ def on_evaluation_model_train(self) -> None: def on_evaluation_end(self, *args: Any, **kwargs: Any) -> None: if self.trainer.testing: self.trainer.call_hook('on_test_end', *args, **kwargs) - self.trainer.accelerator.on_test_end() else: self.trainer.call_hook('on_validation_end', *args, **kwargs) - self.trainer.accelerator.on_validation_end() if self.trainer.state.fn != TrainerFn.FITTING: # summarize profile results diff --git a/pytorch_lightning/trainer/predict_loop.py b/pytorch_lightning/trainer/predict_loop.py index 25d4fd83d8cc5..c06ced6662d81 100644 --- a/pytorch_lightning/trainer/predict_loop.py +++ b/pytorch_lightning/trainer/predict_loop.py @@ -141,7 +141,6 @@ def on_predict_start(self) -> None: # hook self.trainer.call_hook("on_predict_start") self.trainer.call_hook("on_predict_epoch_start") - self.trainer.accelerator.on_predict_start() def on_predict_epoch_end(self) -> Optional[_PREDICT_OUTPUT]: self.trainer.profiler.describe() @@ -163,4 +162,3 @@ def on_predict_end(self): # hook self.trainer.call_hook("on_predict_end") - self.trainer.accelerator.on_predict_end() diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b559ed710f29e..3b4ca6b6e2f92 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1249,9 +1249,8 @@ def call_hook(self, hook_name: str, *args, **kwargs) -> Any: hook_fx = getattr(model_ref, hook_name) output = hook_fx(*args, **kwargs) - # 
if the PL module doesn't have the hook then call the accelerator - # used to auto-reduce things for the user with Results obj - elif hasattr(self.accelerator, hook_name): + # call hook in accelerator + if hasattr(self.accelerator, hook_name): accelerator_hook = getattr(self.accelerator, hook_name) output = accelerator_hook(*args, **kwargs) diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 32aca773466a8..64c05b8be1547 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -101,7 +101,6 @@ def should_skip_training(self) -> bool: def on_train_start(self): # hook self.trainer.call_hook("on_train_start") - self.trainer.accelerator.on_train_start() def on_train_end(self): if self._teardown_already_run: @@ -126,9 +125,6 @@ def on_train_end(self): # summarize profile results self.trainer.profiler.describe() - # give accelerators a chance to finish - self.trainer.accelerator.on_train_end() - # reset bookkeeping self.trainer.state.stage = None @@ -631,9 +627,8 @@ def _on_train_epoch_end_hook(self, processed_epoch_output) -> None: else: model_ref.on_train_epoch_end() - # if the PL module doesn't have the hook then call the accelerator - # used to auto-reduce things for the user with Results obj - elif hasattr(self.trainer.accelerator, hook_name): + # call hook in accelerator + if hasattr(self.trainer.accelerator, hook_name): accelerator_hook = getattr(self.trainer.accelerator, hook_name) accelerator_hook() diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index c14e47e2cd7c6..f830f099a7d91 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import os +from typing import Any, Optional import pytest import torch import torch.nn.functional as F from torch.utils.data import DataLoader -from pytorch_lightning import seed_everything, Trainer +from pytorch_lightning import Callback, seed_everything, Trainer +from pytorch_lightning.plugins import IPUPlugin, IPUPrecisionPlugin from tests.helpers.boring_model import BoringModel from tests.helpers.datamodules import ClassifDataModule from tests.helpers.datasets import SklearnDataset @@ -91,7 +93,7 @@ def test_epoch_end(self, outputs) -> None: @pytest.mark.parametrize('ipu_cores', [1, 4]) def test_all_stages(tmpdir, ipu_cores): model = IPUModel() - trainer = Trainer(fast_dev_run=True, accelerator='ipu', ipu_cores=ipu_cores) + trainer = Trainer(fast_dev_run=True, ipu_cores=ipu_cores) trainer.fit(model) trainer.validate(model) trainer.test(model) @@ -103,7 +105,7 @@ def test_all_stages(tmpdir, ipu_cores): def test_inference_only(tmpdir, ipu_cores): model = IPUModel() - trainer = Trainer(fast_dev_run=True, accelerator='ipu', ipu_cores=ipu_cores) + trainer = Trainer(fast_dev_run=True, ipu_cores=ipu_cores) trainer.validate(model) trainer.test(model) trainer.predict(model, model.val_dataloader()) @@ -176,4 +178,61 @@ def test_dataloader(self): assert saved_result > 0.6 and (saved_result == test_result) -# todo add test for precision 16 and fully half precision + device iterations +@RunIf(ipu=True, special=True) +def test_mixed_precision(tmpdir): + + class TestCallback(Callback): + + def setup(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule', stage: Optional[str] = None) -> None: + assert isinstance(trainer.accelerator.precision_plugin, IPUPrecisionPlugin) + assert trainer.accelerator.precision_plugin.precision == 16 + assert trainer.accelerator.model.precision == 16 + raise SystemExit + + model = IPUModel() + trainer = Trainer(fast_dev_run=True, ipu_cores=1, precision=16, callbacks=TestCallback()) + with pytest.raises(SystemExit): + trainer.fit(model) + + +@RunIf(ipu=True, special=True) +def test_pure_half_precision(tmpdir): + + class TestCallback(Callback): + + def on_train_start(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule') -> None: + assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) + assert isinstance(trainer.accelerator.precision_plugin, IPUPrecisionPlugin) + assert trainer.accelerator.precision_plugin.precision == 16 + assert trainer.accelerator.model.precision == 16 + assert trainer.accelerator.training_type_plugin.convert_model_of_to_half + for param in trainer.accelerator.model.parameters(): + assert param.dtype == torch.float16 + raise SystemExit + + model = IPUModel() + trainer = Trainer( + fast_dev_run=True, + ipu_cores=1, + precision=16, + plugins=IPUPlugin(convert_model_to_half=True), + callbacks=TestCallback() + ) + with pytest.raises(SystemExit): + trainer.fit(model) + + +@RunIf(ipu=True, special=True) +def test_device_iterations_ipu_plugin(tmpdir): + + class TestCallback(Callback): + + def setup(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule', stage: Optional[str] = None) -> None: + assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) + assert trainer.accelerator.training_type_plugin.device_iterations == 20 + raise SystemExit + + model = IPUModel() + trainer = Trainer(fast_dev_run=True, ipu_cores=1, plugins=IPUPlugin(device_iterations=20), callbacks=TestCallback()) + with pytest.raises(SystemExit): + trainer.fit(model) diff --git a/tests/plugins/test_sharded_plugin.py 
b/tests/plugins/test_sharded_plugin.py index 7ab49e6826d58..3c89c71209191 100644 --- a/tests/plugins/test_sharded_plugin.py +++ b/tests/plugins/test_sharded_plugin.py @@ -51,7 +51,7 @@ def on_fit_start(self, trainer, pl_module): callbacks=[CB()], ) - with pytest.raises(SystemExit): + `with pytest.raises(SystemExit):` trainer.fit(model) From 7469744d765c836109780a62b6579ba9adb4815b Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 3 Jun 2021 10:16:10 +0100 Subject: [PATCH 22/60] Fix errors --- tests/accelerators/test_ipu.py | 30 +++++++++++++++++++++++++--- tests/plugins/test_sharded_plugin.py | 2 +- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index f830f099a7d91..adb377638ff5d 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import os -from typing import Any, Optional +from typing import Optional import pytest import torch @@ -20,6 +20,7 @@ from torch.utils.data import DataLoader from pytorch_lightning import Callback, seed_everything, Trainer +from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.plugins import IPUPlugin, IPUPrecisionPlugin from tests.helpers.boring_model import BoringModel from tests.helpers.datamodules import ClassifDataModule @@ -200,7 +201,7 @@ def test_pure_half_precision(tmpdir): class TestCallback(Callback): - def on_train_start(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule') -> None: + def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) assert isinstance(trainer.accelerator.precision_plugin, IPUPrecisionPlugin) assert trainer.accelerator.precision_plugin.precision == 16 @@ -227,12 +228,35 @@ def test_device_iterations_ipu_plugin(tmpdir): class TestCallback(Callback): - def setup(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule', stage: Optional[str] = None) -> None: + def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) assert trainer.accelerator.training_type_plugin.device_iterations == 20 + # assert device iterations has been set correctly within the poptorch options + poptorch_model = trainer.accelerator.training_type_plugin.poptorch_models['train'] + assert poptorch_model._options.toDict()['device_iterations'] == 20 raise SystemExit model = IPUModel() trainer = Trainer(fast_dev_run=True, ipu_cores=1, plugins=IPUPlugin(device_iterations=20), callbacks=TestCallback()) with pytest.raises(SystemExit): trainer.fit(model) + + +@RunIf(ipu=True, special=True) +def test_accumulated_batches(tmpdir): + + class TestCallback(Callback): + + def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: + # ensure the accumulation_scheduler is overridden to accumulate every batch + # since ipu handle accumulation + assert trainer.accumulation_scheduler.scheduling == {0: 1} + # assert poptorch option have been set correctly + poptorch_model = trainer.accelerator.training_type_plugin.poptorch_models['train'] + assert poptorch_model._options.Training.toDict()['gradient_accumulation'] == 2 + raise SystemExit + + model = IPUModel() + trainer = Trainer(fast_dev_run=True, ipu_cores=1, accumulate_grad_batches=2, callbacks=TestCallback()) + with pytest.raises(SystemExit): + 
trainer.fit(model) diff --git a/tests/plugins/test_sharded_plugin.py b/tests/plugins/test_sharded_plugin.py index 3c89c71209191..7ab49e6826d58 100644 --- a/tests/plugins/test_sharded_plugin.py +++ b/tests/plugins/test_sharded_plugin.py @@ -51,7 +51,7 @@ def on_fit_start(self, trainer, pl_module): callbacks=[CB()], ) - `with pytest.raises(SystemExit):` + with pytest.raises(SystemExit): trainer.fit(model) From f474c5bc1f453cc43845069ad892686b24337df8 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 3 Jun 2021 14:08:23 +0100 Subject: [PATCH 23/60] fix --- pytorch_lightning/trainer/trainer.py | 5 +++-- pytorch_lightning/trainer/training_loop.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 3b4ca6b6e2f92..ff1f7e7607572 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1249,10 +1249,11 @@ def call_hook(self, hook_name: str, *args, **kwargs) -> Any: hook_fx = getattr(model_ref, hook_name) output = hook_fx(*args, **kwargs) - # call hook in accelerator + # if the PL module doesn't have the hook then call the accelerator + # used to auto-reduce things for the user with Results obj if hasattr(self.accelerator, hook_name): accelerator_hook = getattr(self.accelerator, hook_name) - output = accelerator_hook(*args, **kwargs) + accelerator_hook(*args, **kwargs) if not skip: self._cache_logged_metrics() diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 64c05b8be1547..8684401b706e2 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -627,7 +627,8 @@ def _on_train_epoch_end_hook(self, processed_epoch_output) -> None: else: model_ref.on_train_epoch_end() - # call hook in accelerator + # if the PL module doesn't have the hook then call the accelerator + # used to auto-reduce things for the user with Results obj if hasattr(self.trainer.accelerator, hook_name): accelerator_hook = getattr(self.trainer.accelerator, hook_name) accelerator_hook() From e178d5f2fe75da724c40327d0f8544ea95c796d6 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 3 Jun 2021 15:11:01 +0100 Subject: [PATCH 24/60] Try condition --- pytorch_lightning/trainer/trainer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index ff1f7e7607572..9bf227d9581c2 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1253,7 +1253,9 @@ def call_hook(self, hook_name: str, *args, **kwargs) -> Any: # used to auto-reduce things for the user with Results obj if hasattr(self.accelerator, hook_name): accelerator_hook = getattr(self.accelerator, hook_name) - accelerator_hook(*args, **kwargs) + accelerator_output = accelerator_hook(*args, **kwargs) + if not output: + output = accelerator_output if not skip: self._cache_logged_metrics() From c70492082d5f0ec9ff66176c9a33895c76eb51da Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 3 Jun 2021 15:47:31 +0100 Subject: [PATCH 25/60] Add missing annotation --- tests/accelerators/test_ipu.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index adb377638ff5d..43623f84a44fa 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -112,6 +112,7 @@ def test_inference_only(tmpdir, ipu_cores): 
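Aside: the fallback above uses a truthiness check (``if not output:``), which a later commit in this series tightens to ``if output is None:``. A toy illustration (hypothetical hook results, not from the patch) of why the distinction matters: a hook can legitimately return a falsy value such as ``0.0``, and the truthiness check would silently replace it with the accelerator's result.

    module_output = 0.0            # falsy but valid return value from the LightningModule hook
    accelerator_output = 123

    chosen = accelerator_output if not module_output else module_output
    assert chosen == 123           # truthiness check discards the module's 0.0

    chosen = accelerator_output if module_output is None else module_output
    assert chosen == 0.0           # the explicit None check keeps it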
trainer.predict(model, model.val_dataloader()) +@RunIf(ipu=True, special=True) def test_optimization(tmpdir): seed_everything(42) @@ -184,7 +185,7 @@ def test_mixed_precision(tmpdir): class TestCallback(Callback): - def setup(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule', stage: Optional[str] = None) -> None: + def setup(self, trainer: Trainer, pl_module: LightningModule, stage: Optional[str] = None) -> None: assert isinstance(trainer.accelerator.precision_plugin, IPUPrecisionPlugin) assert trainer.accelerator.precision_plugin.precision == 16 assert trainer.accelerator.model.precision == 16 From c54a2166908c090ee6b20fa4090719bfba1c2dfc Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 3 Jun 2021 16:07:51 +0100 Subject: [PATCH 26/60] Clearer --- pytorch_lightning/trainer/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 9bf227d9581c2..b52e24fdeb3b3 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1254,7 +1254,7 @@ def call_hook(self, hook_name: str, *args, **kwargs) -> Any: if hasattr(self.accelerator, hook_name): accelerator_hook = getattr(self.accelerator, hook_name) accelerator_output = accelerator_hook(*args, **kwargs) - if not output: + if output is None: output = accelerator_output if not skip: From 2ea176655af058180130a0e241e17ba66732d999 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 3 Jun 2021 16:08:45 +0100 Subject: [PATCH 27/60] Clearer message --- pytorch_lightning/trainer/trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b52e24fdeb3b3..fda7b015a38a2 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1249,11 +1249,11 @@ def call_hook(self, hook_name: str, *args, **kwargs) -> Any: hook_fx = getattr(model_ref, hook_name) output = hook_fx(*args, **kwargs) - # if the PL module doesn't have the hook then call the accelerator - # used to auto-reduce things for the user with Results obj + # call the accelerator hook if hasattr(self.accelerator, hook_name): accelerator_hook = getattr(self.accelerator, hook_name) accelerator_output = accelerator_hook(*args, **kwargs) + # used to auto-reduce things for the user with Results obj if output is None: output = accelerator_output From 751f0ea4f20d866f047e0021886efd547310b9e6 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 3 Jun 2021 19:11:32 +0100 Subject: [PATCH 28/60] Fix variable --- tests/accelerators/test_ipu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 43623f84a44fa..0d5a6e89bb331 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -207,7 +207,7 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: assert isinstance(trainer.accelerator.precision_plugin, IPUPrecisionPlugin) assert trainer.accelerator.precision_plugin.precision == 16 assert trainer.accelerator.model.precision == 16 - assert trainer.accelerator.training_type_plugin.convert_model_of_to_half + assert trainer.accelerator.training_type_plugin.convert_model_to_half for param in trainer.accelerator.model.parameters(): assert param.dtype == torch.float16 raise SystemExit From 61d2014afaf096f14cbd8929292d2fa8a7479052 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 7 Jun 2021 12:37:00 +0100 
Subject: [PATCH 29/60] Cleanups --- pytorch_lightning/accelerators/ipu.py | 4 +- .../plugins/training_type/ipu.py | 105 +++++++++++------- 2 files changed, 67 insertions(+), 42 deletions(-) diff --git a/pytorch_lightning/accelerators/ipu.py b/pytorch_lightning/accelerators/ipu.py index 8374bc1bc1554..34bee31b5a91d 100644 --- a/pytorch_lightning/accelerators/ipu.py +++ b/pytorch_lightning/accelerators/ipu.py @@ -15,13 +15,15 @@ from torch.optim import Optimizer +import pytorch_lightning as pl from pytorch_lightning.accelerators.accelerator import Accelerator from pytorch_lightning.utilities.exceptions import MisconfigurationException class IPUAccelerator(Accelerator): + """ Accelerator for IPUs. """ - def setup_optimizers(self, trainer): + def setup_optimizers(self, trainer: 'pl.Trainer') -> None: super().setup_optimizers(trainer) if len(self.optimizers) > 1: diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 499fe48bca359..2527470a12166 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -43,6 +43,9 @@ def _move_float_tensors_to_half(self, batch: Any): class IPUPlugin(ParallelPlugin): + """ + Plugin for training on IPU devices. + """ def __init__( self, @@ -53,7 +56,19 @@ def __init__( convert_model_to_half: bool = False, parallel_devices: Optional[List[torch.device]] = None, cluster_environment: Optional[ClusterEnvironment] = None, - ): + ) -> None: + """ + Arguments: + + device_iterations: Number of iterations to run on device at once before returning to host. + This can be used as an optimization to speed up training. + https://docs.graphcore.ai/projects/poptorch-user-guide/en/0.1.67/batching.html + autoround_num_ipus: When selecting multiple IPUs, auto-rounds to powers of 2 as required for IPUs. + autoreport: Enable auto-reporting for IPUs using PopVision + https://docs.graphcore.ai/projects/graphcore-popvision-user-guide/en/latest/graph/graph.html + autoreport_dir: Optional directory to store autoReport output. + convert_model_to_half: Converts the model to half precision, which can be used for pure FP16 training. + """ super().__init__(parallel_devices, cluster_environment) self.convert_model_to_half = convert_model_to_half self.device_iterations = device_iterations @@ -92,7 +107,7 @@ def pre_dispatch(self) -> None: self.model = model # Separate models are instantiated for different stages, but they share the same weights on host. - # When validation/test models are run, they sync weights first. + # When validation/test models are run, weights are synced first. if self.lightning_module.trainer.training: # Create model for training which will run training. 
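Aside: a hedged usage sketch of the options documented in the docstring above, using the Trainer flag names as they stand by the end of this series (``ipus`` replaces ``ipu_cores`` in PATCH 36); the values are illustrative only.

    from pytorch_lightning import Trainer
    from pytorch_lightning.plugins import IPUPlugin

    trainer = Trainer(
        ipus=8,
        precision=16,
        plugins=IPUPlugin(
            device_iterations=4,            # run 4 iterations on device per host step
            autoreport=True,
            autoreport_dir="ipu_reports/",  # PopVision report output directory
            convert_model_to_half=True,     # pure FP16 weights
        ),
    )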
@@ -228,33 +243,6 @@ def predict_step(self, *args, **kwargs): args = self._prepare_input(args) return self.poptorch_models['predict'](*args, **kwargs) - @property - def on_gpu(self) -> bool: - return False - - @property - def root_device(self) -> torch.device: - pass - - def model_to_device(self) -> None: - pass - - @property - def is_global_zero(self) -> bool: - return True - - def reduce(self, tensor: Union[torch.Tensor, Any], *args: Any, **kwargs: Any) -> Union[torch.Tensor, Any]: - return tensor - - def barrier(self, name: Optional[str] = None) -> None: - pass - - def all_gather(self, tensor: torch.Tensor, group: Optional[Any] = None, sync_grads: bool = False) -> torch.Tensor: - return tensor - - def broadcast(self, obj: object, src: int = 0) -> object: - return obj - def teardown(self) -> None: for k, model in self.poptorch_models.items(): model.destroy() @@ -263,42 +251,77 @@ def _compiled(self, model): # Required to ensure we only attach compiled models, as they are compiled lazily. return model._executable is not None - def detach_models(self): + def _detach_models(self): + """ + Detaches all stage specific models from IPU devices. + """ for k, model in self.poptorch_models.items(): if self._compiled(model) and model.isAttachedToDevice(): model.detachFromDevice() - def load_model(self, stage): - self.detach_models() + def _load_model(self, stage): + """ + Loads the stage specific accelerator model onto device if compiled and not attached to IPU devices. + Args: + stage: The stage to load + """ + self._detach_models() model = self.poptorch_models[stage] if self._compiled(model) and not model.isAttachedToDevice(): model.attachToDevice() def on_train_start(self): - self.load_model('train') + self._load_model('train') def on_validation_start(self): - self.load_model('val') + self._load_model('val') def on_test_start(self): - self.load_model('test') + self._load_model('test') def on_predict_start(self): - self.load_model('predict') + self._load_model('predict') def on_train_end(self): - self.detach_models() + self._detach_models() def on_validation_end(self): - self.detach_models() + self._detach_models() def on_test_end(self): - self.detach_models() + self._detach_models() def on_predict_end(self): - self.detach_models() + self._detach_models() def on_train_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None: - # Update optimizer stats if LR scheduler modified the optimizer state + # Updates optimizer stats if LR scheduler modified the optimizer state optimizer = self.lightning_module.trainer.optimizers[0] self.poptorch_models['train'].setOptimizer(optimizer) + + @property + def on_gpu(self) -> bool: + return False + + @property + def root_device(self) -> torch.device: + pass + + def model_to_device(self) -> None: + pass + + @property + def is_global_zero(self) -> bool: + return True + + def reduce(self, tensor: Union[torch.Tensor, Any], *args: Any, **kwargs: Any) -> Union[torch.Tensor, Any]: + return tensor + + def barrier(self, name: Optional[str] = None) -> None: + pass + + def all_gather(self, tensor: torch.Tensor, group: Optional[Any] = None, sync_grads: bool = False) -> torch.Tensor: + return tensor + + def broadcast(self, obj: object, src: int = 0) -> object: + return obj From 62860ffa8c0e78292d7245dff1850e8336f857b9 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 7 Jun 2021 13:40:04 +0100 Subject: [PATCH 30/60] Add comment --- pl_examples/ipu_examples/mnist.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pl_examples/ipu_examples/mnist.py 
b/pl_examples/ipu_examples/mnist.py index c907f4a15af48..465b22c16d1f0 100644 --- a/pl_examples/ipu_examples/mnist.py +++ b/pl_examples/ipu_examples/mnist.py @@ -64,6 +64,8 @@ def accuracy(self, logits, y): return acc def validation_epoch_end(self, outputs) -> None: + # since the training step/validation step and test step are run on the IPU device + # we must log the average loss outside the step functions. self.log('val_acc', torch.stack(outputs).mean(), prog_bar=True) def test_epoch_end(self, outputs) -> None: From b5a50325a739f5b6e7ff33d0e49b02babe47c59c Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 7 Jun 2021 13:59:28 +0100 Subject: [PATCH 31/60] CHANGELOG.md --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bc8ffcf1d40e..d63afef4fdc61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,6 +65,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added trainer stage hooks for Training Plugins and Accelerators ([#7864](https://github.com/PyTorchLightning/pytorch-lightning/pull/7864)) +- Added IPU Accelerator ([#7867](https://github.com/PyTorchLightning/pytorch-lightning/pull/7867)) + + ### Changed - Changed calling of `untoggle_optimizer(opt_idx)` out of the closure function ([#7563](https://github.com/PyTorchLightning/pytorch-lightning/pull/7563) From 72ed367c4ffd35de1af96bc6b5e5bad602df9a70 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 7 Jun 2021 14:07:14 +0100 Subject: [PATCH 32/60] Add simple selection test --- tests/accelerators/test_ipu.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 0d5a6e89bb331..e5aff0de0d015 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -20,6 +20,7 @@ from torch.utils.data import DataLoader from pytorch_lightning import Callback, seed_everything, Trainer +from pytorch_lightning.accelerators import IPUAccelerator from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.plugins import IPUPlugin, IPUPrecisionPlugin from tests.helpers.boring_model import BoringModel @@ -90,6 +91,13 @@ def test_epoch_end(self, outputs) -> None: self.log('test_acc', torch.stack(outputs).mean()) +def test_accelerator_selected(tmpdir): + trainer = Trainer(ipu_cores=1) + assert isinstance(trainer.accelerator, IPUAccelerator) + trainer = Trainer(ipu_cores=1, accelerator='ipu') + assert isinstance(trainer.accelerator, IPUAccelerator) + + @RunIf(ipu=True, special=True) @pytest.mark.parametrize('ipu_cores', [1, 4]) def test_all_stages(tmpdir, ipu_cores): From 3fb031d180c3653002d276e0a1ebd24e9ce1cc09 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 7 Jun 2021 14:15:21 +0100 Subject: [PATCH 33/60] Remove special=True to see what happens --- tests/accelerators/test_ipu.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index e5aff0de0d015..c313e2ff29044 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -98,7 +98,7 @@ def test_accelerator_selected(tmpdir): assert isinstance(trainer.accelerator, IPUAccelerator) -@RunIf(ipu=True, special=True) +@RunIf(ipu=True) @pytest.mark.parametrize('ipu_cores', [1, 4]) def test_all_stages(tmpdir, ipu_cores): model = IPUModel() @@ -109,7 +109,7 @@ def test_all_stages(tmpdir, ipu_cores): trainer.predict(model, model.val_dataloader()) -@RunIf(ipu=True, special=True) +@RunIf(ipu=True) 
@pytest.mark.parametrize('ipu_cores', [1, 4]) def test_inference_only(tmpdir, ipu_cores): model = IPUModel() @@ -120,7 +120,7 @@ def test_inference_only(tmpdir, ipu_cores): trainer.predict(model, model.val_dataloader()) -@RunIf(ipu=True, special=True) +@RunIf(ipu=True) def test_optimization(tmpdir): seed_everything(42) @@ -188,7 +188,7 @@ def test_dataloader(self): assert saved_result > 0.6 and (saved_result == test_result) -@RunIf(ipu=True, special=True) +@RunIf(ipu=True) def test_mixed_precision(tmpdir): class TestCallback(Callback): @@ -205,7 +205,7 @@ def setup(self, trainer: Trainer, pl_module: LightningModule, stage: Optional[st trainer.fit(model) -@RunIf(ipu=True, special=True) +@RunIf(ipu=True) def test_pure_half_precision(tmpdir): class TestCallback(Callback): @@ -232,7 +232,7 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: trainer.fit(model) -@RunIf(ipu=True, special=True) +@RunIf(ipu=True) def test_device_iterations_ipu_plugin(tmpdir): class TestCallback(Callback): @@ -251,7 +251,7 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: trainer.fit(model) -@RunIf(ipu=True, special=True) +@RunIf(ipu=True) def test_accumulated_batches(tmpdir): class TestCallback(Callback): From 515d4918cf263d298a55f6983f20e9b63ea1a141 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 7 Jun 2021 14:36:50 +0100 Subject: [PATCH 34/60] Fix test --- tests/accelerators/test_ipu.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index c313e2ff29044..fbfd2b9c63780 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -239,14 +239,14 @@ class TestCallback(Callback): def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) - assert trainer.accelerator.training_type_plugin.device_iterations == 20 + assert trainer.accelerator.training_type_plugin.device_iterations == 2 # assert device iterations has been set correctly within the poptorch options poptorch_model = trainer.accelerator.training_type_plugin.poptorch_models['train'] - assert poptorch_model._options.toDict()['device_iterations'] == 20 + assert poptorch_model._options.toDict()['device_iterations'] == 2 raise SystemExit model = IPUModel() - trainer = Trainer(fast_dev_run=True, ipu_cores=1, plugins=IPUPlugin(device_iterations=20), callbacks=TestCallback()) + trainer = Trainer(fast_dev_run=True, ipu_cores=1, plugins=IPUPlugin(device_iterations=2), callbacks=TestCallback()) with pytest.raises(SystemExit): trainer.fit(model) From ed168086179c63d5489f94ef9c08cebc36ee2f6b Mon Sep 17 00:00:00 2001 From: Sean Naren Date: Mon, 7 Jun 2021 14:51:47 +0100 Subject: [PATCH 35/60] Update tests/accelerators/test_ipu.py Co-authored-by: Kaushik B <45285388+kaushikb11@users.noreply.github.com> --- tests/accelerators/test_ipu.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index fbfd2b9c63780..f9cff82097cad 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -91,6 +91,7 @@ def test_epoch_end(self, outputs) -> None: self.log('test_acc', torch.stack(outputs).mean()) +@RunIf(ipu=True) def test_accelerator_selected(tmpdir): trainer = Trainer(ipu_cores=1) assert isinstance(trainer.accelerator, IPUAccelerator) From 7f50295c45b605b0bddb89f10de573a83b7dda96 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 
7 Jun 2021 15:31:31 +0100 Subject: [PATCH 36/60] Convert ipu_cores -> ipus --- pl_examples/ipu_examples/mnist.py | 2 +- pytorch_lightning/trainer/trainer.py | 4 ++-- tests/accelerators/test_ipu.py | 26 +++++++++++++------------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pl_examples/ipu_examples/mnist.py b/pl_examples/ipu_examples/mnist.py index 465b22c16d1f0..32ae010fc2d2a 100644 --- a/pl_examples/ipu_examples/mnist.py +++ b/pl_examples/ipu_examples/mnist.py @@ -80,7 +80,7 @@ def configure_optimizers(self): model = LitClassifier() - trainer = pl.Trainer(max_epochs=2, ipu_cores=8) + trainer = pl.Trainer(max_epochs=2, ipus=8) trainer.fit(model, datamodule=dm) trainer.test(model, datamodule=dm) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 0bdf0f73d0e0a..e7a255a65c33d 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -107,7 +107,7 @@ def __init__( gpus: Optional[Union[List[int], str, int]] = None, auto_select_gpus: bool = False, tpu_cores: Optional[Union[List[int], str, int]] = None, - ipu_cores: Optional[int] = None, + ipus: Optional[int] = None, log_gpu_memory: Optional[str] = None, progress_bar_refresh_rate: Optional[int] = None, overfit_batches: Union[int, float] = 0.0, @@ -324,7 +324,7 @@ def __init__( self.optimizer_connector = OptimizerConnector(self) self.accelerator_connector = AcceleratorConnector( - num_processes, tpu_cores, ipu_cores, distributed_backend, auto_select_gpus, gpus, num_nodes, sync_batchnorm, + num_processes, tpu_cores, ipus, distributed_backend, auto_select_gpus, gpus, num_nodes, sync_batchnorm, benchmark, replace_sampler_ddp, deterministic, precision, amp_backend, amp_level, plugins ) self.logger_connector = LoggerConnector(self, log_gpu_memory) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index f9cff82097cad..be6bdfea9d11a 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -93,17 +93,17 @@ def test_epoch_end(self, outputs) -> None: @RunIf(ipu=True) def test_accelerator_selected(tmpdir): - trainer = Trainer(ipu_cores=1) + trainer = Trainer(ipus=1) assert isinstance(trainer.accelerator, IPUAccelerator) - trainer = Trainer(ipu_cores=1, accelerator='ipu') + trainer = Trainer(ipus=1, accelerator='ipu') assert isinstance(trainer.accelerator, IPUAccelerator) @RunIf(ipu=True) -@pytest.mark.parametrize('ipu_cores', [1, 4]) -def test_all_stages(tmpdir, ipu_cores): +@pytest.mark.parametrize('ipus', [1, 4]) +def test_all_stages(tmpdir, ipus): model = IPUModel() - trainer = Trainer(fast_dev_run=True, ipu_cores=ipu_cores) + trainer = Trainer(fast_dev_run=True, ipus=ipus) trainer.fit(model) trainer.validate(model) trainer.test(model) @@ -111,11 +111,11 @@ def test_all_stages(tmpdir, ipu_cores): @RunIf(ipu=True) -@pytest.mark.parametrize('ipu_cores', [1, 4]) -def test_inference_only(tmpdir, ipu_cores): +@pytest.mark.parametrize('ipus', [1, 4]) +def test_inference_only(tmpdir, ipus): model = IPUModel() - trainer = Trainer(fast_dev_run=True, ipu_cores=ipu_cores) + trainer = Trainer(fast_dev_run=True, ipus=ipus) trainer.validate(model) trainer.test(model) trainer.predict(model, model.val_dataloader()) @@ -157,7 +157,7 @@ def test_dataloader(self): max_epochs=1, weights_summary=None, deterministic=True, - ipu_cores=2, + ipus=2, ) # fit model @@ -201,7 +201,7 @@ def setup(self, trainer: Trainer, pl_module: LightningModule, stage: Optional[st raise SystemExit model = IPUModel() - trainer = 
Trainer(fast_dev_run=True, ipu_cores=1, precision=16, callbacks=TestCallback()) + trainer = Trainer(fast_dev_run=True, ipus=1, precision=16, callbacks=TestCallback()) with pytest.raises(SystemExit): trainer.fit(model) @@ -224,7 +224,7 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: model = IPUModel() trainer = Trainer( fast_dev_run=True, - ipu_cores=1, + ipus=1, precision=16, plugins=IPUPlugin(convert_model_to_half=True), callbacks=TestCallback() @@ -247,7 +247,7 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: raise SystemExit model = IPUModel() - trainer = Trainer(fast_dev_run=True, ipu_cores=1, plugins=IPUPlugin(device_iterations=2), callbacks=TestCallback()) + trainer = Trainer(fast_dev_run=True, ipus=1, plugins=IPUPlugin(device_iterations=2), callbacks=TestCallback()) with pytest.raises(SystemExit): trainer.fit(model) @@ -267,6 +267,6 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: raise SystemExit model = IPUModel() - trainer = Trainer(fast_dev_run=True, ipu_cores=1, accumulate_grad_batches=2, callbacks=TestCallback()) + trainer = Trainer(fast_dev_run=True, ipus=1, accumulate_grad_batches=2, callbacks=TestCallback()) with pytest.raises(SystemExit): trainer.fit(model) From c53cf88de36121e55e9c95144fdf6f323b48c26e Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 7 Jun 2021 19:56:24 +0100 Subject: [PATCH 37/60] Add typing, fail earlier --- .../plugins/training_type/ipu.py | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 2527470a12166..e585f27df5fd9 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -34,10 +34,10 @@ def forward(self, *inputs, **kwargs): return super().forward(*inputs, **kwargs) @staticmethod - def batch_to(data): + def batch_to(data: torch.Tensor) -> torch.Tensor: return data.half() - def _move_float_tensors_to_half(self, batch: Any): + def _move_float_tensors_to_half(self, batch: Any) -> Any: batch = apply_to_collection(batch, (torch.FloatTensor, torch.cuda.FloatTensor), function=self.batch_to) return batch @@ -70,6 +70,9 @@ def __init__( convert_model_to_half: Converts the model to half precision, which can be used for pure FP16 training. 
""" super().__init__(parallel_devices, cluster_environment) + if not poptorch.ipuHardwareIsAvailable(): + raise MisconfigurationException("IPU Accelerator requires IPUs to run.") + self.convert_model_to_half = convert_model_to_half self.device_iterations = device_iterations self.autoround_num_ipus = autoround_num_ipus @@ -86,11 +89,6 @@ def __init__( options["autoReport.directory"] = self.autoreport_dir os.environ["POPLAR_ENGINE_OPTIONS"] = json.dumps(options) - def setup_environment(self) -> None: - super().setup_environment() - if not poptorch.ipuHardwareIsAvailable(): - raise MisconfigurationException("IPU Accelerator requires IPUs to run.") - @property def lightning_module(self) -> Optional[LightningModule]: return self.model.module if isinstance(self.model, LightningIPUModule) else self.model @@ -125,7 +123,7 @@ def pre_dispatch(self) -> None: def replication_factor(self): return len(self.parallel_devices) - def _create_opts(self, training): + def _create_opts(self, training: bool): opts = poptorch.Options() opts.deviceIterations(self.device_iterations) opts.replicationFactor(self.replication_factor) @@ -138,16 +136,16 @@ def _create_opts(self, training): opts.randomSeed(int(os.environ["PL_GLOBAL_SEED"])) return opts - def on_reset_train_dataloader(self, dataloader) -> Union[Iterable, DataLoader]: + def on_reset_train_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: return self.process_dataloader(dataloader) - def on_reset_val_dataloader(self, dataloader) -> Union[Iterable, DataLoader]: + def on_reset_val_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: return self.process_dataloader(dataloader) - def on_reset_test_dataloader(self, dataloader) -> Union[Iterable, DataLoader]: + def on_reset_test_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: return self.process_dataloader(dataloader) - def on_reset_predict_dataloader(self, dataloader) -> Union[Iterable, DataLoader]: + def on_reset_predict_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: return self.process_dataloader(dataloader) def process_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: @@ -215,7 +213,7 @@ def _n_replicate(self): accumulate_grad_batches = self._original_accumulate_grad_batches return self.replication_factor * self.device_iterations * accumulate_grad_batches - def _prepare_input(self, args): + def _prepare_input(self, args: Any): def to_tuple(x): return tuple(x) @@ -247,7 +245,7 @@ def teardown(self) -> None: for k, model in self.poptorch_models.items(): model.destroy() - def _compiled(self, model): + def _compiled(self, model: Any): # Required to ensure we only attach compiled models, as they are compiled lazily. return model._executable is not None @@ -259,7 +257,7 @@ def _detach_models(self): if self._compiled(model) and model.isAttachedToDevice(): model.detachFromDevice() - def _load_model(self, stage): + def _load_model(self, stage: str): """ Loads the stage specific accelerator model onto device if compiled and not attached to IPU devices. 
Args: From a6dbd8a411349fac3cc68e15589115f42d2944f6 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 7 Jun 2021 19:58:28 +0100 Subject: [PATCH 38/60] simplify precision --- pytorch_lightning/plugins/training_type/ipu.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index e585f27df5fd9..727a392ed6271 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -23,12 +23,12 @@ class LightningIPUModule(_LightningModuleWrapperBase): - def __init__(self, pl_module: LightningModule, precision: int): + def __init__(self, pl_module: LightningModule, precision: Union[str, int]): super().__init__(pl_module) self.precision = precision def forward(self, *inputs, **kwargs): - if self.precision == 16: + if self.precision in ("mixed", 16): inputs = self._move_float_tensors_to_half(inputs) return super().forward(*inputs, **kwargs) @@ -98,7 +98,7 @@ def pre_dispatch(self) -> None: if self.convert_model_to_half: log.info('Using full 16bit precision, converting LightningModule weights to FP16.') self.model = self.model.half() - precision = self.lightning_module.trainer.accelerator.precision_plugin.precision + precision = self.lightning_module.trainer.precision precision = 16 if self.convert_model_to_half else precision model = LightningIPUModule(self.lightning_module, precision) From 953454b0fba0ebf6a925a3e76f778b983c6fad76 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 8 Jun 2021 12:41:28 +0100 Subject: [PATCH 39/60] Add test, add helper --- .../plugins/training_type/ipu.py | 5 +- tests/accelerators/test_ipu.py | 46 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 727a392ed6271..3347e897a0a74 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -71,7 +71,10 @@ def __init__( """ super().__init__(parallel_devices, cluster_environment) if not poptorch.ipuHardwareIsAvailable(): - raise MisconfigurationException("IPU Accelerator requires IPUs to run.") + raise MisconfigurationException( + "The IPU Accelerator requires IPU devices to run. " + "Learn more or get started with IPUs at https://www.graphcore.ai/getstarted" + ) self.convert_model_to_half = convert_model_to_half self.device_iterations = device_iterations diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index be6bdfea9d11a..cdfcf203b220d 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -270,3 +270,49 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: trainer = Trainer(fast_dev_run=True, ipus=1, accumulate_grad_batches=2, callbacks=TestCallback()) with pytest.raises(SystemExit): trainer.fit(model) + + +@RunIf(ipu=True) +def test_stages_correct(tmpdir): + """Ensure all stages correctly are traced correctly by asserting the output for each stage""" + + class StageModel(IPUModel): + + def training_step(self, batch, batch_idx): + loss = super().training_step(batch, batch_idx) + # tracing requires a loss value that depends on the model. + # force it to be a value but ensure we use the loss. 
+ return (loss - loss) + torch.tensor(1) + + def validation_step(self, batch, batch_idx): + loss = super().validation_step(batch, batch_idx) + return (loss - loss) + torch.tensor(2) + + def test_step(self, batch, batch_idx): + loss = super().validation_step(batch, batch_idx) + return (loss - loss) + torch.tensor(3) + + def predict_step(self, batch, batch_idx, dataloader_idx=None): + output = super().predict_step(batch, batch_idx) + return (output - output) + torch.tensor(4) + + class TestCallback(Callback): + + def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx) -> None: + assert outputs['loss'].item() == 1 + + def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx) -> None: + assert outputs.item() == 2 + + def on_test_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx) -> None: + assert outputs.item() == 3 + + def on_predict_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx) -> None: + assert torch.all(outputs == 4).item() + + model = StageModel() + trainer = Trainer(fast_dev_run=True, ipus=1, callbacks=TestCallback()) + trainer.fit(model) + trainer.test(model) + trainer.validate(model) + trainer.predict(model, model.test_dataloader()) From 24829bfb3ef554285b9cd19b1366be5d749c6a02 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 8 Jun 2021 13:50:36 +0100 Subject: [PATCH 40/60] fix accum --- pytorch_lightning/plugins/training_type/ipu.py | 6 +++++- tests/accelerators/test_ipu.py | 11 +++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 3347e897a0a74..37a6da94b495c 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -199,8 +199,12 @@ def _handle_gradient_accumulation_steps(self): Therefore, ``optimizer_step`` will be called on every batch, and the IPU will handle grad accumulation. """ self._original_accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches + if not isinstance(self._original_accumulate_grad_batches, int): + raise MisconfigurationException( + f"IPUs currently only support accumulate_grad_batches being an integer value. " + f"Received {self._original_accumulate_grad_batches}" + ) if self._original_accumulate_grad_batches > 1: - # todo (tchaton) Add support for accumulate_grad_batches being a dictionary. 
self.lightning_module.trainer.accumulation_scheduler = GradientAccumulationScheduler({0: 1}) def update_global_step(self, total_batch_idx: int, current_global_step: int) -> int: diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index cdfcf203b220d..5d75ea0afc46d 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -23,6 +23,7 @@ from pytorch_lightning.accelerators import IPUAccelerator from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.plugins import IPUPlugin, IPUPrecisionPlugin +from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel from tests.helpers.datamodules import ClassifDataModule from tests.helpers.datasets import SklearnDataset @@ -316,3 +317,13 @@ def on_predict_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, da trainer.test(model) trainer.validate(model) trainer.predict(model, model.test_dataloader()) + + +@RunIf(ipu=True) +def test_accumulate_grad_batches_dict_fails(tmpdir): + model = IPUModel() + trainer = Trainer(ipus=1, accumulate_grad_batches={0: 1}) + with pytest.raises( + MisconfigurationException, match="IPUs currently only support accumulate_grad_batches being an integer value." + ): + trainer.fit(model) From d7d38c56d0726efece789fa7a5b8b93dda4a0898 Mon Sep 17 00:00:00 2001 From: Sean Naren Date: Tue, 8 Jun 2021 13:53:19 +0100 Subject: [PATCH 41/60] Update pytorch_lightning/plugins/training_type/ipu.py Co-authored-by: thomas chaton --- pytorch_lightning/plugins/training_type/ipu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 37a6da94b495c..40d25eb0caca1 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -249,7 +249,7 @@ def predict_step(self, *args, **kwargs): return self.poptorch_models['predict'](*args, **kwargs) def teardown(self) -> None: - for k, model in self.poptorch_models.items(): + for model in self.poptorch_models.values(): model.destroy() def _compiled(self, model: Any): From c333e2768447e88fd18ec18d7bbdf8c116ebd48d Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 8 Jun 2021 14:09:46 +0100 Subject: [PATCH 42/60] Use stages --- .../plugins/training_type/ipu.py | 25 ++++++++++--------- tests/accelerators/test_ipu.py | 5 ++-- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 40d25eb0caca1..857529163a325 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -12,6 +12,7 @@ from pytorch_lightning.overrides.base import _LightningModuleWrapperBase from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin +from pytorch_lightning.trainer.states import RunningStage from pytorch_lightning.trainer.supporters import CombinedLoader from pytorch_lightning.utilities import _POPTORCH_AVAILABLE from pytorch_lightning.utilities.apply_func import apply_to_collection @@ -110,12 +111,12 @@ def pre_dispatch(self) -> None: # Separate models are instantiated for different stages, but they share the same weights on host. # When validation/test models are run, weights are synced first. 
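Aside: a worked example (illustrative numbers, not from the patch) of the bookkeeping above. Lightning's own accumulation scheduler is pinned to ``{0: 1}`` because poptorch performs gradient accumulation on device, while the user's original ``accumulate_grad_batches`` still feeds the replication arithmetic in ``_n_replicate``:

    accumulate_grad_batches = 2   # what the user passed to the Trainer
    device_iterations = 4
    replication_factor = 2        # e.g. ipus=2

    # micro-batches consumed from the poptorch DataLoader per host-side step
    n_replicate = replication_factor * device_iterations * accumulate_grad_batches
    assert n_replicate == 16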
- if self.lightning_module.trainer.training: + if self.lightning_module.trainer.state.stage is RunningStage.TRAINING: # Create model for training which will run training. optimizer = self.lightning_module.trainer.optimizers[0] model = poptorch.trainingModel(model=model, options=self._create_opts(training=True), optimizer=optimizer) - self.poptorch_models['train'] = model - for x in ('val', 'test', 'predict'): + self.poptorch_models[RunningStage.TRAINING] = model + for x in (RunningStage.VALIDATING, RunningStage.TESTING, RunningStage.PREDICTING): model = poptorch.inferenceModel( model=model, options=self._create_opts(training=False), @@ -234,19 +235,19 @@ def to_tensor(x): def training_step(self, *args, **kwargs): args = self._prepare_input(args) - return self.poptorch_models['train'](*args, **kwargs) + return self.poptorch_models[RunningStage.TRAINING](*args, **kwargs) def validation_step(self, *args, **kwargs): args = self._prepare_input(args) - return self.poptorch_models['val'](*args, **kwargs) + return self.poptorch_models[RunningStage.VALIDATING](*args, **kwargs) def test_step(self, *args, **kwargs): args = self._prepare_input(args) - return self.poptorch_models['test'](*args, **kwargs) + return self.poptorch_models[RunningStage.TESTING](*args, **kwargs) def predict_step(self, *args, **kwargs): args = self._prepare_input(args) - return self.poptorch_models['predict'](*args, **kwargs) + return self.poptorch_models[RunningStage.PREDICTING](*args, **kwargs) def teardown(self) -> None: for model in self.poptorch_models.values(): @@ -276,16 +277,16 @@ def _load_model(self, stage: str): model.attachToDevice() def on_train_start(self): - self._load_model('train') + self._load_model(RunningStage.TRAINING) def on_validation_start(self): - self._load_model('val') + self._load_model(RunningStage.VALIDATING) def on_test_start(self): - self._load_model('test') + self._load_model(RunningStage.TESTING) def on_predict_start(self): - self._load_model('predict') + self._load_model(RunningStage.PREDICTING) def on_train_end(self): self._detach_models() @@ -302,7 +303,7 @@ def on_predict_end(self): def on_train_batch_start(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None: # Updates optimizer stats if LR scheduler modified the optimizer state optimizer = self.lightning_module.trainer.optimizers[0] - self.poptorch_models['train'].setOptimizer(optimizer) + self.poptorch_models[RunningStage.TRAINING].setOptimizer(optimizer) @property def on_gpu(self) -> bool: diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 5d75ea0afc46d..37ee51ba3379f 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -23,6 +23,7 @@ from pytorch_lightning.accelerators import IPUAccelerator from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.plugins import IPUPlugin, IPUPrecisionPlugin +from pytorch_lightning.trainer.states import RunningStage from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel from tests.helpers.datamodules import ClassifDataModule @@ -243,7 +244,7 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) assert trainer.accelerator.training_type_plugin.device_iterations == 2 # assert device iterations has been set correctly within the poptorch options - poptorch_model = trainer.accelerator.training_type_plugin.poptorch_models['train'] + 
poptorch_model = trainer.accelerator.training_type_plugin.poptorch_models[RunningStage.TRAINING] assert poptorch_model._options.toDict()['device_iterations'] == 2 raise SystemExit @@ -263,7 +264,7 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: # since ipu handle accumulation assert trainer.accumulation_scheduler.scheduling == {0: 1} # assert poptorch option have been set correctly - poptorch_model = trainer.accelerator.training_type_plugin.poptorch_models['train'] + poptorch_model = trainer.accelerator.training_type_plugin.poptorch_models[RunningStage.TRAINING] assert poptorch_model._options.Training.toDict()['gradient_accumulation'] == 2 raise SystemExit From 9d3741a47ecd3b96c006acbd0c4d45c923ab918b Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 8 Jun 2021 14:34:31 +0100 Subject: [PATCH 43/60] Make sure warning message returned --- pytorch_lightning/plugins/training_type/ipu.py | 2 +- tests/accelerators/test_ipu.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 857529163a325..6606eede7c47c 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -71,7 +71,7 @@ def __init__( convert_model_to_half: Converts the model to half precision, which can be used for pure FP16 training. """ super().__init__(parallel_devices, cluster_environment) - if not poptorch.ipuHardwareIsAvailable(): + if not _POPTORCH_AVAILABLE or not poptorch.ipuHardwareIsAvailable(): raise MisconfigurationException( "The IPU Accelerator requires IPU devices to run. " "Learn more or get started with IPUs at https://www.graphcore.ai/getstarted" diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 37ee51ba3379f..f50390039191e 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -93,6 +93,14 @@ def test_epoch_end(self, outputs) -> None: self.log('test_acc', torch.stack(outputs).mean()) +def test_fail_if_no_ipus(tmpdir): + with pytest.raises(MisconfigurationException, match="IPU Accelerator requires IPU devices to run"): + Trainer(ipus=1) + + with pytest.raises(MisconfigurationException, match="IPU Accelerator requires IPU devices to run"): + Trainer(ipus=1, accelerator='ipu') + + @RunIf(ipu=True) def test_accelerator_selected(tmpdir): trainer = Trainer(ipus=1) From fd1899a45570a3c0a901a568c5180f17fb25f698 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 8 Jun 2021 14:47:38 +0100 Subject: [PATCH 44/60] thorw error --- .../plugins/precision/ipu_precision.py | 37 +++++++++++++++++-- tests/accelerators/test_ipu.py | 8 ++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/plugins/precision/ipu_precision.py b/pytorch_lightning/plugins/precision/ipu_precision.py index 4e88a6cf73fe1..b0da6e13e20f4 100644 --- a/pytorch_lightning/plugins/precision/ipu_precision.py +++ b/pytorch_lightning/plugins/precision/ipu_precision.py @@ -1,8 +1,25 @@ -from typing import Any +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
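Aside: a minimal sketch of the availability guard introduced in the commit above: ``poptorch`` is only touched when the import succeeded, so a machine without the SDK raises the intended ``MisconfigurationException`` rather than a ``NameError``.

    from pytorch_lightning.utilities import _POPTORCH_AVAILABLE

    if _POPTORCH_AVAILABLE:
        import poptorch

    # short-circuits before touching poptorch when the SDK is missing
    ipu_usable = _POPTORCH_AVAILABLE and poptorch.ipuHardwareIsAvailable()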
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any, Optional, Union from torch import Tensor +from torch.nn import Module +from torch.optim import Optimizer from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin +from pytorch_lightning.utilities import GradClipAlgorithmType +from pytorch_lightning.utilities.exceptions import MisconfigurationException class IPUPrecisionPlugin(PrecisionPlugin): @@ -20,5 +37,19 @@ def backward( # IPU internally manages bwd step. return closure_loss - def clip_gradients(self, *args, **kwargs) -> None: - pass + def clip_gradients( + self, + optimizer: Optimizer, + clip_val: Union[int, float], + gradient_clip_algorithm: GradClipAlgorithmType = GradClipAlgorithmType.NORM, + model: Optional[Module] = None + ) -> None: + """Clips the gradients""" + if clip_val is None: + return + + clip_val = float(clip_val) + if clip_val <= 0: + return + + raise MisconfigurationException("IPUs currently do not support clipping gradients.") diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index f50390039191e..4bb4a14a38957 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -336,3 +336,11 @@ def test_accumulate_grad_batches_dict_fails(tmpdir): MisconfigurationException, match="IPUs currently only support accumulate_grad_batches being an integer value." ): trainer.fit(model) + + +@RunIf(ipu=True) +def test_clip_gradients_fails(tmpdir): + model = IPUModel() + trainer = Trainer(ipus=1, gradient_clip_val=10) + with pytest.raises(MisconfigurationException, match="IPUs currently do not support clipping gradients."): + trainer.fit(model) From 07279546307bd6b5081e08e459ce1c0585579196 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 8 Jun 2021 14:58:08 +0100 Subject: [PATCH 45/60] Add more tests, use fs --- pytorch_lightning/plugins/training_type/ipu.py | 7 +++++-- tests/accelerators/test_ipu.py | 11 +++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 6606eede7c47c..dda3174b8f096 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -16,6 +16,7 @@ from pytorch_lightning.trainer.supporters import CombinedLoader from pytorch_lightning.utilities import _POPTORCH_AVAILABLE from pytorch_lightning.utilities.apply_func import apply_to_collection +from pytorch_lightning.utilities.cloud_io import get_filesystem from pytorch_lightning.utilities.exceptions import MisconfigurationException if _POPTORCH_AVAILABLE: @@ -88,8 +89,10 @@ def __init__( if self.autoreport: options = {"autoReport.all": self.autoreport} if self.autoreport_dir: - if not os.path.exists(self.autoreport_dir): - os.makedirs(self.autoreport_dir) + self._fs = get_filesystem(str(self.autoreport_dir)) + + if not self._fs.exists(self.autoreport_dir): + self._fs.makedirs(self.autoreport_dir) options["autoReport.directory"] = self.autoreport_dir os.environ["POPLAR_ENGINE_OPTIONS"] = json.dumps(options) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 
4bb4a14a38957..f67937c12e579 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -344,3 +344,14 @@ def test_clip_gradients_fails(tmpdir): trainer = Trainer(ipus=1, gradient_clip_val=10) with pytest.raises(MisconfigurationException, match="IPUs currently do not support clipping gradients."): trainer.fit(model) + + +@RunIf(ipu=True) +def test_autoreport(tmpdir): + """Ensure autoreport dumps to a file.""" + model = IPUModel() + autoreport_path = os.path.join(tmpdir, 'report/') + trainer = Trainer(ipus=1, fast_dev_run=True, plugins=IPUPlugin(autoreport=True, autoreport_dir=autoreport_path)) + trainer.fit(model) + assert os.path.exists(autoreport_path) + assert os.path.isfile(autoreport_path + 'profile.pop') From ce182f777592e20b985a5424fbdffc7635d68fc5 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 8 Jun 2021 15:00:57 +0100 Subject: [PATCH 46/60] add comment --- pl_examples/ipu_examples/mnist.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pl_examples/ipu_examples/mnist.py b/pl_examples/ipu_examples/mnist.py index 32ae010fc2d2a..87087d67766b2 100644 --- a/pl_examples/ipu_examples/mnist.py +++ b/pl_examples/ipu_examples/mnist.py @@ -47,6 +47,9 @@ def training_step(self, batch, batch_idx): def validation_step(self, batch, batch_idx): x, y = batch logits = self(x) + # we currently return the accuracy as the validation_step/test_step is run on the IPU devices. + # Outputs from the step functions are sent to the host device, where we calculate the metrics in + # validation_epoch_end and test_epoch_end for the test_step. acc = self.accuracy(logits, y) return acc From 7e81bcd8fdad861fb7b0a5d162d2715c44679d42 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 8 Jun 2021 16:47:17 +0100 Subject: [PATCH 47/60] Clean --- .../plugins/training_type/ipu.py | 3 ++ tests/accelerators/test_ipu.py | 37 +++---------------- 2 files changed, 9 insertions(+), 31 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index dda3174b8f096..208ffb96d5c0c 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -192,6 +192,9 @@ def _convert_to_poptorch_loader(self, dataloader: Union[Iterable, DataLoader], if not contains_dataset: dl_args.pop('dataset') + # Override to drop last uneven batch, as IPUs does not support uneven inputs. 
+ dl_args['drop_last'] = True + dataloader = poptorch.DataLoader(**dl_args, options=opts) dataloader.multiprocessing_context = multiprocessing_context return dataloader diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index f67937c12e579..f5db660ffeb20 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -17,17 +17,16 @@ import pytest import torch import torch.nn.functional as F -from torch.utils.data import DataLoader from pytorch_lightning import Callback, seed_everything, Trainer from pytorch_lightning.accelerators import IPUAccelerator from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.plugins import IPUPlugin, IPUPrecisionPlugin from pytorch_lightning.trainer.states import RunningStage +from pytorch_lightning.utilities import _IPU_AVAILABLE from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel from tests.helpers.datamodules import ClassifDataModule -from tests.helpers.datasets import SklearnDataset from tests.helpers.runif import RunIf from tests.helpers.simple_models import ClassificationModel @@ -93,6 +92,7 @@ def test_epoch_end(self, outputs) -> None: self.log('test_acc', torch.stack(outputs).mean()) +@pytest.mark.skipif(_IPU_AVAILABLE, reason="test requires non-IPU machine") def test_fail_if_no_ipus(tmpdir): with pytest.raises(MisconfigurationException, match="IPU Accelerator requires IPU devices to run"): Trainer(ipus=1) @@ -135,37 +135,12 @@ def test_inference_only(tmpdir, ipus): def test_optimization(tmpdir): seed_everything(42) - # Override to drop last uneven batch, as IPU poptorch does not support uneven inputs. - class DataModule(ClassifDataModule): - - def train_dataloader(self): - return DataLoader( - SklearnDataset(self.x_train, self.y_train, self._x_type, self._y_type), - batch_size=self.batch_size, - drop_last=True - ) - - def val_dataloader(self): - return DataLoader( - SklearnDataset(self.x_valid, self.y_valid, self._x_type, self._y_type), - batch_size=self.batch_size, - drop_last=True - ) - - def test_dataloader(self): - return DataLoader( - SklearnDataset(self.x_test, self.y_test, self._x_type, self._y_type), - batch_size=self.batch_size, - drop_last=True - ) - - dm = DataModule(length=1024) + dm = ClassifDataModule(length=1024) model = IPUClassificationModel() trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, - weights_summary=None, deterministic=True, ipus=2, ) @@ -181,7 +156,7 @@ def test_dataloader(self): assert result[0]['val_acc'] > 0.7 # test - result = trainer.test(datamodule=dm) + result = trainer.test(model, datamodule=dm) assert dm.trainer is not None test_result = result[0]['test_acc'] assert test_result > 0.6 @@ -194,9 +169,9 @@ def test_dataloader(self): trainer = Trainer(default_root_dir=tmpdir, deterministic=True) - result = trainer.test(model, dm.test_dataloader()) + result = trainer.test(model, datamodule=dm) saved_result = result[0]['test_acc'] - assert saved_result > 0.6 and (saved_result == test_result) + assert saved_result == test_result @RunIf(ipu=True) From d1788d1a336ee4ad98b7ab766cf1b75f97b104fe Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 9 Jun 2021 12:18:03 +0100 Subject: [PATCH 48/60] Address feedback, add IPU tests --- .../plugins/training_type/ipu.py | 64 ++++++-- tests/accelerators/test_ipu.py | 142 +++++++++++++++++- 2 files changed, 195 insertions(+), 11 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py 
b/pytorch_lightning/plugins/training_type/ipu.py index 208ffb96d5c0c..a86e050391c57 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -14,7 +14,7 @@ from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin from pytorch_lightning.trainer.states import RunningStage from pytorch_lightning.trainer.supporters import CombinedLoader -from pytorch_lightning.utilities import _POPTORCH_AVAILABLE +from pytorch_lightning.utilities import _POPTORCH_AVAILABLE, rank_zero_warn from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.cloud_io import get_filesystem from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -58,6 +58,8 @@ def __init__( convert_model_to_half: bool = False, parallel_devices: Optional[List[torch.device]] = None, cluster_environment: Optional[ClusterEnvironment] = None, + training_opts: Optional['poptorch.Options'] = None, + inference_opts: Optional['poptorch.Options'] = None ) -> None: """ Arguments: @@ -70,6 +72,9 @@ def __init__( https://docs.graphcore.ai/projects/graphcore-popvision-user-guide/en/latest/graph/graph.html autoreport_dir: Optional directory to store autoReport output. convert_model_to_half: Converts the model to half precision, which can be used for pure FP16 training. + training_opts: Optional ``poptorch.Options`` to override the default created options for training. + inference_opts: Optional ``poptorch.Options`` to override the default + created options for validation/testing and predicting. """ super().__init__(parallel_devices, cluster_environment) if not _POPTORCH_AVAILABLE or not poptorch.ipuHardwareIsAvailable(): @@ -85,6 +90,8 @@ def __init__( self.autoreport_dir = autoreport_dir self.poptorch_models = {} self._original_accumulate_grad_batches = None + self._training_opts = training_opts + self._inference_opts = inference_opts if self.autoreport: options = {"autoReport.all": self.autoreport} @@ -96,10 +103,6 @@ def __init__( options["autoReport.directory"] = self.autoreport_dir os.environ["POPLAR_ENGINE_OPTIONS"] = json.dumps(options) - @property - def lightning_module(self) -> Optional[LightningModule]: - return self.model.module if isinstance(self.model, LightningIPUModule) else self.model - def pre_dispatch(self) -> None: self._handle_gradient_accumulation_steps() if self.convert_model_to_half: @@ -117,12 +120,12 @@ def pre_dispatch(self) -> None: if self.lightning_module.trainer.state.stage is RunningStage.TRAINING: # Create model for training which will run training. optimizer = self.lightning_module.trainer.optimizers[0] - model = poptorch.trainingModel(model=model, options=self._create_opts(training=True), optimizer=optimizer) + model = poptorch.trainingModel(model=model, options=self.training_opts, optimizer=optimizer) self.poptorch_models[RunningStage.TRAINING] = model for x in (RunningStage.VALIDATING, RunningStage.TESTING, RunningStage.PREDICTING): model = poptorch.inferenceModel( model=model, - options=self._create_opts(training=False), + options=self.inference_opts, ) self.poptorch_models[x] = model @@ -138,11 +141,55 @@ def _create_opts(self, training: bool): opts.Training.gradientAccumulation(gradient_accumulation) opts.autoRoundNumIPUs(self.autoround_num_ipus) - # todo (sean): unsure if this is necessary but to be safe. 
if os.environ.get("PL_GLOBAL_SEED"): opts.randomSeed(int(os.environ["PL_GLOBAL_SEED"])) return opts + @property + def training_opts(self) -> 'poptorch.Options': + if self._training_opts is None: + self._training_opts = self._create_opts(training=True) + self._validate_opts(self._training_opts, training=True) + return self._training_opts + + @property + def inference_opts(self) -> 'poptorch.Options': + if self._inference_opts is None: + self._inference_opts = self._create_opts(training=False) + self._validate_opts(self._inference_opts, training=False) + return self._inference_opts + + def _validate_opts(self, opts: 'poptorch.Options', training: bool) -> None: + if opts is not None: + if opts.replication_factor != self.replication_factor: + rank_zero_warn( + f"Manual poptorch.Options set replicationFactor to {opts.replication_factor} " + f"which differs to the ipus={self.replication_factor} flag passed to the Trainer. " + f"Setting to {self.replication_factor} in the poptorch.Options.", UserWarning + ) + opts.set(replication_factor=self.replication_factor) + if not training: + if opts.Training.gradient_accumulation != 1: + rank_zero_warn( + "Inference poptorch.Options should set gradientAccumulation to 1. " + "Setting gradientAccumulation to 1 for inference options.", UserWarning + ) + opts.Training.set(gradient_accumulation=1) + else: + accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches + if opts.Training.gradient_accumulation != self.lightning_module.trainer.accumulate_grad_batches: + rank_zero_warn( + f"Training poptorch.Options set gradientAccumulation to {opts.Training.gradient_accumulation}. " + f"This is different to accumulate_grad_batches which was set to {accumulate_grad_batches}. " + f"To change gradientAccumulation, please set accumulate_grad_batches in the Trainer. " + f"Setting poptorch.Options gradientAccumulation to {accumulate_grad_batches}", UserWarning + ) + opts.Training.set(gradient_accumulation=self.lightning_module.trainer.accumulate_grad_batches) + + @property + def lightning_module(self) -> Optional[LightningModule]: + return self.model.module if isinstance(self.model, LightningIPUModule) else self.model + def on_reset_train_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[Iterable, DataLoader]: return self.process_dataloader(dataloader) @@ -191,7 +238,6 @@ def _convert_to_poptorch_loader(self, dataloader: Union[Iterable, DataLoader], dl_args['multiprocessing_context'] = multiprocessing_context if not contains_dataset: dl_args.pop('dataset') - # Override to drop last uneven batch, as IPUs does not support uneven inputs. 
dl_args['drop_last'] = True diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index f5db660ffeb20..b1b5621e26d02 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -30,6 +30,9 @@ from tests.helpers.runif import RunIf from tests.helpers.simple_models import ClassificationModel +if _IPU_AVAILABLE: + import poptorch + class IPUModel(BoringModel): @@ -141,7 +144,6 @@ def test_optimization(tmpdir): trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, - deterministic=True, ipus=2, ) @@ -167,7 +169,7 @@ def test_optimization(tmpdir): model = IPUClassificationModel.load_from_checkpoint(model_path) - trainer = Trainer(default_root_dir=tmpdir, deterministic=True) + trainer = Trainer(default_root_dir=tmpdir, ipus=2) result = trainer.test(model, datamodule=dm) saved_result = result[0]['test_acc'] @@ -330,3 +332,139 @@ def test_autoreport(tmpdir): trainer.fit(model) assert os.path.exists(autoreport_path) assert os.path.isfile(autoreport_path + 'profile.pop') + + +@RunIf(ipu=True) +def test_manual_poptorch_opts(tmpdir): + """Ensure if the user passes manual poptorch Options, we run with the correct object.""" + model = IPUModel() + inference_opts = poptorch.Options() + inference_opts.deviceIterations(20) + inference_opts.replicationFactor(1) + inference_opts.Training.gradientAccumulation(1) + + training_opts = poptorch.Options() + training_opts.deviceIterations(20) + training_opts.replicationFactor(1) + training_opts.Training.gradientAccumulation(1) + + trainer = Trainer( + ipus=1, fast_dev_run=True, plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) + ) + + assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) + assert trainer.accelerator.training_type_plugin.training_opts == training_opts + assert trainer.accelerator.training_type_plugin.inference_opts == inference_opts + trainer.fit(model) + + +@RunIf(ipu=True) +def test_manual_poptorch_opts_ipu_count(tmpdir): + """ + Ensure if the user passes manual poptorch Options + and the number of ipus do not match, we warn and we set it for the user. + """ + + manual_ipus = 1 + expected_ipus = 2 + model = IPUModel() + inference_opts = poptorch.Options() + inference_opts.replicationFactor(manual_ipus) + + training_opts = poptorch.Options() + training_opts.replicationFactor(manual_ipus) + + trainer = Trainer( + ipus=expected_ipus, + fast_dev_run=True, + plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) + ) + with pytest.warns( + UserWarning, + match=f"Manual poptorch.Options set replicationFactor to {manual_ipus} " + f"which differs to the ipus={expected_ipus} flag passed to the Trainer. " + f"Setting to {expected_ipus} in the poptorch.Options." + ): + trainer.fit(model) + assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) + assert trainer.accelerator.training_type_plugin.training_opts.replication_factor == 2 + assert trainer.accelerator.training_type_plugin.inference_opts.replication_factor == 2 + + +@RunIf(ipu=True) +def test_manual_poptorch_opts_inference_grad_accum(tmpdir): + """ + Ensure if the user passes manual poptorch Options + and grad accumulation is set greater than 1 for inference, we warn and set to 1. 
+ """ + + model = IPUModel() + inference_opts = poptorch.Options() + inference_opts.Training.gradientAccumulation(4) + + training_opts = poptorch.Options() + training_opts.Training.gradientAccumulation(1) + + trainer = Trainer( + ipus=1, fast_dev_run=True, plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) + ) + with pytest.warns( + UserWarning, + match="Inference poptorch.Options should set gradientAccumulation to 1. " + "Setting gradientAccumulation to 1 for inference options.", + ): + trainer.fit(model) + assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) + assert trainer.accelerator.training_type_plugin.inference_opts.Training.gradient_accumulation == 1 + + +@RunIf(ipu=True) +def test_manual_poptorch_opts_train_grad_accum(tmpdir): + """ + Ensure if the user passes manual poptorch Options + and grad accumulation differs to accumulate_grad_batches, we + """ + + model = IPUModel() + inference_opts = poptorch.Options() + inference_opts.Training.gradientAccumulation(1) + + training_opts = poptorch.Options() + training_opts.Training.gradientAccumulation(2) + + trainer = Trainer( + ipus=1, + fast_dev_run=True, + accumulate_grad_batches=1, + plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) + ) + with pytest.warns( + UserWarning, + match=f"Training poptorch.Options set gradientAccumulation to {2}. " + f"This is different to accumulate_grad_batches which was set to {1}. " + f"To change gradientAccumulation, please set accumulate_grad_batches in the Trainer. " + f"Setting poptorch.Options gradientAccumulation to {1}", + ): + trainer.fit(model) + assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) + assert trainer.accelerator.training_type_plugin.inference_opts.Training.gradient_accumulation == 1 + + +@RunIf(ipu=True) +def test_default_opts(tmpdir): + """ + Ensure default opts are set correctly in the IPUPlugin. + """ + + model = IPUModel() + + trainer = Trainer(ipus=1, fast_dev_run=True) + trainer.fit(model) + assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) + inference_opts = trainer.accelerator.training_type_plugin.inference_opts + training_opts = trainer.accelerator.training_type_plugin.training_opts + for opts in (inference_opts, training_opts): + assert isinstance(opts, poptorch.Options) + assert opts.Training.gradient_accumulation == 1 + assert opts.device_iterations == 1 + assert opts.replication_factor == 1 From 08e5338203208e9c93b151442df69d624ab5d40c Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 9 Jun 2021 12:41:42 +0100 Subject: [PATCH 49/60] Fixes --- .../plugins/training_type/ipu.py | 19 +++++++++---------- tests/accelerators/test_ipu.py | 9 +-------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index a86e050391c57..d5e0a4a9900b0 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -168,23 +168,22 @@ def _validate_opts(self, opts: 'poptorch.Options', training: bool) -> None: f"Setting to {self.replication_factor} in the poptorch.Options.", UserWarning ) opts.set(replication_factor=self.replication_factor) - if not training: - if opts.Training.gradient_accumulation != 1: - rank_zero_warn( - "Inference poptorch.Options should set gradientAccumulation to 1. 
" - "Setting gradientAccumulation to 1 for inference options.", UserWarning - ) - opts.Training.set(gradient_accumulation=1) - else: + if training: accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches - if opts.Training.gradient_accumulation != self.lightning_module.trainer.accumulate_grad_batches: + if opts.Training.gradient_accumulation != accumulate_grad_batches: rank_zero_warn( f"Training poptorch.Options set gradientAccumulation to {opts.Training.gradient_accumulation}. " f"This is different to accumulate_grad_batches which was set to {accumulate_grad_batches}. " f"To change gradientAccumulation, please set accumulate_grad_batches in the Trainer. " f"Setting poptorch.Options gradientAccumulation to {accumulate_grad_batches}", UserWarning ) - opts.Training.set(gradient_accumulation=self.lightning_module.trainer.accumulate_grad_batches) + opts.Training.set(gradient_accumulation=accumulate_grad_batches) + elif opts.Training.gradient_accumulation != 1: + rank_zero_warn( + "Inference poptorch.Options should set gradientAccumulation to 1. " + "Setting gradientAccumulation to 1 for inference options.", UserWarning + ) + opts.Training.set(gradient_accumulation=1) @property def lightning_module(self) -> Optional[LightningModule]: diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index b1b5621e26d02..ededc1ac8439c 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -339,23 +339,16 @@ def test_manual_poptorch_opts(tmpdir): """Ensure if the user passes manual poptorch Options, we run with the correct object.""" model = IPUModel() inference_opts = poptorch.Options() - inference_opts.deviceIterations(20) - inference_opts.replicationFactor(1) - inference_opts.Training.gradientAccumulation(1) - training_opts = poptorch.Options() - training_opts.deviceIterations(20) - training_opts.replicationFactor(1) - training_opts.Training.gradientAccumulation(1) trainer = Trainer( ipus=1, fast_dev_run=True, plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) ) + trainer.fit(model) assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) assert trainer.accelerator.training_type_plugin.training_opts == training_opts assert trainer.accelerator.training_type_plugin.inference_opts == inference_opts - trainer.fit(model) @RunIf(ipu=True) From 45dc6a66446e09e296e730770f1d1a11c68db03d Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 9 Jun 2021 12:45:14 +0100 Subject: [PATCH 50/60] Fix signature --- pytorch_lightning/plugins/precision/ipu_precision.py | 5 +++++ pytorch_lightning/plugins/training_type/ipu.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/plugins/precision/ipu_precision.py b/pytorch_lightning/plugins/precision/ipu_precision.py index b0da6e13e20f4..e6983966e166b 100644 --- a/pytorch_lightning/plugins/precision/ipu_precision.py +++ b/pytorch_lightning/plugins/precision/ipu_precision.py @@ -17,6 +17,7 @@ from torch.nn import Module from torch.optim import Optimizer +import pytorch_lightning as pl from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin from pytorch_lightning.utilities import GradClipAlgorithmType from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -30,7 +31,11 @@ def __init__(self, precision: int) -> None: def backward( self, + model: 'pl.LightningModule', closure_loss: Tensor, + optimizer: Optimizer, + opt_idx: int, + should_accumulate: bool, *args: Any, **kwargs: Any, ) -> 
Tensor: diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index d5e0a4a9900b0..75bd6ce092ed1 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -29,7 +29,7 @@ def __init__(self, pl_module: LightningModule, precision: Union[str, int]): super().__init__(pl_module) self.precision = precision - def forward(self, *inputs, **kwargs): + def forward(self, *inputs: Any, **kwargs: Any) -> Any: if self.precision in ("mixed", 16): inputs = self._move_float_tensors_to_half(inputs) From de040c633a3b4f54be3c1829bfcdcc6427b822f7 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 9 Jun 2021 12:47:22 +0100 Subject: [PATCH 51/60] Add types --- pytorch_lightning/accelerators/ipu.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/accelerators/ipu.py b/pytorch_lightning/accelerators/ipu.py index 34bee31b5a91d..c9bee827af0e6 100644 --- a/pytorch_lightning/accelerators/ipu.py +++ b/pytorch_lightning/accelerators/ipu.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from collections import Callable +from typing import Any from torch.optim import Optimizer @@ -29,6 +30,6 @@ def setup_optimizers(self, trainer: 'pl.Trainer') -> None: if len(self.optimizers) > 1: raise MisconfigurationException("IPUs currently only support one optimizer.") - def optimizer_step(self, optimizer: Optimizer, opt_idx: int, lambda_closure: Callable, **kwargs): + def optimizer_step(self, optimizer: Optimizer, opt_idx: int, lambda_closure: Callable, **kwargs: Any) -> None: # Optimizer step is handled by the IPU accelerator. lambda_closure() From 42d7ab08365699da2a8bbd3f59a4a1eeaec294f2 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 9 Jun 2021 15:37:46 +0100 Subject: [PATCH 52/60] Remove autoround --- pytorch_lightning/plugins/training_type/ipu.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 75bd6ce092ed1..1d64636e5f43a 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -52,7 +52,6 @@ class IPUPlugin(ParallelPlugin): def __init__( self, device_iterations: int = 1, - autoround_num_ipus: bool = True, autoreport: bool = True, autoreport_dir: Optional[str] = None, convert_model_to_half: bool = False, @@ -67,7 +66,6 @@ def __init__( device_iterations: Number of iterations to run on device at once before returning to host. This can be used as an optimization to speed up training. https://docs.graphcore.ai/projects/poptorch-user-guide/en/0.1.67/batching.html - autoround_num_ipus: When selecting multiple IPUs, auto-rounds to powers of 2 as required for IPUs. autoreport: Enable auto-reporting for IPUs using PopVision https://docs.graphcore.ai/projects/graphcore-popvision-user-guide/en/latest/graph/graph.html autoreport_dir: Optional directory to store autoReport output. 
@@ -85,7 +83,6 @@ def __init__( self.convert_model_to_half = convert_model_to_half self.device_iterations = device_iterations - self.autoround_num_ipus = autoround_num_ipus self.autoreport = autoreport self.autoreport_dir = autoreport_dir self.poptorch_models = {} @@ -139,7 +136,6 @@ def _create_opts(self, training: bool): opts.replicationFactor(self.replication_factor) gradient_accumulation = self.lightning_module.trainer.accumulate_grad_batches if training else 1 opts.Training.gradientAccumulation(gradient_accumulation) - opts.autoRoundNumIPUs(self.autoround_num_ipus) if os.environ.get("PL_GLOBAL_SEED"): opts.randomSeed(int(os.environ["PL_GLOBAL_SEED"])) From 36f36720c99474c2d2578b7b7e79d5dff0e2dd7c Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 9 Jun 2021 19:42:02 +0100 Subject: [PATCH 53/60] Add docstring --- pytorch_lightning/trainer/trainer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index eb205004fe070..16db47e61dd5d 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -284,6 +284,8 @@ def __init__( tpu_cores: How many TPU cores to train on (1 or 8) / Single TPU to train on [1] + ipus: How many IPUs to train on. + track_grad_norm: -1 no tracking. Otherwise tracks that p-norm. May be set to 'inf' infinity-norm. truncated_bptt_steps: Deprecated in v1.3 to be removed in 1.5. From f9d61c52929f64d016cead4b5cd251f13e818c39 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Thu, 10 Jun 2021 21:53:28 +0100 Subject: [PATCH 54/60] ipu_cores -> ipus --- .../trainer/connectors/accelerator_connector.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 1d50a93b0b086..6b6c73ef327a8 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -83,7 +83,7 @@ def __init__( self, num_processes, tpu_cores, - ipu_cores, + ipus, distributed_backend, auto_select_gpus, gpus, @@ -103,7 +103,7 @@ def __init__( self.num_processes = num_processes self.tpu_cores = device_parser.parse_tpu_cores(tpu_cores) - self.ipu_cores = ipu_cores + self.ipus = ipus self.distributed_backend = distributed_backend self.auto_select_gpus = auto_select_gpus self.gpus = gpus @@ -256,7 +256,7 @@ def on_tpu(self) -> bool: @property def on_ipu(self) -> bool: - return self.ipu_cores is not None + return self.ipus is not None @property def tpu_id(self) -> Optional[int]: @@ -334,8 +334,8 @@ def parallel_devices(self) -> List[Union[torch.device, int]]: if isinstance(self.tpu_cores, int): devices = list(range(self.tpu_cores)) elif self.on_ipu: - if isinstance(self.ipu_cores, int): - devices = list(range(self.ipu_cores)) + if isinstance(self.ipus, int): + devices = list(range(self.ipus)) else: devices = [torch.device("cpu")] * self.num_processes return devices @@ -636,8 +636,8 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None): num_tpu_cores = self.tpu_cores if self.tpu_cores is not None else 0 rank_zero_info(f'TPU available: {_TPU_AVAILABLE}, using: {num_tpu_cores} TPU cores') - num_ipu_cores = self.ipu_cores if self.ipu_cores is not None else 0 - rank_zero_info(f'IPU available: {_IPU_AVAILABLE}, using: {num_ipu_cores} IPU cores') + num_ipus = self.ipus if self.ipus is not None else 0 + rank_zero_info(f'IPU available: {_IPU_AVAILABLE}, using: 
{num_ipus} IPU cores') if torch.cuda.is_available() and self._device_type != DeviceType.GPU: rank_zero_warn( From cf48ff86f0050d353bc93d8df843f3856e8a45a9 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 11 Jun 2021 11:06:55 +0100 Subject: [PATCH 55/60] Add test, remove unnecessary precision set --- .../plugins/precision/ipu_precision.py | 4 ---- .../trainer/connectors/accelerator_connector.py | 2 +- tests/accelerators/test_ipu.py | 15 +++++++++++++-- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/plugins/precision/ipu_precision.py b/pytorch_lightning/plugins/precision/ipu_precision.py index e6983966e166b..21510f2914180 100644 --- a/pytorch_lightning/plugins/precision/ipu_precision.py +++ b/pytorch_lightning/plugins/precision/ipu_precision.py @@ -25,10 +25,6 @@ class IPUPrecisionPlugin(PrecisionPlugin): - def __init__(self, precision: int) -> None: - super().__init__() - self.precision = precision - def backward( self, model: 'pl.LightningModule', diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 6b6c73ef327a8..feceb386b9aa9 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -369,7 +369,7 @@ def select_precision_plugin(self) -> PrecisionPlugin: self.amp_type = AMPType.from_str(self.amp_type) if self.on_ipu: - return IPUPrecisionPlugin(self.precision) + return IPUPrecisionPlugin() if self._distrib_type == DistributedType.DEEPSPEED or isinstance(self._training_type_plugin, DeepSpeedPlugin): return DeepSpeedPrecisionPlugin(self.precision) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index ededc1ac8439c..4feb117ebf543 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -183,7 +183,6 @@ class TestCallback(Callback): def setup(self, trainer: Trainer, pl_module: LightningModule, stage: Optional[str] = None) -> None: assert isinstance(trainer.accelerator.precision_plugin, IPUPrecisionPlugin) - assert trainer.accelerator.precision_plugin.precision == 16 assert trainer.accelerator.model.precision == 16 raise SystemExit @@ -201,7 +200,6 @@ class TestCallback(Callback): def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) assert isinstance(trainer.accelerator.precision_plugin, IPUPrecisionPlugin) - assert trainer.accelerator.precision_plugin.precision == 16 assert trainer.accelerator.model.precision == 16 assert trainer.accelerator.training_type_plugin.convert_model_to_half for param in trainer.accelerator.model.parameters(): @@ -461,3 +459,16 @@ def test_default_opts(tmpdir): assert opts.Training.gradient_accumulation == 1 assert opts.device_iterations == 1 assert opts.replication_factor == 1 + + +@RunIf(ipu=True) +def test_clip_val_fail(tmpdir): + """ + Ensure if clipping value is greater than 0 or not None, we throw an exception. 
+ """ + + model = IPUModel() + + trainer = Trainer(ipus=1, fast_dev_run=True, gradient_clip_val=10) + with pytest.raises(MisconfigurationException, match="IPUs currently do not support clipping gradients."): + trainer.fit(model) From 02a75b518c4a9b47946171ecf8c2bbab04b23d19 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 11 Jun 2021 11:10:29 +0100 Subject: [PATCH 56/60] Add optimizer test --- tests/accelerators/test_ipu.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 4feb117ebf543..3f26148767126 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -469,6 +469,24 @@ def test_clip_val_fail(tmpdir): model = IPUModel() - trainer = Trainer(ipus=1, fast_dev_run=True, gradient_clip_val=10) + trainer = Trainer(ipus=1, gradient_clip_val=10) with pytest.raises(MisconfigurationException, match="IPUs currently do not support clipping gradients."): trainer.fit(model) + + +@RunIf(ipu=True) +def test_multi_optimizers_fail(tmpdir): + """ + Ensure if there are multiple optimizers, we throw an exception + """ + + class TestModel(IPUModel): + + def configure_optimizers(self): + return [torch.optim.Adam(self.parameters()), torch.optim.Adam(self.parameters())] + + model = TestModel() + + trainer = Trainer(ipus=1) + with pytest.raises(MisconfigurationException, match="IPUs currently only support one optimizer."): + trainer.fit(model) From d18fc559f44e49a66b5fa6eba13aa75ada92e681 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 11 Jun 2021 12:58:18 +0100 Subject: [PATCH 57/60] Add precision back with test --- pytorch_lightning/plugins/precision/ipu_precision.py | 4 ++++ .../trainer/connectors/accelerator_connector.py | 2 +- tests/accelerators/test_ipu.py | 12 ++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/plugins/precision/ipu_precision.py b/pytorch_lightning/plugins/precision/ipu_precision.py index 21510f2914180..e6983966e166b 100644 --- a/pytorch_lightning/plugins/precision/ipu_precision.py +++ b/pytorch_lightning/plugins/precision/ipu_precision.py @@ -25,6 +25,10 @@ class IPUPrecisionPlugin(PrecisionPlugin): + def __init__(self, precision: int) -> None: + super().__init__() + self.precision = precision + def backward( self, model: 'pl.LightningModule', diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index feceb386b9aa9..6b6c73ef327a8 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -369,7 +369,7 @@ def select_precision_plugin(self) -> PrecisionPlugin: self.amp_type = AMPType.from_str(self.amp_type) if self.on_ipu: - return IPUPrecisionPlugin() + return IPUPrecisionPlugin(self.precision) if self._distrib_type == DistributedType.DEEPSPEED or isinstance(self._training_type_plugin, DeepSpeedPlugin): return DeepSpeedPrecisionPlugin(self.precision) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 3f26148767126..67a70b1af8999 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -183,6 +183,7 @@ class TestCallback(Callback): def setup(self, trainer: Trainer, pl_module: LightningModule, stage: Optional[str] = None) -> None: assert isinstance(trainer.accelerator.precision_plugin, IPUPrecisionPlugin) + assert trainer.accelerator.precision_plugin.precision == 16 assert 
trainer.accelerator.model.precision == 16 raise SystemExit @@ -200,6 +201,7 @@ class TestCallback(Callback): def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) assert isinstance(trainer.accelerator.precision_plugin, IPUPrecisionPlugin) + assert trainer.accelerator.precision_plugin.precision == 16 assert trainer.accelerator.model.precision == 16 assert trainer.accelerator.training_type_plugin.convert_model_to_half for param in trainer.accelerator.model.parameters(): @@ -490,3 +492,13 @@ def configure_optimizers(self): trainer = Trainer(ipus=1) with pytest.raises(MisconfigurationException, match="IPUs currently only support one optimizer."): trainer.fit(model) + + +@RunIf(ipu=True) +def test_precision_plugin(tmpdir): + """ + Ensure precision plugin value is set correctly. + """ + + plugin = IPUPrecisionPlugin(precision=16) + assert plugin.precision == 16 From 043884af85baad959545048f0bb29c4d1abaa147 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 11 Jun 2021 15:17:15 +0100 Subject: [PATCH 58/60] Address code review --- .../plugins/training_type/ipu.py | 23 ++++-- .../connectors/accelerator_connector.py | 2 +- tests/accelerators/test_ipu.py | 71 +++++++++++-------- 3 files changed, 58 insertions(+), 38 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index 1d64636e5f43a..9cacbed585aa9 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -1,3 +1,16 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import inspect import json import os @@ -94,9 +107,7 @@ def __init__( options = {"autoReport.all": self.autoreport} if self.autoreport_dir: self._fs = get_filesystem(str(self.autoreport_dir)) - - if not self._fs.exists(self.autoreport_dir): - self._fs.makedirs(self.autoreport_dir) + self._fs.makedirs(self.autoreport_dir, exist_ok=True) options["autoReport.directory"] = self.autoreport_dir os.environ["POPLAR_ENGINE_OPTIONS"] = json.dumps(options) @@ -161,7 +172,7 @@ def _validate_opts(self, opts: 'poptorch.Options', training: bool) -> None: rank_zero_warn( f"Manual poptorch.Options set replicationFactor to {opts.replication_factor} " f"which differs to the ipus={self.replication_factor} flag passed to the Trainer. " - f"Setting to {self.replication_factor} in the poptorch.Options.", UserWarning + f"Setting to {self.replication_factor} in the poptorch.Options." ) opts.set(replication_factor=self.replication_factor) if training: @@ -171,13 +182,13 @@ def _validate_opts(self, opts: 'poptorch.Options', training: bool) -> None: f"Training poptorch.Options set gradientAccumulation to {opts.Training.gradient_accumulation}. " f"This is different to accumulate_grad_batches which was set to {accumulate_grad_batches}. " f"To change gradientAccumulation, please set accumulate_grad_batches in the Trainer. 
" - f"Setting poptorch.Options gradientAccumulation to {accumulate_grad_batches}", UserWarning + f"Setting poptorch.Options gradientAccumulation to {accumulate_grad_batches}" ) opts.Training.set(gradient_accumulation=accumulate_grad_batches) elif opts.Training.gradient_accumulation != 1: rank_zero_warn( "Inference poptorch.Options should set gradientAccumulation to 1. " - "Setting gradientAccumulation to 1 for inference options.", UserWarning + "Setting gradientAccumulation to 1 for inference options." ) opts.Training.set(gradient_accumulation=1) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 6b6c73ef327a8..8f5de9a6302aa 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -637,7 +637,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None): rank_zero_info(f'TPU available: {_TPU_AVAILABLE}, using: {num_tpu_cores} TPU cores') num_ipus = self.ipus if self.ipus is not None else 0 - rank_zero_info(f'IPU available: {_IPU_AVAILABLE}, using: {num_ipus} IPU cores') + rank_zero_info(f'IPU available: {_IPU_AVAILABLE}, using: {num_ipus} IPUs') if torch.cuda.is_available() and self._device_type != DeviceType.GPU: rank_zero_warn( diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index 67a70b1af8999..bf5f0ff6ca125 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -98,17 +98,17 @@ def test_epoch_end(self, outputs) -> None: @pytest.mark.skipif(_IPU_AVAILABLE, reason="test requires non-IPU machine") def test_fail_if_no_ipus(tmpdir): with pytest.raises(MisconfigurationException, match="IPU Accelerator requires IPU devices to run"): - Trainer(ipus=1) + Trainer(default_root_dir=tmpdir, ipus=1) with pytest.raises(MisconfigurationException, match="IPU Accelerator requires IPU devices to run"): - Trainer(ipus=1, accelerator='ipu') + Trainer(default_root_dir=tmpdir, ipus=1, accelerator='ipu') @RunIf(ipu=True) def test_accelerator_selected(tmpdir): - trainer = Trainer(ipus=1) + trainer = Trainer(default_root_dir=tmpdir, ipus=1) assert isinstance(trainer.accelerator, IPUAccelerator) - trainer = Trainer(ipus=1, accelerator='ipu') + trainer = Trainer(default_root_dir=tmpdir, ipus=1, accelerator='ipu') assert isinstance(trainer.accelerator, IPUAccelerator) @@ -116,7 +116,7 @@ def test_accelerator_selected(tmpdir): @pytest.mark.parametrize('ipus', [1, 4]) def test_all_stages(tmpdir, ipus): model = IPUModel() - trainer = Trainer(fast_dev_run=True, ipus=ipus) + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, ipus=ipus) trainer.fit(model) trainer.validate(model) trainer.test(model) @@ -128,7 +128,7 @@ def test_all_stages(tmpdir, ipus): def test_inference_only(tmpdir, ipus): model = IPUModel() - trainer = Trainer(fast_dev_run=True, ipus=ipus) + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, ipus=ipus) trainer.validate(model) trainer.test(model) trainer.predict(model, model.val_dataloader()) @@ -188,7 +188,7 @@ def setup(self, trainer: Trainer, pl_module: LightningModule, stage: Optional[st raise SystemExit model = IPUModel() - trainer = Trainer(fast_dev_run=True, ipus=1, precision=16, callbacks=TestCallback()) + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, ipus=1, precision=16, callbacks=TestCallback()) with pytest.raises(SystemExit): trainer.fit(model) @@ -210,6 +210,7 @@ def on_train_start(self, trainer: 
Trainer, pl_module: LightningModule) -> None: model = IPUModel() trainer = Trainer( + default_root_dir=tmpdir, fast_dev_run=True, ipus=1, precision=16, @@ -234,7 +235,13 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: raise SystemExit model = IPUModel() - trainer = Trainer(fast_dev_run=True, ipus=1, plugins=IPUPlugin(device_iterations=2), callbacks=TestCallback()) + trainer = Trainer( + default_root_dir=tmpdir, + fast_dev_run=True, + ipus=1, + plugins=IPUPlugin(device_iterations=2), + callbacks=TestCallback() + ) with pytest.raises(SystemExit): trainer.fit(model) @@ -254,7 +261,9 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: raise SystemExit model = IPUModel() - trainer = Trainer(fast_dev_run=True, ipus=1, accumulate_grad_batches=2, callbacks=TestCallback()) + trainer = Trainer( + default_root_dir=tmpdir, fast_dev_run=True, ipus=1, accumulate_grad_batches=2, callbacks=TestCallback() + ) with pytest.raises(SystemExit): trainer.fit(model) @@ -298,7 +307,7 @@ def on_predict_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, da assert torch.all(outputs == 4).item() model = StageModel() - trainer = Trainer(fast_dev_run=True, ipus=1, callbacks=TestCallback()) + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, ipus=1, callbacks=TestCallback()) trainer.fit(model) trainer.test(model) trainer.validate(model) @@ -308,7 +317,7 @@ def on_predict_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, da @RunIf(ipu=True) def test_accumulate_grad_batches_dict_fails(tmpdir): model = IPUModel() - trainer = Trainer(ipus=1, accumulate_grad_batches={0: 1}) + trainer = Trainer(default_root_dir=tmpdir, ipus=1, accumulate_grad_batches={0: 1}) with pytest.raises( MisconfigurationException, match="IPUs currently only support accumulate_grad_batches being an integer value." 
): @@ -318,7 +327,7 @@ def test_accumulate_grad_batches_dict_fails(tmpdir): @RunIf(ipu=True) def test_clip_gradients_fails(tmpdir): model = IPUModel() - trainer = Trainer(ipus=1, gradient_clip_val=10) + trainer = Trainer(default_root_dir=tmpdir, ipus=1, gradient_clip_val=10) with pytest.raises(MisconfigurationException, match="IPUs currently do not support clipping gradients."): trainer.fit(model) @@ -328,7 +337,12 @@ def test_autoreport(tmpdir): """Ensure autoreport dumps to a file.""" model = IPUModel() autoreport_path = os.path.join(tmpdir, 'report/') - trainer = Trainer(ipus=1, fast_dev_run=True, plugins=IPUPlugin(autoreport=True, autoreport_dir=autoreport_path)) + trainer = Trainer( + default_root_dir=tmpdir, + ipus=1, + fast_dev_run=True, + plugins=IPUPlugin(autoreport=True, autoreport_dir=autoreport_path) + ) trainer.fit(model) assert os.path.exists(autoreport_path) assert os.path.isfile(autoreport_path + 'profile.pop') @@ -342,7 +356,10 @@ def test_manual_poptorch_opts(tmpdir): training_opts = poptorch.Options() trainer = Trainer( - ipus=1, fast_dev_run=True, plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) + default_root_dir=tmpdir, + ipus=1, + fast_dev_run=True, + plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) ) trainer.fit(model) @@ -368,6 +385,7 @@ def test_manual_poptorch_opts_ipu_count(tmpdir): training_opts.replicationFactor(manual_ipus) trainer = Trainer( + default_root_dir=tmpdir, ipus=expected_ipus, fast_dev_run=True, plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) @@ -399,7 +417,10 @@ def test_manual_poptorch_opts_inference_grad_accum(tmpdir): training_opts.Training.gradientAccumulation(1) trainer = Trainer( - ipus=1, fast_dev_run=True, plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) + default_root_dir=tmpdir, + ipus=1, + fast_dev_run=True, + plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) ) with pytest.warns( UserWarning, @@ -426,6 +447,7 @@ def test_manual_poptorch_opts_train_grad_accum(tmpdir): training_opts.Training.gradientAccumulation(2) trainer = Trainer( + default_root_dir=tmpdir, ipus=1, fast_dev_run=True, accumulate_grad_batches=1, @@ -451,7 +473,7 @@ def test_default_opts(tmpdir): model = IPUModel() - trainer = Trainer(ipus=1, fast_dev_run=True) + trainer = Trainer(default_root_dir=tmpdir, ipus=1, fast_dev_run=True) trainer.fit(model) assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) inference_opts = trainer.accelerator.training_type_plugin.inference_opts @@ -464,20 +486,7 @@ def test_default_opts(tmpdir): @RunIf(ipu=True) -def test_clip_val_fail(tmpdir): - """ - Ensure if clipping value is greater than 0 or not None, we throw an exception. 
- """ - - model = IPUModel() - - trainer = Trainer(ipus=1, gradient_clip_val=10) - with pytest.raises(MisconfigurationException, match="IPUs currently do not support clipping gradients."): - trainer.fit(model) - - -@RunIf(ipu=True) -def test_multi_optimizers_fail(tmpdir): +def test_multi_optimizers_fails(tmpdir): """ Ensure if there are multiple optimizers, we throw an exception """ @@ -489,7 +498,7 @@ def configure_optimizers(self): model = TestModel() - trainer = Trainer(ipus=1) + trainer = Trainer(default_root_dir=tmpdir, ipus=1) with pytest.raises(MisconfigurationException, match="IPUs currently only support one optimizer."): trainer.fit(model) From b2493913ec965f190d51e1f9316e128774bbe91a Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 11 Jun 2021 15:24:13 +0100 Subject: [PATCH 59/60] Change to probs --- pl_examples/ipu_examples/mnist.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pl_examples/ipu_examples/mnist.py b/pl_examples/ipu_examples/mnist.py index 87087d67766b2..37cb63c076e2e 100644 --- a/pl_examples/ipu_examples/mnist.py +++ b/pl_examples/ipu_examples/mnist.py @@ -46,11 +46,11 @@ def training_step(self, batch, batch_idx): def validation_step(self, batch, batch_idx): x, y = batch - logits = self(x) + probs = self(x) # we currently return the accuracy as the validation_step/test_step is run on the IPU devices. # Outputs from the step functions are sent to the host device, where we calculate the metrics in # validation_epoch_end and test_epoch_end for the test_step. - acc = self.accuracy(logits, y) + acc = self.accuracy(probs, y) return acc def test_step(self, batch, batch_idx): From b0dd20609b7e0ed3ea12f63e9a68a32509bcd3cb Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 11 Jun 2021 15:26:43 +0100 Subject: [PATCH 60/60] Move some of the asserts earlier --- tests/accelerators/test_ipu.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index bf5f0ff6ca125..52496e28b2230 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -182,13 +182,13 @@ def test_mixed_precision(tmpdir): class TestCallback(Callback): def setup(self, trainer: Trainer, pl_module: LightningModule, stage: Optional[str] = None) -> None: - assert isinstance(trainer.accelerator.precision_plugin, IPUPrecisionPlugin) - assert trainer.accelerator.precision_plugin.precision == 16 assert trainer.accelerator.model.precision == 16 raise SystemExit model = IPUModel() trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, ipus=1, precision=16, callbacks=TestCallback()) + assert isinstance(trainer.accelerator.precision_plugin, IPUPrecisionPlugin) + assert trainer.accelerator.precision_plugin.precision == 16 with pytest.raises(SystemExit): trainer.fit(model) @@ -199,9 +199,6 @@ def test_pure_half_precision(tmpdir): class TestCallback(Callback): def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: - assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin) - assert isinstance(trainer.accelerator.precision_plugin, IPUPrecisionPlugin) - assert trainer.accelerator.precision_plugin.precision == 16 assert trainer.accelerator.model.precision == 16 assert trainer.accelerator.training_type_plugin.convert_model_to_half for param in trainer.accelerator.model.parameters(): @@ -217,6 +214,11 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: plugins=IPUPlugin(convert_model_to_half=True), 
         callbacks=TestCallback()
     )
+
+    assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin)
+    assert isinstance(trainer.accelerator.precision_plugin, IPUPrecisionPlugin)
+    assert trainer.accelerator.precision_plugin.precision == 16
+
     with pytest.raises(SystemExit):
         trainer.fit(model)
@@ -227,7 +229,6 @@ def test_device_iterations_ipu_plugin(tmpdir):
     class TestCallback(Callback):

         def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
-            assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin)
             assert trainer.accelerator.training_type_plugin.device_iterations == 2
             # assert device iterations has been set correctly within the poptorch options
             poptorch_model = trainer.accelerator.training_type_plugin.poptorch_models[RunningStage.TRAINING]
             raise SystemExit
@@ -242,6 +243,7 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
         plugins=IPUPlugin(device_iterations=2),
         callbacks=TestCallback()
     )
+    assert isinstance(trainer.accelerator.training_type_plugin, IPUPlugin)
     with pytest.raises(SystemExit):
         trainer.fit(model)
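
Taken together, the patches above add an `ipus=` Trainer flag, an `IPUPlugin` accepting optional `training_opts`/`inference_opts`, and an `IPUPrecisionPlugin`. A minimal usage sketch of how these pieces are meant to be combined from user code, assuming poptorch and IPU hardware are available and that `MyLightningModule` stands in for a user-defined LightningModule (it is a placeholder, not part of these patches):

import poptorch

from pytorch_lightning import Trainer
from pytorch_lightning.plugins import IPUPlugin

# Optional poptorch.Options; if omitted, IPUPlugin builds defaults in _create_opts().
training_opts = poptorch.Options()
training_opts.deviceIterations(2)
training_opts.replicationFactor(1)  # should match the ipus= flag below
training_opts.Training.gradientAccumulation(1)  # should match accumulate_grad_batches

inference_opts = poptorch.Options()
inference_opts.deviceIterations(2)  # inference keeps gradientAccumulation at 1

model = MyLightningModule()  # placeholder for a user-defined LightningModule
trainer = Trainer(
    ipus=1,  # formerly ipu_cores; selects the IPUAccelerator and IPUPlugin
    precision=16,  # handled by IPUPrecisionPlugin
    accumulate_grad_batches=1,
    plugins=IPUPlugin(training_opts=training_opts, inference_opts=inference_opts),
)
trainer.fit(model)

If the supplied options disagree with the Trainer flags (replication factor vs. `ipus=`, training gradient accumulation vs. `accumulate_grad_batches`, or inference gradient accumulation other than 1), the `_validate_opts` checks added above warn and rewrite the options to match the Trainer configuration.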