From b3e8e94da27e517e4f92b4d806e1d0337264167c Mon Sep 17 00:00:00 2001
From: SeanNaren
Date: Wed, 23 Jun 2021 14:41:55 +0100
Subject: [PATCH 1/6] Fixes to ensure ipu options are respected

---
 pytorch_lightning/plugins/training_type/ipu.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py
index 4e75358b67fae..7330dc92df592 100644
--- a/pytorch_lightning/plugins/training_type/ipu.py
+++ b/pytorch_lightning/plugins/training_type/ipu.py
@@ -211,9 +211,8 @@ def process_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[I
             dataloader = apply_to_collection(dataloader, DataLoader, self.process_dataloader)
             return dataloader
         if not isinstance(dataloader, poptorch.DataLoader):
-            dataloader = self._convert_to_poptorch_loader(
-                dataloader=dataloader, opts=self._create_opts(training=self.lightning_module.training)
-            )
+            opts = self.training_opts if self.lightning_module.training else self.inference_opts
+            dataloader = self._convert_to_poptorch_loader(dataloader=dataloader, opts=opts)
         return dataloader

     def _convert_to_poptorch_loader(self, dataloader: Union[Iterable, DataLoader],
@@ -266,9 +265,11 @@ def update_global_step(self, total_batch_idx: int, current_global_step: int) ->

     @property
     def _n_replicate(self):
-        # Ensure we replicate values to have enough dimensions to split across devices
-        accumulate_grad_batches = self._original_accumulate_grad_batches
-        return self.replication_factor * self.device_iterations * accumulate_grad_batches
+        opts = self.training_opts if self.lightning_module.training else self.inference_opts
+        accumulate_grad_batches = opts.Training.gradient_accumulation
+        device_iterations = opts.device_iterations
+        replication_factor = opts.replication_factor
+        return replication_factor * device_iterations * accumulate_grad_batches

     def _prepare_input(self, args: Any):

From 661c71c2834f35e86cd978c1c9ad57b71fb7554c Mon Sep 17 00:00:00 2001
From: SeanNaren
Date: Wed, 23 Jun 2021 14:59:28 +0100
Subject: [PATCH 2/6] Better setter

---
 pytorch_lightning/plugins/training_type/ipu.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py
index 7330dc92df592..66ed49650d4f0 100644
--- a/pytorch_lightning/plugins/training_type/ipu.py
+++ b/pytorch_lightning/plugins/training_type/ipu.py
@@ -211,7 +211,8 @@ def process_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[I
             dataloader = apply_to_collection(dataloader, DataLoader, self.process_dataloader)
             return dataloader
         if not isinstance(dataloader, poptorch.DataLoader):
-            opts = self.training_opts if self.lightning_module.training else self.inference_opts
+            is_training = self.lightning_module.trainer.state.stage is RunningStage.TRAINING
+            opts = self.training_opts if is_training else self.inference_opts
             dataloader = self._convert_to_poptorch_loader(dataloader=dataloader, opts=opts)
         return dataloader
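The two patches above make the plugin pick the poptorch.Options that will actually be used (training_opts while fitting, inference_opts otherwise) and derive its replication dimension from them, rather than rebuilding options from trainer state. A minimal sketch of that arithmetic follows; it is illustrative only and not part of the patches, assumes poptorch is installed, and reuses the same setter/attribute names that appear in the diffs:

import poptorch

# The three knobs that decide how many samples the host must provide per step.
opts = poptorch.Options()
opts.deviceIterations(8)
opts.replicationFactor(2)
opts.Training.gradientAccumulation(4)

# Mirrors the new _n_replicate: replication factor * device iterations * grad accumulation.
n_replicate = opts.replication_factor * opts.device_iterations * opts.Training.gradient_accumulation
print(n_replicate)  # 2 * 8 * 4 == 64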

From 2d9c663103695d5bc679ca6b5c1ccfcfae13c5bf Mon Sep 17 00:00:00 2001
From: SeanNaren
Date: Wed, 30 Jun 2021 10:46:47 +0100
Subject: [PATCH 3/6] Add test for poptorch Options

---
 .../plugins/training_type/ipu.py | 31 +++++++++------
 tests/accelerators/test_ipu.py   | 39 +++++++++++++++++++
 2 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py
index 66ed49650d4f0..6365f9cb13a71 100644
--- a/pytorch_lightning/plugins/training_type/ipu.py
+++ b/pytorch_lightning/plugins/training_type/ipu.py
@@ -108,7 +108,6 @@ def __init__(
             os.environ["POPLAR_ENGINE_OPTIONS"] = json.dumps(options)

     def pre_dispatch(self) -> None:
-        self._handle_gradient_accumulation_steps()
         precision = self.lightning_module.trainer.precision
         model = LightningIPUModule(self.lightning_module, precision)
         self.model = model
@@ -127,6 +126,7 @@ def pre_dispatch(self) -> None:
                     options=self.inference_opts,
                 )
                 self.poptorch_models[x] = model
+        self._handle_gradient_accumulation_steps()

     @property
     def replication_factor(self):
@@ -167,7 +167,7 @@ def _validate_opts(self, opts: 'poptorch.Options', training: bool) -> None:
            )
            opts.set(replication_factor=self.replication_factor)
        if training:
-            accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches
+            accumulate_grad_batches = self.accumulate_grad_batches
            if opts.Training.gradient_accumulation != accumulate_grad_batches:
                rank_zero_warn(
                    f"Training poptorch.Options set gradientAccumulation to {opts.Training.gradient_accumulation}. "
@@ -242,24 +242,33 @@ def _convert_to_poptorch_loader(self, dataloader: Union[Iterable, DataLoader],
             dataloader.multiprocessing_context = multiprocessing_context
         return dataloader

+    @property
+    def accumulate_grad_batches(self) -> int:
+        """
+        Tracks lazily the set accumulate_grad_batches in the trainer.
+        The IPUPlugin replaces the original accumulate_grad_batches.
+        """
+        if self._original_accumulate_grad_batches is None:
+            self._original_accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches
+            if not isinstance(self._original_accumulate_grad_batches, int):
+                raise MisconfigurationException(
+                    f"IPUs currently only support accumulate_grad_batches being an integer value. "
+                    f"Received {self.accumulate_grad_batches}"
+                )
+        return self._original_accumulate_grad_batches
+
     def _handle_gradient_accumulation_steps(self):
         """
         This functions overrides the trainer.accumulation_scheduler to generate
         ``accumulate_grad_batches=1``.
         Therefore, ``optimizer_step`` will be called on every batch, and the IPU will handle grad accumulation.
         """
-        self._original_accumulate_grad_batches = self.lightning_module.trainer.accumulate_grad_batches
-        if not isinstance(self._original_accumulate_grad_batches, int):
-            raise MisconfigurationException(
-                f"IPUs currently only support accumulate_grad_batches being an integer value. "
" - f"Received {self._original_accumulate_grad_batches}" - ) - if self._original_accumulate_grad_batches > 1: + if self.accumulate_grad_batches > 1: self.lightning_module.trainer.accumulation_scheduler = GradientAccumulationScheduler({0: 1}) def update_global_step(self, total_batch_idx: int, current_global_step: int) -> int: - if self._original_accumulate_grad_batches > 1: - if total_batch_idx % self._original_accumulate_grad_batches == 0: + if self.accumulate_grad_batches > 1: + if total_batch_idx % self.accumulate_grad_batches == 0: current_global_step += 1 return current_global_step return super().update_global_step(total_batch_idx, current_global_step) diff --git a/tests/accelerators/test_ipu.py b/tests/accelerators/test_ipu.py index eb5a5349483bd..658a8d6f15b04 100644 --- a/tests/accelerators/test_ipu.py +++ b/tests/accelerators/test_ipu.py @@ -460,6 +460,45 @@ def test_manual_poptorch_opts_train_grad_accum(tmpdir): assert trainer.accelerator.training_type_plugin.inference_opts.Training.gradient_accumulation == 1 +@RunIf(ipu=True) +def test_manual_poptorch_opts_custom(tmpdir): + """ + Ensure if the user passes manual poptorch Options with custom parameters set, + we respect them in our poptorch options. + """ + + model = IPUModel() + inference_opts = poptorch.Options() + inference_opts.deviceIterations(16) + inference_opts.replicationFactor(2) + inference_opts.Training.gradientAccumulation(1) + + training_opts = poptorch.Options() + training_opts.deviceIterations(8) + inference_opts.replicationFactor(2) + training_opts.Training.gradientAccumulation(2) + + trainer = Trainer( + default_root_dir=tmpdir, + ipus=2, + fast_dev_run=True, + accumulate_grad_batches=2, + plugins=IPUPlugin(inference_opts=inference_opts, training_opts=training_opts) + ) + trainer.fit(model) + plugin = trainer.accelerator.training_type_plugin + assert isinstance(plugin, IPUPlugin) + inference_opts = plugin.inference_opts + training_opts = plugin.training_opts + assert inference_opts.device_iterations == 16 + assert inference_opts.replication_factor == 2 + assert inference_opts.Training.gradient_accumulation == 1 + + assert training_opts.device_iterations == 8 + assert training_opts.replication_factor == 2 + assert training_opts.Training.gradient_accumulation == 2 + + @RunIf(ipu=True) def test_default_opts(tmpdir): """ From ff6a2ee4bb362c3c5c2fb1eb580999e3f092c590 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 30 Jun 2021 11:22:39 +0100 Subject: [PATCH 4/6] Fix test --- pytorch_lightning/plugins/training_type/ipu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py index e9bcac30c86d9..351cd59f0cea1 100644 --- a/pytorch_lightning/plugins/training_type/ipu.py +++ b/pytorch_lightning/plugins/training_type/ipu.py @@ -136,7 +136,7 @@ def _create_opts(self, training: bool): opts = poptorch.Options() opts.deviceIterations(self.device_iterations) opts.replicationFactor(self.replication_factor) - gradient_accumulation = self.lightning_module.trainer.accumulate_grad_batches if training else 1 + gradient_accumulation = self.accumulate_grad_batches if training else 1 opts.Training.gradientAccumulation(gradient_accumulation) if os.environ.get("PL_GLOBAL_SEED"): From d5f8d0881fa9f75e068dd7e5ac22c954d486c113 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Wed, 30 Jun 2021 12:17:44 +0100 Subject: [PATCH 5/6] fix ipu test --- tests/accelerators/test_ipu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
index 658a8d6f15b04..363648c9f681d 100644
--- a/tests/accelerators/test_ipu.py
+++ b/tests/accelerators/test_ipu.py
@@ -475,7 +475,7 @@ def test_manual_poptorch_opts_custom(tmpdir):

     training_opts = poptorch.Options()
     training_opts.deviceIterations(8)
-    inference_opts.replicationFactor(2)
+    training_opts.replicationFactor(2)
     training_opts.Training.gradientAccumulation(2)

     trainer = Trainer(

From e4a340af99b801d479a7f07d568bcac1284f5e1d Mon Sep 17 00:00:00 2001
From: Sean Naren
Date: Wed, 30 Jun 2021 12:21:17 +0100
Subject: [PATCH 6/6] Update pytorch_lightning/plugins/training_type/ipu.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Carlos Mocholí
---
 pytorch_lightning/plugins/training_type/ipu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_lightning/plugins/training_type/ipu.py b/pytorch_lightning/plugins/training_type/ipu.py
index 351cd59f0cea1..a2e408b66f98f 100644
--- a/pytorch_lightning/plugins/training_type/ipu.py
+++ b/pytorch_lightning/plugins/training_type/ipu.py
@@ -211,7 +211,7 @@ def process_dataloader(self, dataloader: Union[Iterable, DataLoader]) -> Union[I
             dataloader = apply_to_collection(dataloader, DataLoader, self.process_dataloader)
             return dataloader
         if not isinstance(dataloader, poptorch.DataLoader):
-            is_training = self.lightning_module.trainer.state.stage is RunningStage.TRAINING
+            is_training = self.lightning_module.trainer.training
             opts = self.training_opts if is_training else self.inference_opts
             dataloader = self._convert_to_poptorch_loader(dataloader=dataloader, opts=opts)
         return dataloader
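With the series applied, user-supplied poptorch.Options are used as-is for dataloaders, gradient accumulation, and the _n_replicate dimension. The snippet below is a usage sketch modelled on the new test_manual_poptorch_opts_custom test, not part of the patches; it assumes an IPU machine with poptorch installed, that the import path matches the module touched by these diffs, and that 'model' stands in for any LightningModule:

import poptorch
from pytorch_lightning import Trainer
from pytorch_lightning.plugins.training_type.ipu import IPUPlugin

# Options the plugin should now respect verbatim instead of rebuilding them.
training_opts = poptorch.Options()
training_opts.deviceIterations(8)
training_opts.replicationFactor(2)
training_opts.Training.gradientAccumulation(2)

inference_opts = poptorch.Options()
inference_opts.deviceIterations(16)
inference_opts.replicationFactor(2)
inference_opts.Training.gradientAccumulation(1)

trainer = Trainer(
    ipus=2,
    accumulate_grad_batches=2,  # kept consistent with training_opts, as in the test
    plugins=IPUPlugin(training_opts=training_opts, inference_opts=inference_opts),
)
# trainer.fit(model)  # 'model' is any LightningModule; omitted here for brevity

# After fit, the plugin exposes the same values it was given, e.g.:
# trainer.accelerator.training_type_plugin.training_opts.device_iterations == 8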