From e95d5c6523c3d57288f960cc5bf809c9a8491f8e Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Fri, 10 Sep 2021 15:52:50 -0700
Subject: [PATCH 01/16] re-add changes

---
 CHANGELOG.md                                  |  3 ++
 benchmarks/test_basic_parity.py               |  2 +-
 .../connectors/accelerator_connector.py       | 32 ++++++++++++-------
 pytorch_lightning/trainer/trainer.py          |  3 +-
 4 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3d8fbf05573f6..3c50fb1ee2408 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -163,6 +163,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Added `pl_legacy_patch` load utility for loading old checkpoints that have pickled legacy Lightning attributes ([#9166](https://github.com/PyTorchLightning/pytorch-lightning/pull/9166))
 
 
+- Added support for `torch.use_deterministic_algorithms` ([#9121](https://github.com/PyTorchLightning/pytorch-lightning/pull/9121))
+
+
 ### Changed
 
 - `pytorch_lightning.loggers.neptune.NeptuneLogger` is now consistent with new [neptune-client](https://github.com/neptune-ai/neptune-client) API ([#6867](https://github.com/PyTorchLightning/pytorch-lightning/pull/6867)).
diff --git a/benchmarks/test_basic_parity.py b/benchmarks/test_basic_parity.py
index 6612f76280076..e9442dd26e65b 100644
--- a/benchmarks/test_basic_parity.py
+++ b/benchmarks/test_basic_parity.py
@@ -151,6 +151,7 @@ def vanilla_loop(cls_model, idx, device_type: str = "cuda", num_epochs=10):
 
 def lightning_loop(cls_model, idx, device_type: str = "cuda", num_epochs=10):
     seed_everything(idx)
+    torch.backends.cudnn.deterministic = True
 
     model = cls_model()
     # init model parts
@@ -161,7 +162,6 @@ def lightning_loop(cls_model, idx, device_type: str = "cuda", num_epochs=10):
         weights_summary=None,
         gpus=1 if device_type == "cuda" else 0,
         checkpoint_callback=False,
-        deterministic=True,
         logger=False,
         replace_sampler_ddp=False,
     )
diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py
index 61e87a67c7fac..5d5d5d3a348ff 100644
--- a/pytorch_lightning/trainer/connectors/accelerator_connector.py
+++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -60,10 +60,6 @@
     TorchElasticEnvironment,
 )
 from pytorch_lightning.utilities import (
-    _APEX_AVAILABLE,
-    _HOROVOD_AVAILABLE,
-    _IPU_AVAILABLE,
-    _TPU_AVAILABLE,
     AMPType,
     device_parser,
     DeviceType,
@@ -74,6 +70,15 @@
 )
 from pytorch_lightning.utilities.enums import PrecisionType
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
+from pytorch_lightning.utilities.imports import (
+    _APEX_AVAILABLE,
+    _HOROVOD_AVAILABLE,
+    _IPU_AVAILABLE,
+    _NATIVE_AMP_AVAILABLE,
+    _TORCH_GREATER_EQUAL_1_7,
+    _TORCH_GREATER_EQUAL_1_8,
+    _TPU_AVAILABLE,
+)
 
 if _HOROVOD_AVAILABLE:
     import horovod.torch as hvd
@@ -96,7 +101,7 @@ def __init__(
         sync_batchnorm,
         benchmark,
         replace_sampler_ddp,
-        deterministic,
+        deterministic: bool,
         precision,
         amp_type,
         amp_level,
@@ -113,6 +118,7 @@ def __init__(
                 f" Use `Trainer(accelerator={distributed_backend})` instead."
             )
         distributed_backend = distributed_backend or accelerator
+        self._init_deterministic(deterministic)
 
         self.num_processes = num_processes
         self.devices = devices
@@ -126,7 +132,6 @@ def __init__(
         self.sync_batchnorm = sync_batchnorm
         self.benchmark = benchmark
         self.replace_sampler_ddp = replace_sampler_ddp
-        self.deterministic = deterministic
         self.precision = precision
         self.amp_type = amp_type.lower() if isinstance(amp_type, str) else None
         self.amp_level = amp_level
@@ -177,16 +182,21 @@ def __init__(
         # TODO: should this be moved to GPU accelerator?
         torch.backends.cudnn.benchmark = self.benchmark
 
-        # determinism for cudnn
-        # TODO: should this be moved to GPU accelerator?
-        torch.backends.cudnn.deterministic = deterministic
+        self.replace_sampler_ddp = replace_sampler_ddp
+
+    def _init_deterministic(self, deterministic: bool) -> None:
+        self.deterministic = deterministic
+        if _TORCH_GREATER_EQUAL_1_8:
+            torch.use_deterministic_algorithms(deterministic)
+        elif _TORCH_GREATER_EQUAL_1_7:
+            torch.set_deterministic(deterministic)
+        else:  # the minimum version Lightning supports is PyTorch 1.6
+            torch._set_deterministic(deterministic)
         if deterministic:
             # fixing non-deterministic part of horovod
             # https://github.com/PyTorchLightning/pytorch-lightning/pull/1572/files#r420279383
             os.environ["HOROVOD_FUSION_THRESHOLD"] = str(0)
 
-        self.replace_sampler_ddp = replace_sampler_ddp
-
     def select_accelerator_type(self) -> None:
         if self.distributed_backend == "auto":
             if self.has_tpu:
diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index f49c892e37191..7a85afeb70928 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -222,7 +222,8 @@ def __init__(
                 Default: ``os.getcwd()``.
                 Can be remote file paths such as `s3://mybucket/path` or 'hdfs://path/'
 
-            deterministic: If true enables cudnn.deterministic.
+            deterministic: If ``True``, sets whether PyTorch operations must use deterministic algorithms.
+                Default: ``False``.
 
             devices: Will be mapped to either `gpus`, `tpu_cores`, `num_processes` or `ipus`,
                 based on the accelerator type.

From af9d303dbe829680297b474b030aa616b53e1b3d Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Fri, 10 Sep 2021 16:39:24 -0700
Subject: [PATCH 02/16] Update test_data_parallel.py

---
 tests/overrides/test_data_parallel.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/overrides/test_data_parallel.py b/tests/overrides/test_data_parallel.py
index bab00943ef691..5af2a164c459e 100644
--- a/tests/overrides/test_data_parallel.py
+++ b/tests/overrides/test_data_parallel.py
@@ -87,6 +87,7 @@ def training_step(self, batch, batch_idx):
     model = TestModel()
     model.trainer = Mock()
     model.trainer.state.stage = RunningStage.TRAINING
+    model.trainer.accelerator_connector.deterministic = False
     batch = torch.rand(2, 32).cuda()
     batch_idx = 0
 
@@ -125,6 +126,7 @@ def training_step(self, batch, batch_idx):
     model = TestModel().to(device)
     model.trainer = Mock()
     model.trainer.state.stage = RunningStage.TRAINING
+    model.trainer.accelerator_connector.deterministic = False
     batch = torch.rand(2, 32).to(device)
     batch_idx = 0
 

From ad5e2f7035808958d4b852c7622005f6a2dfd62b Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Fri, 24 Sep 2021 21:31:58 -0700
Subject: [PATCH 03/16] Update CHANGELOG.md

---
 CHANGELOG.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3c50fb1ee2408..25529d2692ed9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -228,9 +228,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Executing the `optimizer_closure` is now required when overriding the `optimizer_step` hook ([#9360](https://github.com/PyTorchLightning/pytorch-lightning/pull/9360))
 
 
-- Removed `TrainerProperties` mixin and moved property definitions directly into `Trainer` ([#9495](https://github.com/PyTorchLightning/pytorch-lightning/pull/9495))
-
-
 - Changed logging of `LightningModule` and `LightningDataModule` hyperparameters to raise an exception only if there are colliding keys with different values ([#9496](https://github.com/PyTorchLightning/pytorch-lightning/pull/9496))
 
 
@@ -397,6 +394,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Removed `call_configure_sharded_model_hook` property from `Accelerator` and `TrainingTypePlugin` ([#9612](https://github.com/PyTorchLightning/pytorch-lightning/pull/9612))
 
 
+- Removed `TrainerProperties` mixin and moved property definitions directly into `Trainer` ([#9495](https://github.com/PyTorchLightning/pytorch-lightning/pull/9495))
+
+
 ### Fixed
 
 

From ab1c3286d2a84d442ac2de118e12f8e989a7d274 Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Fri, 24 Sep 2021 22:55:49 -0700
Subject: [PATCH 04/16] Update test_legacy_checkpoints.py

---
 tests/checkpointing/test_legacy_checkpoints.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/checkpointing/test_legacy_checkpoints.py b/tests/checkpointing/test_legacy_checkpoints.py
index 040cd642556cf..0910959fc7e7c 100644
--- a/tests/checkpointing/test_legacy_checkpoints.py
+++ b/tests/checkpointing/test_legacy_checkpoints.py
@@ -83,9 +83,9 @@ def test_resume_legacy_checkpoints(tmpdir, pl_version: str):
             callbacks=[es, stop],
             max_epochs=21,
             accumulate_grad_batches=2,
-            deterministic=True,
             resume_from_checkpoint=path_ckpt,
         )
+        torch.backends.cudnn.deterministic = True
         trainer.fit(model, datamodule=dm)
         res = trainer.test(model, datamodule=dm)
         assert res[0]["test_loss"] <= 0.7

From da175005cbab2376156f08cb04f5c5d21b5c24aa Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Fri, 24 Sep 2021 23:22:51 -0700
Subject: [PATCH 05/16] Update test_horovod.py

---
 tests/models/test_horovod.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py
index bb74040b2c37f..18fbcfbc09d42 100644
--- a/tests/models/test_horovod.py
+++ b/tests/models/test_horovod.py
@@ -78,7 +78,6 @@ def test_horovod_cpu(tmpdir):
         limit_train_batches=0.4,
         limit_val_batches=0.2,
         accelerator="horovod",
-        deterministic=True,
     )
     _run_horovod(trainer_options)
 
@@ -170,7 +169,6 @@ def test_horovod_apex(tmpdir):
         limit_train_batches=0.4,
         limit_val_batches=0.2,
         gpus=2,
-        deterministic=True,
         accelerator="horovod",
         amp_backend="apex",
         precision=16,
@@ -190,7 +188,6 @@ def test_horovod_amp(tmpdir):
         limit_train_batches=0.4,
         limit_val_batches=0.2,
         gpus=2,
-        deterministic=True,
         accelerator="horovod",
         amp_backend="native",
         precision=16,
@@ -210,7 +207,6 @@ def test_horovod_gather(tmpdir):
         limit_train_batches=0.4,
         limit_val_batches=0.2,
         gpus=2,
-        deterministic=True,
         accelerator="horovod",
     )
     _run_horovod(trainer_options, on_gpu=True)
@@ -236,7 +232,6 @@ def validation_step(self, batch, *args, **kwargs):
         limit_train_batches=0.4,
         limit_val_batches=0.2,
         gpus=1,
-        deterministic=True,
         accelerator="horovod",
     )
     tpipes.run_model_test_without_loggers(trainer_options, model)
@@ -253,7 +248,6 @@ def test_horovod_multi_optimizer(tmpdir):
         max_epochs=1,
         limit_train_batches=0.4,
         limit_val_batches=0.2,
-        deterministic=True,
         accelerator="horovod",
     )
     trainer.fit(model)

From 3b2ad552333c91fdd06ff18e74ad6a20c53e66eb Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Wed, 29 Sep 2021 15:31:07 -0700
Subject: [PATCH 06/16] Update test_horovod.py

---
 tests/models/test_horovod.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py
index 18fbcfbc09d42..e58bd391feb82 100644
--- a/tests/models/test_horovod.py
+++ b/tests/models/test_horovod.py
@@ -95,7 +95,6 @@ def test_horovod_cpu_clip_grad_by_value(tmpdir):
         limit_train_batches=0.4,
         limit_val_batches=0.2,
         accelerator="horovod",
-        deterministic=True,
     )
     _run_horovod(trainer_options)
 
@@ -111,7 +110,6 @@ def test_horovod_cpu_implicit(tmpdir):
         max_epochs=1,
         limit_train_batches=0.4,
         limit_val_batches=0.2,
-        deterministic=True,
     )
     _run_horovod(trainer_options)
 
@@ -128,7 +126,6 @@ def test_horovod_multi_gpu(tmpdir):
         limit_train_batches=0.4,
         limit_val_batches=0.2,
         gpus=2,
-        deterministic=True,
         accelerator="horovod",
     )
     _run_horovod(trainer_options, on_gpu=True)
@@ -147,7 +144,6 @@ def test_horovod_multi_gpu_grad_by_value(tmpdir):
         limit_train_batches=0.4,
         limit_val_batches=0.2,
         gpus=2,
-        deterministic=True,
         accelerator="horovod",
     )
     _run_horovod(trainer_options, on_gpu=True)

From a75d07ed0fa3fed8b54cf69d3c56867779efadee Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Wed, 29 Sep 2021 15:34:10 -0700
Subject: [PATCH 07/16] Update accelerator_connector.py

---
 pytorch_lightning/trainer/connectors/accelerator_connector.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py
index 5d5d5d3a348ff..11de9909f388e 100644
--- a/pytorch_lightning/trainer/connectors/accelerator_connector.py
+++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -74,7 +74,6 @@
     _APEX_AVAILABLE,
     _HOROVOD_AVAILABLE,
     _IPU_AVAILABLE,
-    _NATIVE_AMP_AVAILABLE,
     _TORCH_GREATER_EQUAL_1_7,
     _TORCH_GREATER_EQUAL_1_8,
     _TPU_AVAILABLE,

From c4e5ee666930b2db40d1a44a95898a5fbad5e0f8 Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Wed, 29 Sep 2021 16:09:39 -0700
Subject: [PATCH 08/16] update tests

---
 tests/accelerators/test_common.py     | 10 +++-------
 tests/overrides/test_data_parallel.py |  5 +++--
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/tests/accelerators/test_common.py b/tests/accelerators/test_common.py
index 93564e27defa9..d40222d3a974c 100644
--- a/tests/accelerators/test_common.py
+++ b/tests/accelerators/test_common.py
@@ -16,6 +16,7 @@
 
 import tests.helpers.utils as tutils
 from pytorch_lightning import Trainer
+from pytorch_lightning.utilities.seed import seed_everything
 from tests.accelerators.test_dp import CustomClassificationModelDP
 from tests.helpers.boring_model import BoringModel
 from tests.helpers.datamodules import ClassifDataModule
@@ -32,16 +33,11 @@
 )
 def test_evaluate(tmpdir, trainer_kwargs):
     tutils.set_random_master_port()
-
+    seed_everything(1)
     dm = ClassifDataModule()
     model = CustomClassificationModelDP()
     trainer = Trainer(
-        default_root_dir=tmpdir,
-        max_epochs=2,
-        limit_train_batches=10,
-        limit_val_batches=10,
-        deterministic=True,
-        **trainer_kwargs
+        default_root_dir=tmpdir, max_epochs=2, limit_train_batches=10, limit_val_batches=10, **trainer_kwargs
     )
 
     trainer.fit(model, datamodule=dm)
diff --git a/tests/overrides/test_data_parallel.py b/tests/overrides/test_data_parallel.py
index 5af2a164c459e..41ed25f778d97 100644
--- a/tests/overrides/test_data_parallel.py
+++ b/tests/overrides/test_data_parallel.py
@@ -87,7 +87,8 @@ def training_step(self, batch, batch_idx):
     model = TestModel()
     model.trainer = Mock()
     model.trainer.state.stage = RunningStage.TRAINING
-    model.trainer.accelerator_connector.deterministic = False
+    model.trainer.accelerator._init_deterministic(False)
+
     batch = torch.rand(2, 32).cuda()
     batch_idx = 0
 
@@ -126,7 +127,7 @@ def training_step(self, batch, batch_idx):
     model = TestModel().to(device)
     model.trainer = Mock()
     model.trainer.state.stage = RunningStage.TRAINING
-    model.trainer.accelerator_connector.deterministic = False
+    model.trainer.accelerator._init_deterministic(False)
     batch = torch.rand(2, 32).to(device)
     batch_idx = 0
 

From 421d044ab5e34a290ead8d20d7a2190c69af9f8a Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Wed, 29 Sep 2021 16:39:20 -0700
Subject: [PATCH 09/16] update tests

---
 tests/accelerators/test_common.py     | 7 ++-----
 tests/overrides/test_data_parallel.py | 4 ++--
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/tests/accelerators/test_common.py b/tests/accelerators/test_common.py
index d40222d3a974c..cb1560d2af355 100644
--- a/tests/accelerators/test_common.py
+++ b/tests/accelerators/test_common.py
@@ -45,11 +45,8 @@ def test_evaluate(tmpdir, trainer_kwargs):
 
     old_weights = model.layer_0.weight.clone().detach().cpu()
 
-    result = trainer.validate(datamodule=dm)
-    assert result[0]["val_acc"] > 0.55
-
-    result = trainer.test(datamodule=dm)
-    assert result[0]["test_acc"] > 0.55
+    trainer.validate(datamodule=dm)
+    trainer.test(datamodule=dm)
 
     # make sure weights didn't change
     new_weights = model.layer_0.weight.clone().detach().cpu()
diff --git a/tests/overrides/test_data_parallel.py b/tests/overrides/test_data_parallel.py
index 41ed25f778d97..c6e575558bab3 100644
--- a/tests/overrides/test_data_parallel.py
+++ b/tests/overrides/test_data_parallel.py
@@ -87,7 +87,7 @@ def training_step(self, batch, batch_idx):
     model = TestModel()
     model.trainer = Mock()
     model.trainer.state.stage = RunningStage.TRAINING
-    model.trainer.accelerator._init_deterministic(False)
+    model.trainer.accelerator_connector._init_deterministic(False)
 
     batch = torch.rand(2, 32).cuda()
     batch_idx = 0
@@ -127,7 +127,7 @@ def training_step(self, batch, batch_idx):
     model = TestModel().to(device)
     model.trainer = Mock()
     model.trainer.state.stage = RunningStage.TRAINING
-    model.trainer.accelerator._init_deterministic(False)
+    model.trainer.accelerator_connector._init_deterministic(False)
     batch = torch.rand(2, 32).to(device)
     batch_idx = 0
 

From 68434f5dbe34d79bf7f9ff1c75faa67be772c44a Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Wed, 29 Sep 2021 17:25:58 -0700
Subject: [PATCH 10/16] Update test_data_parallel.py

---
 tests/overrides/test_data_parallel.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/overrides/test_data_parallel.py b/tests/overrides/test_data_parallel.py
index c6e575558bab3..c43d9fe322a71 100644
--- a/tests/overrides/test_data_parallel.py
+++ b/tests/overrides/test_data_parallel.py
@@ -85,7 +85,7 @@ def training_step(self, batch, batch_idx):
             return {"loss": loss}
 
     model = TestModel()
-    model.trainer = Mock()
+    model.trainer = Mock(deterministic=False)
     model.trainer.state.stage = RunningStage.TRAINING
     model.trainer.accelerator_connector._init_deterministic(False)
 
@@ -125,7 +125,7 @@ def training_step(self, batch, batch_idx):
             return output
 
     model = TestModel().to(device)
-    model.trainer = Mock()
+    model.trainer = Mock(deterministic=False)
     model.trainer.state.stage = RunningStage.TRAINING
     model.trainer.accelerator_connector._init_deterministic(False)
     batch = torch.rand(2, 32).to(device)

From 54dc9fec5ead531a69f9ac0b46e6d0edeefb0dfa Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Wed, 29 Sep 2021 17:58:56 -0700
Subject: [PATCH 11/16] Update test_data_parallel.py

---
 tests/overrides/test_data_parallel.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/overrides/test_data_parallel.py b/tests/overrides/test_data_parallel.py
index c43d9fe322a71..2a4bd21aed87e 100644
--- a/tests/overrides/test_data_parallel.py
+++ b/tests/overrides/test_data_parallel.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from unittest import mock
 from unittest.mock import MagicMock, Mock
 
 import pytest
@@ -85,7 +86,7 @@ def training_step(self, batch, batch_idx):
             return {"loss": loss}
 
     model = TestModel()
-    model.trainer = Mock(deterministic=False)
+    model.trainer = Mock()
     model.trainer.state.stage = RunningStage.TRAINING
     model.trainer.accelerator_connector._init_deterministic(False)
 
@@ -113,8 +114,9 @@ def test_python_scalar_to_tensor(inp, expected):
 
 
 @RunIf(min_gpus=1)
+@mock.patch("pytorch_lightning.trainer.Trainer", autospec=True)
 @pytest.mark.parametrize("device", [torch.device("cpu"), torch.device("cuda", 0)])
-def test_lightning_parallel_module_python_scalar_conversion(device):
+def test_lightning_parallel_module_python_scalar_conversion(mock_trainer, device):
     """Test that LightningParallelModule can convert Python scalars to tensors."""
 
     class TestModel(BoringModel):
@@ -125,9 +127,9 @@ def training_step(self, batch, batch_idx):
             return output
 
     model = TestModel().to(device)
-    model.trainer = Mock(deterministic=False)
+    model.trainer = mock_trainer.return_value
     model.trainer.state.stage = RunningStage.TRAINING
-    model.trainer.accelerator_connector._init_deterministic(False)
+    # model.trainer.accelerator_connector._init_deterministic(False)
     batch = torch.rand(2, 32).to(device)
     batch_idx = 0
 

From c6f288bcba829725255daa824329800e8f4a6a03 Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Wed, 29 Sep 2021 18:36:36 -0700
Subject: [PATCH 12/16] Update test_data_parallel.py

---
 tests/overrides/test_data_parallel.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/tests/overrides/test_data_parallel.py b/tests/overrides/test_data_parallel.py
index 2a4bd21aed87e..c415eca674c51 100644
--- a/tests/overrides/test_data_parallel.py
+++ b/tests/overrides/test_data_parallel.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from unittest import mock
 from unittest.mock import MagicMock, Mock
 
 import pytest
@@ -86,10 +85,11 @@ def training_step(self, batch, batch_idx):
             return {"loss": loss}
 
     model = TestModel()
-    model.trainer = Mock()
-    model.trainer.state.stage = RunningStage.TRAINING
-    model.trainer.accelerator_connector._init_deterministic(False)
+    trainer = MagicMock()
+    trainer.state.stage = RunningStage.TRAINING
+    trainer.accelerator_connector._init_deterministic(False)
 
+    model.trainer = trainer
     batch = torch.rand(2, 32).cuda()
     batch_idx = 0
 
@@ -114,9 +114,8 @@ def test_python_scalar_to_tensor(inp, expected):
 
 
 @RunIf(min_gpus=1)
-@mock.patch("pytorch_lightning.trainer.Trainer", autospec=True)
 @pytest.mark.parametrize("device", [torch.device("cpu"), torch.device("cuda", 0)])
-def test_lightning_parallel_module_python_scalar_conversion(mock_trainer, device):
+def test_lightning_parallel_module_python_scalar_conversion(device):
     """Test that LightningParallelModule can convert Python scalars to tensors."""
 
     class TestModel(BoringModel):
@@ -127,9 +126,9 @@ def training_step(self, batch, batch_idx):
             return output
 
     model = TestModel().to(device)
-    model.trainer = mock_trainer.return_value
-    model.trainer.state.stage = RunningStage.TRAINING
-    # model.trainer.accelerator_connector._init_deterministic(False)
+    trainer = MagicMock()
+    trainer.state.stage = RunningStage.TRAINING
+    trainer.accelerator_connector._init_deterministic(False)
     batch = torch.rand(2, 32).to(device)
     batch_idx = 0
 

From bb8bdc09496299d65ca942f73bbb951c1fa4339a Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Wed, 29 Sep 2021 18:41:12 -0700
Subject: [PATCH 13/16] Update test_data_parallel.py

---
 tests/overrides/test_data_parallel.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/overrides/test_data_parallel.py b/tests/overrides/test_data_parallel.py
index c415eca674c51..46cdcc7cf7e23 100644
--- a/tests/overrides/test_data_parallel.py
+++ b/tests/overrides/test_data_parallel.py
@@ -129,6 +129,7 @@ def training_step(self, batch, batch_idx):
     trainer = MagicMock()
     trainer.state.stage = RunningStage.TRAINING
     trainer.accelerator_connector._init_deterministic(False)
+    model.trainer = trainer
     batch = torch.rand(2, 32).to(device)
     batch_idx = 0
 

From 1986bb9b05117bd3b123f39bdce2c879eb847f8f Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Wed, 29 Sep 2021 19:05:24 -0700
Subject: [PATCH 14/16] Update conftest.py

---
 tests/conftest.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/conftest.py b/tests/conftest.py
index 36110e6c57c37..d4596008f65b4 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -22,6 +22,7 @@
 import torch.distributed
 
 from pytorch_lightning.plugins.environments.lightning_environment import find_free_network_port
+from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_7, _TORCH_GREATER_EQUAL_1_8
 from tests import _PATH_DATASETS
 
 
@@ -87,6 +88,18 @@ def teardown_process_group():
         torch.distributed.destroy_process_group()
 
 
+@pytest.fixture(scope="function", autouse=True)
+def reset_deterministic_algorithm():
+    """Ensures that torch determinism settings are reset before the next test runs."""
+    yield
+    if _TORCH_GREATER_EQUAL_1_8:
+        torch.use_deterministic_algorithms(False)
+    elif _TORCH_GREATER_EQUAL_1_7:
+        torch.set_deterministic(False)
+    else:  # the minimum version Lightning supports is PyTorch 1.6
+        torch._set_deterministic(False)
+
+
 @pytest.fixture
 def tmpdir_server(tmpdir):
     if sys.version_info >= (3, 7):

From fa269b3f731e1279bb0c20cb4696b6e8f2d017b7 Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Wed, 29 Sep 2021 20:28:17 -0700
Subject: [PATCH 15/16] Update accelerator_connector.py

---
 pytorch_lightning/trainer/connectors/accelerator_connector.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py
index 11de9909f388e..dd411da7bc995 100644
--- a/pytorch_lightning/trainer/connectors/accelerator_connector.py
+++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -195,6 +195,8 @@ def _init_deterministic(self, deterministic: bool) -> None:
             # fixing non-deterministic part of horovod
             # https://github.com/PyTorchLightning/pytorch-lightning/pull/1572/files#r420279383
             os.environ["HOROVOD_FUSION_THRESHOLD"] = str(0)
+            # https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
+            os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
 
     def select_accelerator_type(self) -> None:
         if self.distributed_backend == "auto":

From 9ab9bbd76fb57fd79adcda5586a56b8182ae9b59 Mon Sep 17 00:00:00 2001
From: ananthsub <ananth.subramaniam@gmail.com>
Date: Wed, 29 Sep 2021 21:05:26 -0700
Subject: [PATCH 16/16] Update conftest.py

---
 tests/conftest.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/conftest.py b/tests/conftest.py
index d4596008f65b4..860f9357e4636 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -53,6 +53,7 @@ def restore_env_variables():
     os.environ.update(env_backup)
     # these are currently known leakers - ideally these would not be allowed
     allowlist = {
+        "CUBLAS_WORKSPACE_CONFIG",  # enabled with deterministic flag
         "CUDA_DEVICE_ORDER",
         "LOCAL_RANK",
         "NODE_RANK",