         FullyShardedDataParallel,
         MixedPrecision,
     )
-    from torch.distributed.fsdp.wrap import enable_wrap
+    from torch.distributed.fsdp.wrap import enable_wrap  # noqa: F401

 _FSDP_ALIASES = ("fsdp", "fsdp_full_shard_offload")

@@ -86,9 +86,9 @@ def __init__(
         precision_plugin: Optional[Precision] = None,
         process_group_backend: Optional[str] = None,
         timeout: Optional[timedelta] = default_pg_timeout,
-        cpu_offload: Optional[CPUOffload] = None,
-        backward_prefetch: Optional[BackwardPrefetch] = None,
-        mixed_precision: Optional[MixedPrecision] = None,
+        cpu_offload: Optional["CPUOffload"] = None,
+        backward_prefetch: Optional["BackwardPrefetch"] = None,
+        mixed_precision: Optional["MixedPrecision"] = None,
         **kwargs: Any,
     ) -> None:
         if not _TORCH_GREATER_EQUAL_1_12:
@@ -156,7 +156,7 @@ def setup_environment(self) -> None:
         self._setup_distributed()
         super().setup_environment()

-    def setup_module(self, module: Module) -> FullyShardedDataParallel:
+    def setup_module(self, module: Module) -> "FullyShardedDataParallel":
        """Wraps the model into a
        :class:`~torch.distributed.fsdp.fully_sharded_data_parallel.FullyShardedDataParallel` module."""
        from torch.distributed.fsdp.fully_sharded_data_parallel import FullyShardedDataParallel
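The hunks above switch the FSDP annotations to string form because the names (`CPUOffload`, `BackwardPrefetch`, `MixedPrecision`, `FullyShardedDataParallel`) are only imported when torch is new enough (note the `_TORCH_GREATER_EQUAL_1_12` guard in the diff). A minimal sketch of that forward-reference pattern, assuming a hypothetical `configure_fsdp` function and a `TYPE_CHECKING` guard rather than the actual Lightning source:

```python
# Sketch only: the function name and TYPE_CHECKING guard are illustrative
# assumptions, not the Lightning code shown in the diff above.
from typing import TYPE_CHECKING, Any, Optional

if TYPE_CHECKING:
    # Seen by static type checkers but never executed at runtime, so importing
    # this module works even when torch does not provide these names.
    from torch.distributed.fsdp import CPUOffload, MixedPrecision


def configure_fsdp(
    cpu_offload: Optional["CPUOffload"] = None,          # quoted: stored as a string,
    mixed_precision: Optional["MixedPrecision"] = None,  # not evaluated at def time
    **kwargs: Any,
) -> None:
    """Accepts FSDP config objects without needing the guarded import to have run."""
    ...
```

With unquoted annotations, Python would evaluate `CPUOffload` and `MixedPrecision` when the function is defined and raise `NameError` whenever the guarded import did not run; quoting defers that lookup, which is the same effect the diff achieves for the strategy's `__init__` and `setup_module` signatures.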