Skip to content

Commit d072e44

Browse files
authored
Fix dtype inference during gradient norm computation (#14051)
1 parent b4ade23 commit d072e44

File tree

3 files changed

+20
-3
lines changed

3 files changed

+20
-3
lines changed

src/pytorch_lightning/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
6767
- Fixed MPS device being unrecognized ([#13992](https://github.com/Lightning-AI/lightning/pull/13992))
6868

6969

70+
- Fixed dtype inference during gradient norm computation ([#14051](https://github.com/Lightning-AI/lightning/pull/14051))
71+
72+
7073
## [1.7.0] - 2022-08-02
7174

7275
### Added

src/pytorch_lightning/utilities/grads.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,12 @@ def grad_norm(module: Module, norm_type: Union[float, int, str], group_separator
4141
raise ValueError(f"`norm_type` must be a positive number or 'inf' (infinity norm). Got {norm_type}")
4242

4343
norms = {
44-
f"grad_{norm_type}_norm{group_separator}{name}": p.grad.data.norm(norm_type).item()
44+
f"grad_{norm_type}_norm{group_separator}{name}": p.grad.data.norm(norm_type)
4545
for name, p in module.named_parameters()
4646
if p.grad is not None
4747
}
4848
if norms:
49-
total_norm = torch.tensor(list(norms.values())).norm(norm_type).item()
49+
total_norm = torch.tensor(list(norms.values())).norm(norm_type)
5050
norms[f"grad_{norm_type}_norm_total"] = total_norm
51-
norms = {k: round(v, 4) for k, v in norms.items()}
51+
norms = {k: round(v.item(), 4) for k, v in norms.items()}
5252
return norms

tests/tests_pytorch/utilities/test_grads.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,17 @@ def __init__(self):
7676
def test_grad_norm_invalid_norm_type(norm_type):
7777
with pytest.raises(ValueError, match="`norm_type` must be a positive number or 'inf'"):
7878
grad_norm(Mock(), norm_type)
79+
80+
81+
def test_grad_norm_with_double_dtype():
82+
class Model(nn.Module):
83+
def __init__(self):
84+
super().__init__()
85+
dtype = torch.double
86+
self.param = nn.Parameter(torch.tensor(1.0, dtype=dtype))
87+
# grad norm of this would become infinite
88+
self.param.grad = torch.tensor(1e23, dtype=dtype)
89+
90+
model = Model()
91+
norms = grad_norm(model, 2)
92+
assert all(torch.isfinite(torch.tensor(v)) for v in norms.values()), norms

0 commit comments

Comments (0)