Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -273,9 +273,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Fixed not setting a default value for `max_epochs` if `max_time` was specified on the `Trainer` constructor ([#9072](https://github.com/PyTorchLightning/pytorch-lightning/pull/9072))


- Fixed the CometLogger, no longer modifies the metrics in place. Instead creates a copy of metrics before performing any operations ([#9150](https://github.com/PyTorchLightning/pytorch-lightning/pull/9150))


- Fixed `DDP` "CUDA error: initialization error" due to a `copy` instead of `deepcopy` on `ResultCollection` ([#9239](https://github.com/PyTorchLightning/pytorch-lightning/pull/9239))


## [1.4.3] - 2021-08-17

- Fixed plateau scheduler stepping on incomplete epoch ([#8861](https://github.com/PyTorchLightning/pytorch-lightning/pull/8861))
Expand Down
6 changes: 3 additions & 3 deletions pytorch_lightning/loops/batch/training_batch_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from copy import copy
from copy import deepcopy
from functools import partial
from typing import Any, Callable, Dict, List, Optional, Tuple

Expand Down Expand Up @@ -142,12 +142,12 @@ def advance(self, batch, batch_idx):

result = self._run_optimization(batch_idx, split_batch, opt_idx, optimizer)
if result:
self.batch_outputs[opt_idx].append(copy(result.result_collection))
self.batch_outputs[opt_idx].append(deepcopy(result.result_collection))
else:
# in manual optimization, there is no looping over optimizers
result = self._run_optimization(batch_idx, split_batch)
if result:
self.batch_outputs[0].append(copy(result.result_collection))
self.batch_outputs[0].append(deepcopy(result.result_collection))

def teardown(self) -> None:
# release memory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union

import torch
from torch.functional import Tensor
from torchmetrics import Metric

from pytorch_lightning.core.mixins import DeviceDtypeModuleMixin
Expand Down Expand Up @@ -435,8 +436,12 @@ def log(
) -> None:
"""See :meth:`~pytorch_lightning.core.lightning.LightningModule.log`"""
# no metrics should be logged with graphs
if not enable_graph and isinstance(value, torch.Tensor):
value = value.detach()
if not enable_graph:

def detach_fn(tensor: Tensor) -> Tensor:
return tensor.detach()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to add a clone() here but doesn't solve the issue. I'm just perplexed why the deepcopy is necessary. Do you have any intuition what is going on?


value = apply_to_collection(value, Tensor, detach_fn)

# move metrics to cpu on TPU.
if isinstance(value, torch.Tensor) and value.device.type == "xla":
Expand Down