From 1347855e3b24a586c4a90fe68479ea6463e417cc Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 16 Feb 2021 18:42:44 +0100 Subject: [PATCH 01/17] Remove early_stop_on and checkpoint_on references --- .../callbacks/model_checkpoint.py | 3 - pytorch_lightning/core/step_result.py | 71 ++++++------------- .../logger_connector/logger_connector.py | 22 +----- pytorch_lightning/trainer/evaluation_loop.py | 9 --- pytorch_lightning/trainer/supporters.py | 15 ---- pytorch_lightning/trainer/training_loop.py | 38 ++-------- 6 files changed, 29 insertions(+), 129 deletions(-) diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py index a3f048c67c57d..04864185d1080 100644 --- a/pytorch_lightning/callbacks/model_checkpoint.py +++ b/pytorch_lightning/callbacks/model_checkpoint.py @@ -465,9 +465,6 @@ def _add_backward_monitor_support(self, trainer): if self.monitor is None and 'val_loss' in metrics: self.monitor = 'val_loss' - if self.monitor is None and 'checkpoint_on' in metrics: - self.monitor = 'checkpoint_on' - if self.save_top_k is None and self.monitor is not None: self.save_top_k = 1 diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 974974b032bec..f13b07eafdd36 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""[Train, Eval]Result for easier logging, checkpointing, early stopping, epoch-wise reduction.""" +"""Result class for easier logging and epoch-wise reduction.""" import numbers import os @@ -23,6 +23,9 @@ from pytorch_lightning.metrics import Metric from pytorch_lightning.utilities.distributed import sync_ddp_if_available +from pytorch_lightning.utilities.warnings import WarningCache + +warning_cache = WarningCache() class Result(Dict): @@ -30,8 +33,6 @@ class Result(Dict): def __init__( self, minimize: Optional[Tensor] = None, - early_stop_on: Optional[Tensor] = None, - checkpoint_on: Optional[Union[Tensor, bool]] = None, hiddens: Optional[Tensor] = None, ): @@ -40,10 +41,6 @@ def __init__( # temporary until dict results are deprecated os.environ['PL_USING_RESULT_OBJ'] = '1' - if early_stop_on is not None: - self.early_stop_on = early_stop_on - if checkpoint_on is not None and checkpoint_on: - self.checkpoint_on = checkpoint_on if hiddens is not None: self.hiddens = hiddens.detach() if minimize is not None: @@ -51,9 +48,6 @@ def __init__( self._assert_grad_tensor_metric('minimize', minimize, err) self.minimize = minimize - if minimize is not None and checkpoint_on is None: - self.checkpoint_on = minimize.detach() - self['meta'] = {'_internal': {'_reduce_on_epoch': False, 'batch_sizes': []}} def __getitem__(self, key: Union[str, Any]) -> Any: @@ -64,9 +58,7 @@ def __getitem__(self, key: Union[str, Any]) -> Any: def __getattr__(self, key: str) -> Any: try: - if key == 'callback_metrics': - return self.get_callback_metrics() - elif key == 'batch_log_metrics': + if key == 'batch_log_metrics': return self.get_batch_log_metrics() elif key == 'batch_pbar_metrics': return self.get_batch_pbar_metrics() @@ -80,16 +72,9 @@ def __getattr__(self, key: str) -> Any: return None def __setattr__(self, key: str, val: Union[Tensor, Any]): - # ensure reserve keys are tensors and detached - if key in {'checkpoint_on', 'early_stop_on'}: - self._assert_tensor_metric(key, val) - if val is not None and isinstance(val, torch.Tensor): - val = val.detach() - - # ensure anything else that is a tensor is detached - elif isinstance(val, torch.Tensor) and key != 'minimize': + # ensure tensors are detached + if isinstance(val, torch.Tensor) and key != 'minimize': val = val.detach() - self[key] = val def __getstate__(self): @@ -98,11 +83,6 @@ def __getstate__(self): def __setstate__(self, d): self.update(d) - def _assert_tensor_metric(self, name: str, potential_metric: Union[bool, Tensor, None, Any]): - if potential_metric is not None and not isinstance(potential_metric, bool): - if not isinstance(potential_metric, Tensor): - raise TypeError(f'{name} must be a torch.Tensor') - def _assert_grad_tensor_metric(self, name: str, x: Union[torch.Tensor, Any], additional_err: str = ''): if x is not None: if not isinstance(x, Tensor): @@ -272,11 +252,6 @@ def get_batch_sizes(self): meta = self['meta'] return torch.tensor(meta['_internal']['batch_sizes']) - def get_callback_metrics(self) -> dict: - result = {'early_stop_on': self.early_stop_on, 'checkpoint_on': self.checkpoint_on} - - return result - def _add_dataloader_idx(self, k: str, dataloader_idx: Union[int, None], add_dataloader_idx: bool) -> str: if dataloader_idx is not None and add_dataloader_idx: return f"{k}/dataloader_idx_{dataloader_idx}" @@ -493,25 +468,28 @@ def padded_gather(cls, outputs): # find the padding used for other values default_padding_idx = 0 for name, value in result.items(): - if isinstance(value, list) and len(value) > 0 and isinstance(value[0], torch.Tensor): - if name not in {'checkpoint_on', 'early_stop_on', 'minimize'}: - default_padding_idx = meta[name]['tbptt_pad_token'] - break + if ( + name != 'minimize' + and isinstance(value, list) + and len(value) > 0 + and isinstance(value[0], torch.Tensor) + ): + default_padding_idx = meta[name]['tbptt_pad_token'] + break # pad across each key individually for name, value in result.items(): - is_reserved = name in {'checkpoint_on', 'early_stop_on', 'minimize'} - if isinstance(value, list) and len(value) > 0 and isinstance(value[0], torch.Tensor): - - if is_reserved: - padding_key = default_padding_idx - else: - padding_key = meta[name]['tbptt_pad_token'] + if ( + isinstance(value, list) + and len(value) > 0 + and isinstance(value[0], torch.Tensor) + ): + padding_key = default_padding_idx if name == 'minimize' else meta[name]['tbptt_pad_token'] padded = torch.nn.utils.rnn.pad_sequence(value, batch_first=True, padding_value=padding_key) result[name] = padded # also update the result - if meta and not is_reserved: + if meta and name != "minimize": meta[name]['value'] = padded if meta: result['meta'] = meta @@ -579,10 +557,7 @@ def reduce_across_time(cls, time_outputs): continue # pick the reduce fx - if k in ['checkpoint_on', 'early_stop_on', 'minimize']: - tbptt_reduce_fx = torch.mean - else: - tbptt_reduce_fx = meta[k]['tbptt_reduce_fx'] + tbptt_reduce_fx = torch.mean if k == "minimize" else meta[k]['tbptt_reduce_fx'] if isinstance(value, list): value = torch.tensor(value) diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index 595a5e84bf630..d0a75b2463d76 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -327,10 +327,6 @@ def _track_callback_metrics(self, eval_results): elif isinstance(eval_result, dict): flat = flatten_dict(eval_result) - # removing val_loss magic word to map to checkpoint + ES callback - if 'val_loss' in flat: - flat['checkpoint_on'] = flat['val_loss'] - flat['early_stop_on'] = flat['val_loss'] self.trainer.logger_connector.callback_metrics.update(flat) if self.trainer.testing: self.trainer.logger_connector.evaluation_callback_metrics.update(flat) @@ -341,11 +337,6 @@ def _track_callback_metrics(self, eval_results): else: flat = flatten_dict(eval_results) - # removing val_loss magic word to map to checkpoint + ES callback - if 'val_loss' in flat: - flat['checkpoint_on'] = flat['val_loss'] - flat['early_stop_on'] = flat['val_loss'] - self.trainer.logger_connector.callback_metrics.update(flat) if self.trainer.testing: self.trainer.logger_connector.evaluation_callback_metrics.update(flat) @@ -401,9 +392,7 @@ def on_train_epoch_end(self): # inform cached logger connector epoch finished self.cached_results.has_batch_loop_finished = True - def log_train_epoch_end_metrics( - self, epoch_output, checkpoint_accumulator, early_stopping_accumulator, num_optimizers - ): + def log_train_epoch_end_metrics(self, epoch_output, num_optimizers): # epoch output is a list. Each item in that list has all the outputs per optimizer # epoch_output[optimizer_idx][training_step_idx][tbptt_index] # remember that not using truncated backprop is equivalent with truncated back prop of len(1) @@ -412,15 +401,6 @@ def log_train_epoch_end_metrics( epoch_callback_metrics = {} - # ----------------------- - # Calculate epoch callback values if given - # ----------------------- - if checkpoint_accumulator.num_values > 0: - epoch_callback_metrics['checkpoint_on'] = checkpoint_accumulator.mean() - - if early_stopping_accumulator.num_values > 0: - epoch_callback_metrics['early_stop_on'] = early_stopping_accumulator.mean() - # ------------------------ # determine if using a result obj # ------------------------ diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py index 1fbcc80ca424b..409eba316be95 100644 --- a/pytorch_lightning/trainer/evaluation_loop.py +++ b/pytorch_lightning/trainer/evaluation_loop.py @@ -252,11 +252,6 @@ def __gather_epoch_end_eval_results(self, outputs): eval_results = [] for epoch_output in outputs: result = epoch_output[0].__class__.gather(epoch_output) - if 'checkpoint_on' in result: - result.checkpoint_on = result.checkpoint_on.mean() - if 'early_stop_on' in result: - result.early_stop_on = result.early_stop_on.mean() - eval_results.append(result) # with 1 dataloader don't pass in a list @@ -270,10 +265,6 @@ def __auto_reduce_result_objs(self, outputs): for dl_output in outputs: result = dl_output[0] result = result.__class__.reduce_on_epoch_end(dl_output) - if 'checkpoint_on' in result: - result.checkpoint_on = result.checkpoint_on.mean() - if 'early_stop_on' in result: - result.early_stop_on = result.early_stop_on.mean() eval_results.append(result) return eval_results diff --git a/pytorch_lightning/trainer/supporters.py b/pytorch_lightning/trainer/supporters.py index aff458d1b6084..f884306dc09c8 100644 --- a/pytorch_lightning/trainer/supporters.py +++ b/pytorch_lightning/trainer/supporters.py @@ -104,21 +104,6 @@ def _agg_memory(self, how: str): return getattr(self.memory[:self.current_idx], how)() -class Accumulator(object): - - def __init__(self): - self.num_values = 0 - self.total = 0 - - def accumulate(self, x): - with torch.no_grad(): - self.total += x - self.num_values += 1 - - def mean(self): - return self.total / self.num_values - - class PredictionCollection(object): def __init__(self, global_rank: int, world_size: int): diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index f7f44625a3062..2d58294597d76 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -24,7 +24,7 @@ from pytorch_lightning.core.step_result import Result from pytorch_lightning.plugins import ParallelPlugin from pytorch_lightning.trainer.states import RunningStage, TrainerState -from pytorch_lightning.trainer.supporters import Accumulator, TensorRunningAccum +from pytorch_lightning.trainer.supporters import TensorRunningAccum from pytorch_lightning.utilities import _TPU_AVAILABLE, AMPType, DeviceType, parsing from pytorch_lightning.utilities.distributed import rank_zero_info, rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -38,8 +38,6 @@ class TrainLoop: def __init__(self, trainer, multiple_trainloader_mode): self.trainer = trainer - self.early_stopping_accumulator = None - self.checkpoint_accumulator = None self.accumulated_loss = None self.warning_cache = WarningCache() self._teardown_already_run = False @@ -194,10 +192,6 @@ def on_train_epoch_start(self, epoch): # stores accumulated grad fractions per batch self.accumulated_loss = TensorRunningAccum(window_length=self.trainer.accumulate_grad_batches) - # structured result accumulators for callbacks - self.early_stopping_accumulator = Accumulator() - self.checkpoint_accumulator = Accumulator() - # hook self.trainer.call_hook("on_epoch_start") self.trainer.call_hook("on_train_epoch_start") @@ -509,7 +503,6 @@ def run_training_epoch(self): train_dataloader = self.trainer.data_connector.get_profiled_train_dataloader(train_dataloader) dataloader_idx = 0 - should_check_val = False for batch_idx, (batch, is_last_batch) in train_dataloader: @@ -525,11 +518,7 @@ def run_training_epoch(self): if batch_output.signal == -1: break - batch_end_outputs = self.process_train_step_outputs( - batch_output.training_step_output_for_epoch_end, - self.early_stopping_accumulator, - self.checkpoint_accumulator, - ) + batch_end_outputs = self.process_train_step_outputs(batch_output.training_step_output_for_epoch_end) # hook # TODO: add outputs to batches self.on_train_batch_end(epoch_output, batch_end_outputs, batch, batch_idx, dataloader_idx) @@ -885,32 +874,15 @@ def save_loggers_on_train_batch_end(self): if should_flush_logs and self.trainer.is_global_zero and self.trainer.logger is not None: self.trainer.logger.save() - def process_train_step_outputs(self, all_train_step_outputs, early_stopping_accumulator, checkpoint_accumulator): + def process_train_step_outputs(self, all_train_step_outputs): """ Figure out what needs to be tracked/logged at the end of the epoch """ - # the training step outputs a list per optimizer. The list contains the outputs at each time step # when no TBPTT is used, then the list has 1 item per batch # when TBPTT IS used, then the list has n items (1 per time step) - batch_end_outputs = [] - for optimizer_idx_outputs in all_train_step_outputs: - # extract one representative sample from each time step (1 if no tbptt) and 0th optimizer - if len(optimizer_idx_outputs) == 0: - continue - - sample_output = optimizer_idx_outputs[-1] - - # pull out callback info if available (ie: Results object) - if isinstance(sample_output, dict) and "early_stop_on" in sample_output: - early_stopping_accumulator.accumulate(sample_output["early_stop_on"]) - - if isinstance(sample_output, dict) and "checkpoint_on" in sample_output: - checkpoint_accumulator.accumulate(sample_output["checkpoint_on"]) - - batch_end_outputs.append(optimizer_idx_outputs) - - return batch_end_outputs + # extract one representative sample from each time step (1 if no tbptt) and 0th optimizer + return [opt_idx_out for opt_idx_out in all_train_step_outputs if len(opt_idx_out)] def prepare_optimizers(self): # in manual optimization we loop over all optimizers at once From 305dacf7f6d9a6c2d1504b135ddb17787658bd94 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 01:46:54 +0100 Subject: [PATCH 02/17] Remove old code which was adding the return dict items to callback_metrics --- pytorch_lightning/trainer/logging.py | 21 +--------------- .../trainer/logging_/test_logger_connector.py | 24 ++++++++++++++++++- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py index 6a036b9fcec49..37911c2de19d7 100644 --- a/pytorch_lightning/trainer/logging.py +++ b/pytorch_lightning/trainer/logging.py @@ -75,20 +75,6 @@ def process_dict_result(self, output, train=False): hiddens = None return output, progress_bar_metrics, log_metrics, callback_metrics, hiddens - # --------------- - # EXTRACT CALLBACK KEYS - # --------------- - # all keys not progress_bar or log are candidates for callbacks - callback_metrics = {} - if isinstance(output, Mapping): - for k, v in output.items(): - if k not in ['progress_bar', 'log', 'hiddens']: - callback_metrics[k] = v - - if train and self._distrib_type in (DistributedType.DP, DistributedType.DDP2): - num_gpus = self.num_gpus - callback_metrics = self.reduce_distributed_output(callback_metrics, num_gpus) - # --------------- # EXTRACT PROGRESS BAR KEYS # --------------- @@ -149,17 +135,12 @@ def process_dict_result(self, output, train=False): # --------------- hiddens = output.get('hiddens', None) if isinstance(output, Mapping) else None - # use every metric passed in as a candidate for callback - callback_metrics.update(progress_bar_metrics) - callback_metrics.update(log_metrics) - # detach all metrics for callbacks to prevent memory leaks # no .item() because it will slow things down - callback_metrics = recursive_detach(callback_metrics) progress_bar_metrics = recursive_detach(progress_bar_metrics) log_metrics = recursive_detach(log_metrics) - return loss, progress_bar_metrics, log_metrics, callback_metrics, hiddens + return loss, progress_bar_metrics, log_metrics, {}, hiddens def reduce_distributed_output(self, output, num_gpus): if num_gpus <= 1: diff --git a/tests/trainer/logging_/test_logger_connector.py b/tests/trainer/logging_/test_logger_connector.py index d14ed71940328..feaa5866929f9 100644 --- a/tests/trainer/logging_/test_logger_connector.py +++ b/tests/trainer/logging_/test_logger_connector.py @@ -461,7 +461,8 @@ class TestModel(BoringModel): def validation_step(self, batch, *args, **kwargs): output = self(batch) - return {"test": output} + self.log('test', output) + return {} # TODO: should be able to have no return def test_step(self, *args, **kwargs): return self.validation_step(*args, **kwargs) @@ -479,6 +480,27 @@ def test_step(self, *args, **kwargs): trainer.test(model) +def test_can_return_tensor_with_more_than_one_element(tmpdir): + """Ensure {validation,test}_step return values are not included as callback metrics. #6623""" + + class TestModel(BoringModel): + + def validation_step(self, batch, *args, **kwargs): + return {"val": torch.tensor([0, 1])} + + def test_step(self, batch, *args, **kwargs): + return {"test": torch.tensor([0, 1])} + + model = TestModel() + model.validation_epoch_end = None + model.test_epoch_end = None + + trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, progress_bar_refresh_rate=0) + trainer.fit(model) + trainer.validate(model) + trainer.test(model) + + def test_logging_to_progress_bar_with_reserved_key(tmpdir): """ Test that logging a metric with a reserved name to the progress bar raises a warning. """ From bcddef2185545dfcc403e1dd674d3a0a66728a3b Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 02:33:39 +0100 Subject: [PATCH 03/17] Update legacy tests --- .../callbacks/model_checkpoint.py | 2 +- tests/callbacks/test_early_stopping.py | 30 +-- tests/checkpointing/test_model_checkpoint.py | 4 +- .../test_eval_loop_dict_return.py | 176 ------------------ .../test_trainer_steps_dict_return.py | 22 --- .../logging_/test_eval_loop_logging_1_0.py | 3 +- 6 files changed, 8 insertions(+), 229 deletions(-) diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py index 2781586730151..d9dea5979ae58 100644 --- a/pytorch_lightning/callbacks/model_checkpoint.py +++ b/pytorch_lightning/callbacks/model_checkpoint.py @@ -590,7 +590,7 @@ def _validate_monitor_key(self, trainer): m = ( f"ModelCheckpoint(monitor='{self.monitor}') not found in the returned metrics:" f" {list(metrics.keys())}. " - f"HINT: Did you call self.log('{self.monitor}', tensor) in the LightningModule?" + f"HINT: Did you call self.log('{self.monitor}', value) in the LightningModule?" ) raise MisconfigurationException(m) diff --git a/tests/callbacks/test_early_stopping.py b/tests/callbacks/test_early_stopping.py index 2a15852fc6ee5..cc619077ee136 100644 --- a/tests/callbacks/test_early_stopping.py +++ b/tests/callbacks/test_early_stopping.py @@ -128,7 +128,7 @@ class ModelOverrideValidationReturn(BoringModel): def validation_epoch_end(self, outputs): loss = self.validation_return_values[self.current_epoch] - return {"test_val_loss": loss} + self.log("test_val_loss", loss) model = ModelOverrideValidationReturn() early_stop_callback = EarlyStopping(monitor="test_val_loss", patience=patience, verbose=True) @@ -220,7 +220,7 @@ class CurrentModel(BoringModel): def validation_epoch_end(self, outputs): losses = [8, 4, 2, 3, 4, 5, 8, 10] val_loss = losses[self.current_epoch] - self.log('abc', torch.tensor(val_loss)) + self.log('abc', val_loss) model = CurrentModel() @@ -234,28 +234,6 @@ def validation_epoch_end(self, outputs): assert trainer.current_epoch == 5, 'early_stopping failed' -def test_early_stopping_functionality_arbitrary_key(tmpdir): - """Tests whether early stopping works with a custom key and dictionary results on val step.""" - - class CurrentModel(BoringModel): - - def validation_epoch_end(self, outputs): - losses = [8, 4, 2, 3, 4, 5, 8, 10] - val_loss = losses[self.current_epoch] - return {'jiraffe': torch.tensor(val_loss)} - - model = CurrentModel() - - trainer = Trainer( - default_root_dir=tmpdir, - callbacks=[EarlyStopping(monitor='jiraffe')], - overfit_batches=0.20, - max_epochs=20, - ) - trainer.fit(model) - assert trainer.current_epoch >= 5, 'early_stopping failed' - - @pytest.mark.parametrize('step_freeze, min_steps, min_epochs', [(5, 1, 1), (5, 1, 3), (3, 15, 1)]) def test_min_steps_override_early_stopping_functionality(tmpdir, step_freeze: int, min_steps: int, min_epochs: int): """Excepted Behaviour: @@ -272,7 +250,7 @@ def test_min_steps_override_early_stopping_functionality(tmpdir, step_freeze: in when `early_stopping` is being triggered, THEN the highest between `min_epochs * len(train_dataloader)` and `min_steps` would be reached. - Caviat: IF min_steps is divisible by len(train_dataloader), then it will do min_steps + len(train_dataloader) + Caveat: IF min_steps is divisible by len(train_dataloader), then it will do min_steps + len(train_dataloader) This test validate those expected behaviours """ @@ -309,7 +287,7 @@ def validation_epoch_end(self, outputs): self._count_decrease += 1 self._loss_value -= self._eps self._values.append(_mean) - return {"test_val_loss": _mean} + self.log('test_val_loss', _mean) model = Model(step_freeze) model.training_step_end = None diff --git a/tests/checkpointing/test_model_checkpoint.py b/tests/checkpointing/test_model_checkpoint.py index 75f25b90fa45f..b067d1055dda6 100644 --- a/tests/checkpointing/test_model_checkpoint.py +++ b/tests/checkpointing/test_model_checkpoint.py @@ -724,7 +724,7 @@ def test_model_checkpoint_topk_all(tmpdir): class CustomModel(LogInTwoMethods): def validation_epoch_end(self, outputs): - return {'epoch': self.current_epoch} + self.log('epoch', self.current_epoch) model = CustomModel() checkpoint_callback = ModelCheckpoint( @@ -894,7 +894,7 @@ class ExtendedBoringModel(BoringModel): def validation_step(self, batch, batch_idx): output = self.layer(batch) loss = self.loss(batch, output) - return {"val_loss": loss} + self.log("val_loss", loss) model = ExtendedBoringModel() model.validation_epoch_end = None diff --git a/tests/trainer/legacy_deprecate_flow_log/test_eval_loop_dict_return.py b/tests/trainer/legacy_deprecate_flow_log/test_eval_loop_dict_return.py index 2aac7354c38f6..0894acd5fe72d 100644 --- a/tests/trainer/legacy_deprecate_flow_log/test_eval_loop_dict_return.py +++ b/tests/trainer/legacy_deprecate_flow_log/test_eval_loop_dict_return.py @@ -125,49 +125,6 @@ def test_validation_step_arbitrary_dict_return(tmpdir): assert not model.validation_epoch_end_called -def test_validation_step_dict_return(tmpdir): - """ - Test that val step can return a dict with all the expected keys and they end up - in the correct place - """ - - model = DeterministicModel() - model.training_step = model.training_step__dict_return - model.validation_step = model.validation_step__dict_return - model.validation_step_end = None - model.validation_epoch_end = None - - trainer = Trainer( - default_root_dir=tmpdir, - weights_summary=None, - limit_train_batches=2, - limit_val_batches=2, - max_epochs=2, - ) - trainer.fit(model) - - # out are the results of the full loop - # eval_results are output of _evaluate - callback_metrics, eval_results = trainer.run_evaluation() - assert len(callback_metrics) == 1 - assert len(callback_metrics[0]) == 5 - assert len(eval_results) == 2 - assert eval_results[0]['log']['log_acc1'] == 12 - assert eval_results[1]['log']['log_acc1'] == 13 - - for k in ['val_loss', 'log', 'progress_bar']: - assert k in eval_results[0] - assert k in eval_results[1] - - # ensure all the keys ended up as candidates for callbacks - assert len(trainer.logger_connector.callback_metrics) in [7, 8] - - # make sure correct steps were called - assert model.validation_step_called - assert not model.validation_step_end_called - assert not model.validation_epoch_end_called - - def test_val_step_step_end_no_return(tmpdir): """ Test that val step + val step end work (with no return in val step end) @@ -198,136 +155,3 @@ def test_val_step_step_end_no_return(tmpdir): assert model.validation_step_called assert model.validation_step_end_called assert not model.validation_epoch_end_called - - -def test_val_step_step_end(tmpdir): - """ - Test that val step + val step end work - """ - - model = DeterministicModel() - model.training_step = model.training_step__dict_return - model.validation_step = model.validation_step__dict_return - model.validation_step_end = model.validation_step_end - model.validation_epoch_end = None - - trainer = Trainer( - default_root_dir=tmpdir, - weights_summary=None, - limit_train_batches=2, - limit_val_batches=2, - max_epochs=2, - ) - trainer.fit(model) - - # out are the results of the full loop - # eval_results are output of _evaluate - callback_metrics, eval_results = trainer.run_evaluation() - assert len(callback_metrics) == 1 - assert len(callback_metrics[0]) == 6 - - callback_metrics = callback_metrics[0] - assert callback_metrics['val_step_end'] == 1802 - assert len(eval_results) == 2 - assert eval_results[0]['log']['log_acc1'] == 12 - assert eval_results[1]['log']['log_acc1'] == 13 - - for k in ['val_loss', 'log', 'progress_bar']: - assert k in eval_results[0] - assert k in eval_results[1] - - # ensure all the keys ended up as candidates for callbacks - assert len(trainer.logger_connector.callback_metrics) in [8, 9] - - # make sure correct steps were called - assert model.validation_step_called - assert model.validation_step_end_called - assert not model.validation_epoch_end_called - - -def test_no_val_step_end(tmpdir): - """ - Test that val step + val epoch end - """ - - model = DeterministicModel() - model.training_step = model.training_step__dict_return - model.validation_step = model.validation_step__dict_return - model.validation_step_end = None - model.validation_epoch_end = model.validation_epoch_end - - trainer = Trainer( - default_root_dir=tmpdir, - weights_summary=None, - limit_train_batches=2, - limit_val_batches=3, - num_sanity_val_steps=0, - max_epochs=2 - ) - trainer.fit(model) - - # out are the results of the full loop - # eval_results are output of _evaluate - callback_metrics, eval_results = trainer.run_evaluation() - assert len(callback_metrics) == 1 - assert len(callback_metrics[0]) == 6 - assert len(eval_results) == 1 - - eval_results = eval_results[0] - assert 'val_step_end' not in eval_results - assert eval_results['val_epoch_end'] == 1233 - - for k in ['val_loss', 'log', 'progress_bar']: - assert k in eval_results - - # ensure all the keys ended up as candidates for callbacks - assert len(trainer.logger_connector.callback_metrics) in [8, 9] - - # make sure correct steps were called - assert model.validation_step_called - assert not model.validation_step_end_called - assert model.validation_epoch_end_called - - -def test_full_val_loop(tmpdir): - """ - Test that val step + val step end + val epoch end - """ - - model = DeterministicModel() - model.training_step = model.training_step__dict_return - model.validation_step = model.validation_step__dict_return - model.validation_step_end = model.validation_step_end - model.validation_epoch_end = model.validation_epoch_end - - trainer = Trainer( - default_root_dir=tmpdir, - weights_summary=None, - limit_train_batches=2, - limit_val_batches=3, - num_sanity_val_steps=0, - max_epochs=2 - ) - trainer.fit(model) - - # out are the results of the full loop - # eval_results are output of _evaluate - callback_metrics, eval_results = trainer.run_evaluation() - assert len(callback_metrics) == 1 - assert len(callback_metrics[0]) == 7 - assert len(eval_results) == 1 - - eval_results = eval_results[0] - assert eval_results['val_step_end'] == 1802 - assert eval_results['val_epoch_end'] == 1233 - - for k in ['val_loss', 'log', 'progress_bar']: - assert k in eval_results - - # ensure all the keys ended up as candidates for callbacks - assert len(trainer.logger_connector.callback_metrics) in [9, 10] - - # make sure correct steps were called - assert model.validation_step_called - assert model.validation_step_end_called - assert model.validation_epoch_end_called diff --git a/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py b/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py index 9c114f72080d8..3f60e6060d2ae 100644 --- a/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py +++ b/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py @@ -171,28 +171,6 @@ def test_result_obj_lr_scheduler_epoch(tmpdir): assert len(trainer.dev_debugger.saved_lr_scheduler_updates) == 3 -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) -def test_result_obj_lr_scheduler_step(tmpdir): - """ - test that the LR scheduler was called at the correct time with the correct metrics - """ - model = DeterministicModel() - model.training_step = model.training_step__for_step_end_dict - model.training_step_end = model.training_step_end__dict - model.training_epoch_end = model.training_epoch_end__dict - model.val_dataloader = None - model.configure_optimizers = model.configure_optimizers__lr_on_plateau_step - - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=2, - weights_summary=None, - ) - trainer.fit(model) - - assert len(trainer.dev_debugger.saved_lr_scheduler_updates) == 8 - - def test_train_step_epoch_end(tmpdir): """ Checks train_step + training_epoch_end (NO training_step_end) diff --git a/tests/trainer/logging_/test_eval_loop_logging_1_0.py b/tests/trainer/logging_/test_eval_loop_logging_1_0.py index 674e2aeb6511b..ddd69f8b9ad34 100644 --- a/tests/trainer/logging_/test_eval_loop_logging_1_0.py +++ b/tests/trainer/logging_/test_eval_loop_logging_1_0.py @@ -849,7 +849,7 @@ def validation_step(self, batch, batch_idx): self.log('valid_loss_1', loss, on_step=False, on_epoch=True) self.log('valid_loss_2', loss, on_step=True, on_epoch=False) self.log('valid_loss_3', loss, on_step=False, on_epoch=False) - return {"val_loss": loss} + return {"val_loss": loss} # not added to callback_metrics def test_step(self, batch, batch_idx): output = self.layer(batch) @@ -926,7 +926,6 @@ def get_metrics_at_idx(idx): 'debug_epoch', 'valid_loss_1', 'test_loss', - 'val_loss', } assert set(trainer.callback_metrics) == expected_callback_metrics assert set(results[0]) == {'test_loss', 'debug_epoch'} From 24e7c3f3302d2fb8822afc284e57d95d84e0d365 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 02:41:59 +0100 Subject: [PATCH 04/17] Remove dead code --- .../logger_connector/logger_connector.py | 38 +++++-------------- pytorch_lightning/trainer/logging.py | 5 +-- pytorch_lightning/trainer/trainer.py | 9 ----- pytorch_lightning/trainer/training_loop.py | 3 +- 4 files changed, 13 insertions(+), 42 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index 0d0c3781c7724..c6b35a1274e77 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -185,9 +185,6 @@ def cache_training_step_metrics(self, opt_closure_result): batch_log_metrics = opt_closure_result.training_step_output.log_metrics logged_metrics_tmp.update(batch_log_metrics) - callback_metrics = opt_closure_result.training_step_output.callback_metrics - callback_metrics_tmp.update(callback_metrics) - batch_pbar_metrics = opt_closure_result.training_step_output.pbar_on_batch_end pbar_metrics_tmp.update(batch_pbar_metrics) @@ -348,9 +345,9 @@ def _track_callback_metrics(self, eval_results): if self.trainer.state in (TrainerState.TESTING, TrainerState.VALIDATING): self.trainer.logger_connector.evaluation_callback_metrics.update(flat) - def __process_eval_epoch_end_results_and_log_legacy_update(self, prog_bar_metrics, log_metrics, callback_metrics): + def __process_eval_epoch_end_results_and_log_legacy_update(self, prog_bar_metrics, log_metrics): # eval loop returns all metrics - dataloader_result_metrics = {**prog_bar_metrics, **log_metrics, **callback_metrics} + dataloader_result_metrics = {**prog_bar_metrics, **log_metrics} # add metrics to prog bar self.trainer.logger_connector.add_progress_bar_metrics(prog_bar_metrics) @@ -359,13 +356,6 @@ def __process_eval_epoch_end_results_and_log_legacy_update(self, prog_bar_metric if len(log_metrics) > 0: self.trainer.logger_connector.log_metrics(log_metrics, {}) - # track metrics for callbacks (all prog bar, logged and callback metrics) - callback_metrics.update(log_metrics) - callback_metrics.update(prog_bar_metrics) - self.trainer.logger_connector.callback_metrics.update(callback_metrics) - if self.trainer.state in (TrainerState.TESTING, TrainerState.VALIDATING): - self.trainer.logger_connector.evaluation_callback_metrics.update(callback_metrics) - if len(dataloader_result_metrics) > 0: self.eval_loop_results.append(dataloader_result_metrics) @@ -380,20 +370,16 @@ def __process_eval_epoch_end_results_and_log_legacy(self, eval_results): eval_results = [eval_results] num_loaders: int = self.trainer.evaluation_loop.num_dataloaders - prog_bar_metrics, log_metrics, callback_metrics = {}, {}, {} + prog_bar_metrics, log_metrics = {}, {} for result_idx, result in enumerate(eval_results): - _, prog_bar_metrics, log_metrics, callback_metrics, _ = self.trainer.process_dict_result(result) + _, prog_bar_metrics, log_metrics, _ = self.trainer.process_dict_result(result) if num_loaders > 1: - self.__process_eval_epoch_end_results_and_log_legacy_update( - prog_bar_metrics, log_metrics, callback_metrics - ) + self.__process_eval_epoch_end_results_and_log_legacy_update(prog_bar_metrics, log_metrics) if num_loaders == 1: - self.__process_eval_epoch_end_results_and_log_legacy_update( - prog_bar_metrics, log_metrics, callback_metrics - ) + self.__process_eval_epoch_end_results_and_log_legacy_update(prog_bar_metrics, log_metrics) def on_train_epoch_end(self): # inform cached logger connector epoch finished @@ -446,10 +432,9 @@ def log_train_epoch_end_metrics( # TODO: deprecate 1.0 else: - out = self.__run_legacy_training_epoch_end( - num_optimizers, epoch_output, model, is_result_obj, epoch_callback_metrics + epoch_log_metrics, epoch_progress_bar_metrics = self.__run_legacy_training_epoch_end( + num_optimizers, epoch_output, model, is_result_obj ) - epoch_log_metrics, epoch_progress_bar_metrics, epoch_callback_metrics = out # it will perform reduction over epoch and return log metrics cached_epoch_log_metrics = self.cached_results.get_epoch_log_metrics() @@ -501,9 +486,7 @@ def training_epoch_end(self, model, epoch_output, num_optimizers): # capture logging self.trainer.logger_connector.cache_logged_metrics() - def __run_legacy_training_epoch_end( - self, num_optimizers, epoch_output, model, is_result_obj, epoch_callback_metrics - ): + def __run_legacy_training_epoch_end(self, num_optimizers, epoch_output, model, is_result_obj): epoch_log_metrics = {} epoch_progress_bar_metrics = {} @@ -534,7 +517,6 @@ def __run_legacy_training_epoch_end( _processed_outputs = self.trainer.process_dict_result(epoch_output) epoch_progress_bar_metrics = _processed_outputs[1] epoch_log_metrics = _processed_outputs[2] - epoch_callback_metrics = _processed_outputs[3] # -------------------------- # Structured Result (auto epoch end) @@ -542,7 +524,7 @@ def __run_legacy_training_epoch_end( elif is_result_obj: epoch_log_metrics, epoch_progress_bar_metrics = self.__auto_reduce_results_on_epoch_end(epoch_output) - return epoch_log_metrics, epoch_progress_bar_metrics, epoch_callback_metrics + return epoch_log_metrics, epoch_progress_bar_metrics def __auto_reduce_results_on_epoch_end(self, epoch_output): epoch_log_metrics = {} diff --git a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py index 37911c2de19d7..1b897f3844a78 100644 --- a/pytorch_lightning/trainer/logging.py +++ b/pytorch_lightning/trainer/logging.py @@ -71,9 +71,8 @@ def process_dict_result(self, output, train=False): if isinstance(output, torch.Tensor): progress_bar_metrics = {} log_metrics = {} - callback_metrics = {} hiddens = None - return output, progress_bar_metrics, log_metrics, callback_metrics, hiddens + return output, progress_bar_metrics, log_metrics, hiddens # --------------- # EXTRACT PROGRESS BAR KEYS @@ -140,7 +139,7 @@ def process_dict_result(self, output, train=False): progress_bar_metrics = recursive_detach(progress_bar_metrics) log_metrics = recursive_detach(log_metrics) - return loss, progress_bar_metrics, log_metrics, {}, hiddens + return loss, progress_bar_metrics, log_metrics, hiddens def reduce_distributed_output(self, output, num_gpus): if num_gpus <= 1: diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index c692c3f1c113f..f0f1d3e6b11e1 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -823,15 +823,6 @@ def run_sanity_check(self, ref_model): # run eval step _, eval_results = self.run_evaluation() - # allow no returns from eval - if eval_results is not None and len(eval_results) > 0: - # when we get a list back, used only the last item - if isinstance(eval_results, list): - eval_results = eval_results[-1] - - _, _, _, callback_metrics, _ = self.process_dict_result(eval_results) - self.logger_connector.callback_metrics = callback_metrics - self.on_sanity_check_end() self._running_stage = stage diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 427ef8100af28..a77524b8a55e7 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -348,8 +348,7 @@ def _process_training_step_output(self, training_step_output, split_batch): batch_loss=training_step_output[0], pbar_on_batch_end=training_step_output[1], log_metrics=training_step_output[2], - callback_metrics=training_step_output[3], - hiddens=training_step_output[4], + hiddens=training_step_output[3], ) # if the user decides to finally reduce things in epoch_end, save raw output without graphs if isinstance(training_step_output_for_epoch_end, torch.Tensor): From fd9b4cb6faac23e14dfe571acfdac4e50f3a5b90 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 03:19:31 +0100 Subject: [PATCH 05/17] Fix tests --- .../connectors/logger_connector/epoch_result_store.py | 1 - .../connectors/logger_connector/logger_connector.py | 7 +------ pytorch_lightning/trainer/logging.py | 7 +++++++ tests/base/model_valid_epoch_ends.py | 5 ++--- tests/checkpointing/test_model_checkpoint.py | 8 +------- tests/trainer/logging_/test_logger_connector.py | 1 - 6 files changed, 11 insertions(+), 18 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py b/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py index 7759c8028d325..4a57b14efd89b 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py @@ -346,7 +346,6 @@ def update_logger_connector(self) -> Tuple[Dict, Dict]: # update callback_metrics logger_connector._callback_metrics.update(callback_metrics) - logger_connector._callback_metrics.pop("epoch", None) batch_pbar_metrics.pop("debug_epoch", None) return batch_pbar_metrics, batch_log_metrics diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index c6b35a1274e77..1345ce1991086 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -78,7 +78,7 @@ def progress_bar_metrics(self, progress_bar_metrics: Dict) -> None: @property def cached_results(self) -> Union[EpochResultStore, None]: - return self._cached_results.get(self.trainer._running_stage) # type: ignore + return self._cached_results.get(self.trainer._running_stage) def get_metrics(self, key: str) -> Dict: metrics_holder: MetricsHolder = getattr(self, f"_{key}") @@ -121,8 +121,6 @@ def cache_logged_metrics(self): def on_trainer_init(self, logger, flush_logs_every_n_steps: int, log_every_n_steps: int, move_metrics_to_cpu: bool): # logging self.configure_logger(logger) - # todo: IDE is complaining, these shall be initialized in the Trainer init at leas as placeholders - # and assign here the desired value self.trainer.flush_logs_every_n_steps = flush_logs_every_n_steps self.trainer.log_every_n_steps = log_every_n_steps self.trainer.move_metrics_to_cpu = move_metrics_to_cpu @@ -207,9 +205,6 @@ def log_metrics(self, metrics, grad_norm_dic, step=None): metrics (dict): Metric values grad_norm_dic (dict): Gradient norms step (int): Step for which metrics should be logged. Default value corresponds to `self.global_step` - log_train_step_metrics (bool): Used to track if `log_metrics` function is being called in during training - steps. In training steps, we will log metrics on step: `total_nb_idx` (for accumulated gradients) - and global_step for the rest. """ # add gpu memory if self.trainer._device_type == DeviceType.GPU and self.log_gpu_memory: diff --git a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py index 1b897f3844a78..1eb77d517d952 100644 --- a/pytorch_lightning/trainer/logging.py +++ b/pytorch_lightning/trainer/logging.py @@ -20,6 +20,7 @@ from pytorch_lightning.utilities import DistributedType from pytorch_lightning.utilities.distributed import rank_zero_warn +from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.memory import recursive_detach @@ -32,8 +33,14 @@ class TrainerLoggingMixin(ABC): def metrics_to_scalars(self, metrics): new_metrics = {} + # TODO: this is duplicated in MetricsHolder. should be unified for k, v in metrics.items(): if isinstance(v, torch.Tensor): + if v.numel() != 1: + raise MisconfigurationException( + f"The metric `{k}` does not contain a single element" + f" thus it cannot be converted to float. Found `{v}`" + ) v = v.item() if isinstance(v, dict): diff --git a/tests/base/model_valid_epoch_ends.py b/tests/base/model_valid_epoch_ends.py index dd29d355a4a98..7b83670acacef 100644 --- a/tests/base/model_valid_epoch_ends.py +++ b/tests/base/model_valid_epoch_ends.py @@ -43,9 +43,8 @@ def _mean(res, key): val_loss_mean = val_loss_mean.item() val_acc_mean = val_acc_mean.item() - metrics_dict = {'early_stop_on': val_loss_mean, 'val_acc': val_acc_mean} - results = {'progress_bar': metrics_dict, 'log': metrics_dict} - return results + self.log('early_stop_on', val_loss_mean, prog_bar=True) + self.log('val_acc', val_acc_mean, prog_bar=True) def validation_epoch_end__multiple_dataloaders(self, outputs): """ diff --git a/tests/checkpointing/test_model_checkpoint.py b/tests/checkpointing/test_model_checkpoint.py index b067d1055dda6..5e50543d37e5f 100644 --- a/tests/checkpointing/test_model_checkpoint.py +++ b/tests/checkpointing/test_model_checkpoint.py @@ -51,7 +51,6 @@ def training_step(self, batch, batch_idx): def validation_epoch_end(self, outputs): outs = torch.stack([x['x'] for x in outputs]).mean() - self.log('epoch', self.current_epoch) self.log('val_acc', outs) @@ -721,12 +720,7 @@ def test_model_checkpoint_topk_all(tmpdir): seed_everything(1000) epochs = 3 - class CustomModel(LogInTwoMethods): - - def validation_epoch_end(self, outputs): - self.log('epoch', self.current_epoch) - - model = CustomModel() + model = BoringModel() checkpoint_callback = ModelCheckpoint( dirpath=tmpdir, filename="{epoch}", diff --git a/tests/trainer/logging_/test_logger_connector.py b/tests/trainer/logging_/test_logger_connector.py index feaa5866929f9..819248ffd6dcf 100644 --- a/tests/trainer/logging_/test_logger_connector.py +++ b/tests/trainer/logging_/test_logger_connector.py @@ -462,7 +462,6 @@ class TestModel(BoringModel): def validation_step(self, batch, *args, **kwargs): output = self(batch) self.log('test', output) - return {} # TODO: should be able to have no return def test_step(self, *args, **kwargs): return self.validation_step(*args, **kwargs) From 5557e242a023666e9f3ee50490b8509774a5cf00 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 03:24:11 +0100 Subject: [PATCH 06/17] Fix tests --- .../logging_/test_eval_loop_logging_1_0.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/tests/trainer/logging_/test_eval_loop_logging_1_0.py b/tests/trainer/logging_/test_eval_loop_logging_1_0.py index ddd69f8b9ad34..32bff96baf566 100644 --- a/tests/trainer/logging_/test_eval_loop_logging_1_0.py +++ b/tests/trainer/logging_/test_eval_loop_logging_1_0.py @@ -372,11 +372,10 @@ def test_multi_dataloaders_add_suffix_properly(tmpdir): class TestModel(BoringModel): - def test_step(self, batch, batch_idx, dataloader_idx): + def test_step(self, batch, *args): output = self.layer(batch) loss = self.loss(batch, output) self.log("test_loss", loss, on_step=True, on_epoch=True) - return {"y": loss} def test_dataloader(self): return [ @@ -397,22 +396,19 @@ def test_dataloader(self): weights_summary=None, ) results = trainer.test(model) - assert "test_loss_epoch/dataloader_idx_0" in results[0] - assert "test_loss_epoch/dataloader_idx_1" in results[1] + + assert {"test_loss/dataloader_idx_0", "test_loss_epoch/dataloader_idx_0"} == set(results[0]) + assert {"test_loss/dataloader_idx_1", "test_loss_epoch/dataloader_idx_1"} == set(results[1]) def test_single_dataloader_no_suffix_added(tmpdir): class TestModel(BoringModel): - def test_step(self, batch, batch_idx): + def test_step(self, batch, *args): output = self.layer(batch) loss = self.loss(batch, output) self.log("test_loss", loss, on_step=True, on_epoch=True) - return {"y": loss} - - def test_dataloader(self): - return torch.utils.data.DataLoader(RandomDataset(32, 64)) model = TestModel() model.test_epoch_end = None @@ -427,9 +423,9 @@ def test_dataloader(self): weights_summary=None, ) results = trainer.test(model) + assert len(results) == 1 - # error : It is wrong there. `y` should equal test_loss_epoch - assert results[0]['test_loss'] == results[0]['y'] + assert {"test_loss", "test_loss_epoch"} == set(results[0]) @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) From 9cdf20e907dc0de9ba1da57d6331cbb1558554ff Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 03:41:53 +0100 Subject: [PATCH 07/17] Docs --- docs/source/ecosystem/asr_nlp_tts.rst | 10 +++++----- docs/source/ecosystem/bolts.rst | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/source/ecosystem/asr_nlp_tts.rst b/docs/source/ecosystem/asr_nlp_tts.rst index 49bed0a981a6e..af9a7084583f2 100644 --- a/docs/source/ecosystem/asr_nlp_tts.rst +++ b/docs/source/ecosystem/asr_nlp_tts.rst @@ -270,12 +270,12 @@ with PyTorch Lightning since every NeMo model is a Lightning Module. log_probs=log_probs, targets=transcript, input_lengths=encoded_len, target_lengths=transcript_len ) wer_num, wer_denom = self._wer(predictions, transcript, transcript_len) - tensorboard_logs = { + self.log_dict({ 'train_loss': loss_value, 'training_batch_wer': wer_num / wer_denom, 'learning_rate': self._optimizer.param_groups[0]['lr'], - } - return {'loss': loss_value, 'log': tensorboard_logs} + }) + return loss_value Neural Types in NeMo ASR ------------------------ @@ -539,8 +539,8 @@ since every NeMo model is a Lightning Module. logits = self(input_ids=input_ids, token_type_ids=input_type_ids, attention_mask=input_mask) loss = self.loss(logits=logits, labels=labels, loss_mask=loss_mask) - tensorboard_logs = {'train_loss': loss, 'lr': self._optimizer.param_groups[0]['lr']} - return {'loss': loss, 'log': tensorboard_logs} + self.log_dict({'train_loss': loss, 'lr': self._optimizer.param_groups[0]['lr']}) + return loss ... Neural Types in NeMo NLP diff --git a/docs/source/ecosystem/bolts.rst b/docs/source/ecosystem/bolts.rst index 9133176cab912..f3a4ab9c858be 100644 --- a/docs/source/ecosystem/bolts.rst +++ b/docs/source/ecosystem/bolts.rst @@ -68,8 +68,8 @@ you can trust the implementations and use them to bootstrap your research much f loss = self.criterion(logits.view(-1, logits.size(-1)), x.view(-1).long()) - logs = {"loss": loss} - return {"loss": loss, "log": logs} + self.log("loss", loss) + return loss ---------- From 108fbd3ec320d92d9c4ec52f4518e320d2024489 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 03:44:25 +0100 Subject: [PATCH 08/17] CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59dd0169173be..1916dbcf998ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -147,6 +147,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Removed deprecated `LightningModule` `hparams` setter ([#6207](https://github.com/PyTorchLightning/pytorch-lightning/pull/6207)) +- Removed legacy code to include `step` dictionary returns in `callback_metrics` ([#6682](https://github.com/PyTorchLightning/pytorch-lightning/pull/6682)) + + - Removed `optimizer_idx` argument from `training_step` in manual optimization ([#6093](https://github.com/PyTorchLightning/pytorch-lightning/pull/6093)) From 5aacda21c8818cb67b2d7da9faabbd5dc85384ad Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 03:57:20 +0100 Subject: [PATCH 09/17] Fix --- pytorch_lightning/trainer/training_loop.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 1445e601fbf5f..1c440693b3158 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -533,9 +533,7 @@ def run_training_epoch(self): self.on_train_epoch_end(epoch_output) # log epoch metrics - self.trainer.logger_connector.log_train_epoch_end_metrics( - epoch_output, self.checkpoint_accumulator, self.early_stopping_accumulator, self.num_optimizers - ) + self.trainer.logger_connector.log_train_epoch_end_metrics(epoch_output, self.num_optimizers) should_check_val = self.should_check_val_fx(batch_idx, is_last_batch, on_epoch=True) should_skip_eval = self.trainer.evaluation_loop.should_skip_evaluation(self.trainer.num_val_batches) From 74f142497f3e4a4b33938c0af40afb7d5f99714a Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 04:12:59 +0100 Subject: [PATCH 10/17] Docs --- docs/source/common/lightning_module.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/common/lightning_module.rst b/docs/source/common/lightning_module.rst index 6e67f591da7c7..d4a6e3ae94dfc 100644 --- a/docs/source/common/lightning_module.rst +++ b/docs/source/common/lightning_module.rst @@ -602,7 +602,6 @@ For cases like production, you might want to iterate different models inside a L loss = F.cross_entropy(y_hat, y) acc = FM.accuracy(y_hat, y) - # loss is tensor. The Checkpoint Callback is monitoring 'checkpoint_on' metrics = {'val_acc': acc, 'val_loss': loss} self.log_dict(metrics) return metrics From 97cf4b4befd659f5f721fd5212cd65ea962f689d Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 04:14:23 +0100 Subject: [PATCH 11/17] CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b263db7ddff13..17dbd44c3e9ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -147,6 +147,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Removed `mode='auto'` from `EarlyStopping` ([#6167](https://github.com/PyTorchLightning/pytorch-lightning/pull/6167)) +- Removed legacy references for magic keys in the `Result` object ([#6016](https://github.com/PyTorchLightning/pytorch-lightning/pull/6016)) + + - Removed deprecated `LightningModule` `hparams` setter ([#6207](https://github.com/PyTorchLightning/pytorch-lightning/pull/6207)) From 265d06d254d6b0173ac0a5ce43c397871f6e8a19 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 04:14:56 +0100 Subject: [PATCH 12/17] Remove hiddens --- pytorch_lightning/core/step_result.py | 26 +++---------------- .../logger_connector/logger_connector.py | 2 +- pytorch_lightning/trainer/logging.py | 12 +++------ pytorch_lightning/trainer/trainer.py | 2 +- pytorch_lightning/trainer/training_loop.py | 14 ---------- 5 files changed, 8 insertions(+), 48 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 9f65687ba88d4..aff79048034a0 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -23,26 +23,16 @@ from torchmetrics import Metric from pytorch_lightning.utilities.distributed import sync_ddp_if_available -from pytorch_lightning.utilities.warnings import WarningCache - -warning_cache = WarningCache() class Result(Dict): - def __init__( - self, - minimize: Optional[Tensor] = None, - hiddens: Optional[Tensor] = None, - ): - + def __init__(self, minimize: Optional[Tensor] = None): super().__init__() # temporary until dict results are deprecated os.environ['PL_USING_RESULT_OBJ'] = '1' - if hiddens is not None: - self.hiddens = hiddens.detach() if minimize is not None: err = 'Minimize can only be used in training_step, training_step_end, training_epoch_end' self._assert_grad_tensor_metric('minimize', minimize, err) @@ -471,9 +461,7 @@ def padded_gather(cls, outputs): default_padding_idx = 0 for name, value in result.items(): if ( - name != 'minimize' - and isinstance(value, list) - and len(value) > 0 + name != 'minimize' and isinstance(value, list) and len(value) > 0 and isinstance(value[0], torch.Tensor) ): default_padding_idx = meta[name]['tbptt_pad_token'] @@ -481,11 +469,7 @@ def padded_gather(cls, outputs): # pad across each key individually for name, value in result.items(): - if ( - isinstance(value, list) - and len(value) > 0 - and isinstance(value[0], torch.Tensor) - ): + if (isinstance(value, list) and len(value) > 0 and isinstance(value[0], torch.Tensor)): padding_key = default_padding_idx if name == 'minimize' else meta[name]['tbptt_pad_token'] padded = torch.nn.utils.rnn.pad_sequence(value, batch_first=True, padding_value=padding_key) result[name] = padded @@ -587,10 +571,6 @@ def dp_reduce(self): def should_reduce_on_epoch_end(self) -> bool: return self['meta']['_internal']['_reduce_on_epoch'] - def drop_hiddens(self): - if 'hiddens' in self: - del self['hiddens'] - def rename_keys(self, map_dict: dict): """ Maps key values to the target values. Useful when renaming variables in mass. diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index 91770b42d73ab..a41e589216d21 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -374,7 +374,7 @@ def __process_eval_epoch_end_results_and_log_legacy(self, eval_results): prog_bar_metrics, log_metrics, callback_metrics = {}, {}, {} for result_idx, result in enumerate(eval_results): - _, prog_bar_metrics, log_metrics, callback_metrics, _ = self.trainer.process_dict_result(result) + _, prog_bar_metrics, log_metrics, callback_metrics = self.trainer.process_dict_result(result) if num_loaders > 1: self.__process_eval_epoch_end_results_and_log_legacy_update( diff --git a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py index 6a036b9fcec49..4e022695bc807 100644 --- a/pytorch_lightning/trainer/logging.py +++ b/pytorch_lightning/trainer/logging.py @@ -72,8 +72,7 @@ def process_dict_result(self, output, train=False): progress_bar_metrics = {} log_metrics = {} callback_metrics = {} - hiddens = None - return output, progress_bar_metrics, log_metrics, callback_metrics, hiddens + return output, progress_bar_metrics, log_metrics, callback_metrics # --------------- # EXTRACT CALLBACK KEYS @@ -82,7 +81,7 @@ def process_dict_result(self, output, train=False): callback_metrics = {} if isinstance(output, Mapping): for k, v in output.items(): - if k not in ['progress_bar', 'log', 'hiddens']: + if k not in ['progress_bar', 'log']: callback_metrics[k] = v if train and self._distrib_type in (DistributedType.DP, DistributedType.DDP2): @@ -144,11 +143,6 @@ def process_dict_result(self, output, train=False): if self._distrib_type in (DistributedType.DP, DistributedType.DDP2): loss = self.reduce_distributed_output(loss, self.num_gpus) - # --------------- - # EXTRACT HIDDEN - # --------------- - hiddens = output.get('hiddens', None) if isinstance(output, Mapping) else None - # use every metric passed in as a candidate for callback callback_metrics.update(progress_bar_metrics) callback_metrics.update(log_metrics) @@ -159,7 +153,7 @@ def process_dict_result(self, output, train=False): progress_bar_metrics = recursive_detach(progress_bar_metrics) log_metrics = recursive_detach(log_metrics) - return loss, progress_bar_metrics, log_metrics, callback_metrics, hiddens + return loss, progress_bar_metrics, log_metrics, callback_metrics def reduce_distributed_output(self, output, num_gpus): if num_gpus <= 1: diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index c692c3f1c113f..6de43c2b945d6 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -829,7 +829,7 @@ def run_sanity_check(self, ref_model): if isinstance(eval_results, list): eval_results = eval_results[-1] - _, _, _, callback_metrics, _ = self.process_dict_result(eval_results) + _, _, _, callback_metrics = self.process_dict_result(eval_results) self.logger_connector.callback_metrics = callback_metrics self.on_sanity_check_end() diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 1c440693b3158..1e655fe6e8fad 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -312,7 +312,6 @@ def training_step(self, split_batch, batch_idx, opt_idx, hiddens): loss=untouched_loss, training_step_output=training_step_output, training_step_output_for_epoch_end=training_step_output_for_epoch_end, - hiddens=training_step_output.hiddens, ) return result @@ -343,7 +342,6 @@ def _process_training_step_output(self, training_step_output, split_batch): pbar_on_batch_end=training_step_output[1], log_metrics=training_step_output[2], callback_metrics=training_step_output[3], - hiddens=training_step_output[4], ) # if the user decides to finally reduce things in epoch_end, save raw output without graphs if isinstance(training_step_output_for_epoch_end, torch.Tensor): @@ -357,12 +355,10 @@ def _process_training_step_output_1_0(self, training_step_output, split_batch): result = self.trainer.lightning_module._results loss = None - hiddens = None # handle dict return if isinstance(training_step_output, dict): loss = training_step_output.pop("loss", None) - hiddens = training_step_output.pop("hiddens", None) result["extra"] = training_step_output # handle scalar return @@ -372,7 +368,6 @@ def _process_training_step_output_1_0(self, training_step_output, split_batch): # map to results under the hood result.minimize = loss - result.hiddens = hiddens # track batch for manual reduction with result result.track_batch_size(len(split_batch)) @@ -438,12 +433,6 @@ def _track_gradient_norm(self): grad_norm_dict = model.grad_norm(self.trainer.track_grad_norm) return grad_norm_dict - def process_hiddens(self, opt_closure_result): - hiddens = opt_closure_result.hiddens - if isinstance(opt_closure_result.training_step_output, Result): - opt_closure_result.training_step_output_for_epoch_end.drop_hiddens() - return hiddens - def tbptt_split_batch(self, batch): splits = [batch] if self.trainer.truncated_bptt_steps is not None: @@ -687,9 +676,6 @@ def _process_closure_result(self, batch_outputs: list, opt_idx: int) -> list: # cache metrics self.trainer.logger_connector.cache_training_step_metrics(opt_closure_result) - # track hiddens - self.trainer.hiddens = self.process_hiddens(opt_closure_result) - # check if loss or model weights are nan if self.trainer.terminate_on_nan: self.trainer.detect_nan_tensors(opt_closure_result.loss) From 9f68a80a18f2b35118cbc12462a23829b8490acf Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 14:31:13 +0100 Subject: [PATCH 13/17] Bad merge --- .../trainer/connectors/logger_connector/logger_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index 6a9d75638bbe0..e9f8f3b7efa0a 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -342,7 +342,7 @@ def __process_eval_epoch_end_results_and_log_legacy(self, eval_results): eval_results = [eval_results] for result_idx, result in enumerate(eval_results): - _, prog_bar_metrics, log_metrics, _ = self.trainer.process_dict_result(result) + _, prog_bar_metrics, log_metrics = self.trainer.process_dict_result(result) # eval loop returns all metrics dataloader_result_metrics = {**prog_bar_metrics, **log_metrics} From 82df1254fe37e31fa0a3194a712e12eb57001867 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 16:11:00 +0100 Subject: [PATCH 14/17] Resolve hiddens --- docs/source/common/trainer.rst | 8 +------- pytorch_lightning/trainer/logging.py | 14 ++++++++++---- pytorch_lightning/trainer/training_loop.py | 5 ++++- tests/trainer/logging_/test_logger_connector.py | 3 ++- .../logging_/test_train_loop_logging_1_0.py | 7 +------ 5 files changed, 18 insertions(+), 19 deletions(-) diff --git a/docs/source/common/trainer.rst b/docs/source/common/trainer.rst index d86a8dc1ff472..96e19a7be4694 100644 --- a/docs/source/common/trainer.rst +++ b/docs/source/common/trainer.rst @@ -1478,15 +1478,9 @@ with the hidden def training_step(self, batch, batch_idx, hiddens): # hiddens are the hiddens from the previous truncated backprop step out, hiddens = self.lstm(data, hiddens) - - # remember to detach() hiddens. - # If you don't, you will get a RuntimeError: Trying to backward through - # the graph a second time... - # Using hiddens.detach() allows each split to be disconnected. - return { "loss": ..., - "hiddens": hiddens # remember to detach() this + "hiddens": hiddens } To modify how the batch is split, diff --git a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py index da608f032138e..8aaac0a659152 100644 --- a/pytorch_lightning/trainer/logging.py +++ b/pytorch_lightning/trainer/logging.py @@ -14,6 +14,7 @@ import inspect from abc import ABC +from collections import Mapping import torch @@ -75,9 +76,7 @@ def process_dict_result(self, output, train=False): # -------------------------- # single scalar returned from a xx_step if isinstance(output, torch.Tensor): - progress_bar_metrics = {} - log_metrics = {} - return output, progress_bar_metrics, log_metrics + return output, {}, {}, None # --------------- # EXTRACT PROGRESS BAR KEYS @@ -134,12 +133,19 @@ def process_dict_result(self, output, train=False): if self._distrib_type in (DistributedType.DP, DistributedType.DDP2): loss = self.reduce_distributed_output(loss, self.num_gpus) + # --------------- + # EXTRACT HIDDEN + # --------------- + hiddens = output.get('hiddens', None) if isinstance(output, Mapping) else None + if hiddens is not None: + hiddens = hiddens.detach() + # detach all metrics for callbacks to prevent memory leaks # no .item() because it will slow things down progress_bar_metrics = recursive_detach(progress_bar_metrics) log_metrics = recursive_detach(log_metrics) - return loss, progress_bar_metrics, log_metrics + return loss, progress_bar_metrics, log_metrics, hiddens def reduce_distributed_output(self, output, num_gpus): if num_gpus <= 1: diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 15958e6bdab1c..696f14742935c 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -354,19 +354,22 @@ def _process_training_step_output_1_0(self, training_step_output, split_batch): result = self.trainer.lightning_module._results loss = None + hiddens = None + result["extra"] = {} # handle dict return if isinstance(training_step_output, dict): loss = training_step_output.pop("loss", None) + hiddens = training_step_output.pop("hiddens", None) result["extra"] = training_step_output # handle scalar return elif isinstance(training_step_output, torch.Tensor): loss = training_step_output - result["extra"] = {} # map to results under the hood result.minimize = loss + self.trainer.hiddens = hiddens # track batch for manual reduction with result result.track_batch_size(len(split_batch)) diff --git a/tests/trainer/logging_/test_logger_connector.py b/tests/trainer/logging_/test_logger_connector.py index f22fb30e94c76..25103559cd070 100644 --- a/tests/trainer/logging_/test_logger_connector.py +++ b/tests/trainer/logging_/test_logger_connector.py @@ -111,7 +111,7 @@ def training_step_end(self, *_): assert generated == excepted -def test__logger_connector__epoch_result_store__train__ttbt(tmpdir): +def test__logger_connector__epoch_result_store__train__tbptt(tmpdir): """ Tests that LoggerConnector will properly capture logged information with ttbt and reduce them @@ -142,6 +142,7 @@ def __init__(self): @decorator_with_arguments(fx_name="training_step") def training_step(self, batch, batch_idx, hiddens): + assert hiddens == self.test_hidden, "Hidden state not persistent between tbptt steps" self.test_hidden = torch.rand(1) x_tensor, y_list = batch diff --git a/tests/trainer/logging_/test_train_loop_logging_1_0.py b/tests/trainer/logging_/test_train_loop_logging_1_0.py index f8672eb4ec51e..092751ec68e33 100644 --- a/tests/trainer/logging_/test_train_loop_logging_1_0.py +++ b/tests/trainer/logging_/test_train_loop_logging_1_0.py @@ -318,12 +318,7 @@ def __init__(self): self.layer = torch.nn.Linear(2, 2) def training_step(self, batch, batch_idx, hiddens): - try: - assert hiddens == self.test_hidden, "Hidden state not persistent between tbptt steps" - # todo: specify the possible exception - except Exception as ex: - print(ex) - + assert hiddens == self.test_hidden, "Hidden state not persistent between tbptt steps" self.test_hidden = torch.rand(1) x_tensor, y_list = batch From 84d203fe21b1ea6633c619bbbc811ca149fabc76 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 16:24:33 +0100 Subject: [PATCH 15/17] Delete dead code --- pytorch_lightning/callbacks/early_stopping.py | 1 - pytorch_lightning/callbacks/model_checkpoint.py | 1 - pytorch_lightning/core/step_result.py | 3 --- 3 files changed, 5 deletions(-) diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index 4448de8e4834b..24ebcdf807357 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -90,7 +90,6 @@ def __init__( self.wait_count = 0 self.stopped_epoch = 0 self.mode = mode - self.warned_result_obj = False if self.mode not in self.mode_dict: raise MisconfigurationException(f"`mode` can be {', '.join(self.mode_dict.keys())}, got {self.mode}") diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py index d9dea5979ae58..5f0318e7ac8d1 100644 --- a/pytorch_lightning/callbacks/model_checkpoint.py +++ b/pytorch_lightning/callbacks/model_checkpoint.py @@ -202,7 +202,6 @@ def __init__( self.best_model_path = "" self.last_model_path = "" self.save_function = None - self.warned_result_obj = False self.__init_monitor_mode(monitor, mode) self.__init_ckpt_dir(dirpath, filename, save_top_k) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index aff79048034a0..53167a284bb92 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -30,9 +30,6 @@ class Result(Dict): def __init__(self, minimize: Optional[Tensor] = None): super().__init__() - # temporary until dict results are deprecated - os.environ['PL_USING_RESULT_OBJ'] = '1' - if minimize is not None: err = 'Minimize can only be used in training_step, training_step_end, training_epoch_end' self._assert_grad_tensor_metric('minimize', minimize, err) From 355149f917d5bf2ceb31b9bad6d558820c69e3f5 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 16:28:38 +0100 Subject: [PATCH 16/17] Undo change --- .../trainer/connectors/logger_connector/logger_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index e9f8f3b7efa0a..6a9d75638bbe0 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -342,7 +342,7 @@ def __process_eval_epoch_end_results_and_log_legacy(self, eval_results): eval_results = [eval_results] for result_idx, result in enumerate(eval_results): - _, prog_bar_metrics, log_metrics = self.trainer.process_dict_result(result) + _, prog_bar_metrics, log_metrics, _ = self.trainer.process_dict_result(result) # eval loop returns all metrics dataloader_result_metrics = {**prog_bar_metrics, **log_metrics} From 21db35bd7fbb31062a3aa0a9c1d4bd8af2ccc582 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Fri, 26 Mar 2021 16:30:00 +0100 Subject: [PATCH 17/17] flake8 --- pytorch_lightning/core/step_result.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 53167a284bb92..d61c9fb5d3d1e 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -14,7 +14,6 @@ """Result class for easier logging and epoch-wise reduction.""" import numbers -import os from copy import copy from typing import Any, Callable, Dict, Iterable, List, MutableMapping, Optional, Sequence, Tuple, Union