From 9c701ed043008424b5b2b2aff1c15fca718c386c Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 30 Mar 2021 00:15:46 +0200 Subject: [PATCH 01/11] Remove legacy code --- .../logger_connector/logger_connector.py | 128 +----------------- pytorch_lightning/trainer/evaluation_loop.py | 3 +- 2 files changed, 6 insertions(+), 125 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index 6a9d75638bbe0..6752411fcbfcd 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -244,10 +244,6 @@ def add_progress_bar_metrics(self, metrics): self.trainer.dev_debugger.track_pbar_metrics_history(metrics) - def track_metrics_deprecated(self, deprecated_eval_results): - self._track_callback_metrics(deprecated_eval_results) - self.__process_eval_epoch_end_results_and_log_legacy(deprecated_eval_results) - def evaluation_epoch_end(self): # reset dataloader idx model_ref = self.trainer.lightning_module @@ -331,32 +327,6 @@ def _track_callback_metrics(self, eval_results): if self.trainer.state in (TrainerState.TESTING, TrainerState.VALIDATING): self.trainer.logger_connector.evaluation_callback_metrics.update(flat) - def __process_eval_epoch_end_results_and_log_legacy(self, eval_results): - if self.trainer.sanity_checking: - return - - if eval_results is not None and len(eval_results) > 0: - - # in eval, the user may return something at every validation step without final reduction - if not isinstance(eval_results, list): - eval_results = [eval_results] - - for result_idx, result in enumerate(eval_results): - _, prog_bar_metrics, log_metrics, _ = self.trainer.process_dict_result(result) - - # eval loop returns all metrics - dataloader_result_metrics = {**prog_bar_metrics, **log_metrics} - - # add metrics to prog bar - self.trainer.logger_connector.add_progress_bar_metrics(prog_bar_metrics) - - # log metrics - if len(log_metrics) > 0: - self.trainer.logger_connector.log_metrics(log_metrics, {}) - - if len(dataloader_result_metrics) > 0: - self.eval_loop_results.append(dataloader_result_metrics) - def on_train_epoch_end(self): # inform cached logger connector epoch finished self.cached_results.has_batch_loop_finished = True @@ -368,36 +338,11 @@ def log_train_epoch_end_metrics(self, epoch_output, num_optimizers): model = self.trainer.lightning_module - # ------------------------ - # determine if using a result obj - # ------------------------ - # [optimizer_idx][training_step_idx][tbptt_index] - opt_idx_outputs = epoch_output[0] - - # TODO: deprecate 1.0 - try: - sample_obj = opt_idx_outputs[0][0] if isinstance(opt_idx_outputs[0], list) else opt_idx_outputs[0] - is_result_obj = len(epoch_output) > 0 and isinstance(sample_obj, Result) - is_1_0_result = is_result_obj and 'extra' in sample_obj - except IndexError: - is_result_obj = False - is_1_0_result = False - - # ------------------ - # NEW 1.0.0 PATH - # ------------------ - if is_1_0_result: - # lightning module hook - self.training_epoch_end(model, epoch_output, num_optimizers) - - # log/aggregate metrics automatically - epoch_log_metrics, epoch_progress_bar_metrics = self.__auto_reduce_results_on_epoch_end(epoch_output) - - # TODO: deprecate 1.0 - else: - epoch_log_metrics, epoch_progress_bar_metrics = self.__run_legacy_training_epoch_end( - num_optimizers, epoch_output, model, is_result_obj - ) + # lightning module hook + self.training_epoch_end(model, epoch_output, num_optimizers) + + # log/aggregate metrics automatically + epoch_log_metrics, epoch_progress_bar_metrics = self.__auto_reduce_results_on_epoch_end(epoch_output) # it will perform reduction over epoch and return log metrics cached_epoch_log_metrics = self.cached_results.get_epoch_log_metrics() @@ -446,46 +391,6 @@ def training_epoch_end(self, model, epoch_output, num_optimizers): # capture logging self.trainer.logger_connector.cache_logged_metrics() - def __run_legacy_training_epoch_end(self, num_optimizers, epoch_output, model, is_result_obj): - - epoch_log_metrics = {} - epoch_progress_bar_metrics = {} - - # -------------------------- - # EPOCH END STEP IF DEFINED - # -------------------------- - if is_overridden('training_epoch_end', model=model): - if is_result_obj: - # with result object gather across time and training steps so each opt idx has a single result obj - epoch_output = self.__gather_result_across_time_and_optimizers(epoch_output) - - if num_optimizers == 1: - epoch_output = epoch_output[0] - - # run training_epoch_end - # a list with a result per optimizer index - model._current_fx_name = 'training_epoch_end' - epoch_output = model.training_epoch_end(epoch_output) - - # capture logging - self.trainer.logger_connector.cache_logged_metrics() - - if isinstance(epoch_output, Result): - epoch_log_metrics = epoch_output.epoch_log_metrics - epoch_progress_bar_metrics = epoch_output.epoch_pbar_metrics - else: - _processed_outputs = self.trainer.process_dict_result(epoch_output) - epoch_progress_bar_metrics = _processed_outputs[1] - epoch_log_metrics = _processed_outputs[2] - - # -------------------------- - # Structured Result (auto epoch end) - # -------------------------- - elif is_result_obj: - epoch_log_metrics, epoch_progress_bar_metrics = self.__auto_reduce_results_on_epoch_end(epoch_output) - - return epoch_log_metrics, epoch_progress_bar_metrics - def __auto_reduce_results_on_epoch_end(self, epoch_output): epoch_log_metrics = {} epoch_progress_bar_metrics = {} @@ -538,29 +443,6 @@ def __prepare_epoch_end_inputs(self, epoch_output): return gathered_epoch_outputs - def __gather_result_across_time_and_optimizers(self, epoch_output): - """ - Gather results into a single padded tensor per metric where each tensor is gathered across - time and across time steps. - - Returns: - a list where each element is a Result with the tensors gathered - """ - gathered_epoch_outputs = [] - for opt_outputs in epoch_output: - # gather across time first - time_gathered_outputs = [] - for tbptt_outs in opt_outputs: - tbptt_outs = tbptt_outs[0].__class__.gather(tbptt_outs) - time_gathered_outputs.append(tbptt_outs) - - # gather across training steps - # each metric has dimensions (training_steps, seq_len) (seq_len=1 when no tbptt is used) - gathered_opt_output = time_gathered_outputs[0].__class__.padded_gather(time_gathered_outputs) - gathered_epoch_outputs.append(gathered_opt_output) - - return gathered_epoch_outputs - def log_train_step_metrics(self, batch_output): if self.trainer.train_loop.should_accumulate() and self.trainer.train_loop.automatic_optimization: return diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py index a1b66cb561889..8b7543e6bf50f 100644 --- a/pytorch_lightning/trainer/evaluation_loop.py +++ b/pytorch_lightning/trainer/evaluation_loop.py @@ -242,8 +242,7 @@ def __run_eval_epoch_end(self, num_dataloaders): if not isinstance(eval_results, list): eval_results = [eval_results] - # track depreceated metrics - self.trainer.logger_connector.track_metrics_deprecated(eval_results) + self.trainer.logger_connector._track_callback_metrics(eval_results) return eval_results From 493ae1823f3d97c905dd952b2deac80cbde73258 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 30 Mar 2021 00:37:29 +0200 Subject: [PATCH 02/11] Remove legacy tests --- tests/helpers/deterministic_model.py | 35 ------ .../test_trainer_steps_dict_return.py | 109 ------------------ 2 files changed, 144 deletions(-) diff --git a/tests/helpers/deterministic_model.py b/tests/helpers/deterministic_model.py index f1bfcd1561e4a..20830a1bc6fc2 100644 --- a/tests/helpers/deterministic_model.py +++ b/tests/helpers/deterministic_model.py @@ -116,19 +116,6 @@ def training_step__dict_return(self, batch, batch_idx): self.training_step_called = True return {'loss': acc, 'log': logs, 'progress_bar': pbar, 'train_step_test': torch.tensor(549).type_as(acc)} - def training_step__for_step_end_dict(self, batch, batch_idx): - """sends outputs to training_batch_end""" - acc = self.step(batch, batch_idx) - - logs = {'log_acc1': torch.tensor(12).type_as(acc), 'log_acc2': torch.tensor(7).type_as(acc)} - pbar = {'pbar_acc1': torch.tensor(17).type_as(acc), 'pbar_acc2': torch.tensor(19).type_as(acc)} - - self.training_step_called = True - result = {'loss': acc} - result.update(logs) - result.update(pbar) - return result - def training_step_end__dict(self, output): self.training_step_end_called = True @@ -151,28 +138,6 @@ def training_step_end__dict(self, output): acc = output['loss'] return {'loss': acc, 'log': logs, 'progress_bar': pbar, 'train_step_end': acc} - def training_epoch_end__dict(self, outputs): - self.training_epoch_end_called = True - - if self._distrib_type in (DistributedType.DP, DistributedType.DDP2): - pass - else: - # only saw 4 batches - assert len(outputs) == 4 - for batch_out in outputs: - assert len(batch_out.keys()) == 4 - assert self.count_num_graphs(batch_out) == 0 - last_key = 'train_step_end' if self.training_step_end_called else 'train_step_test' - keys = ['loss', 'log', 'progress_bar', last_key] - for key in keys: - assert key in batch_out - - prototype_loss = outputs[0]['loss'] - logs = {'epoch_end_log_1': torch.tensor(178).type_as(prototype_loss)} - pbar = {'epoch_end_pbar_1': torch.tensor(234).type_as(prototype_loss)} - - return {'log': logs, 'progress_bar': pbar} - def validation_step__no_return(self, batch, batch_idx): self.validation_step_called = True self.step(batch, batch_idx) diff --git a/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py b/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py index 3f60e6060d2ae..817d797e0dd52 100644 --- a/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py +++ b/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py @@ -104,112 +104,3 @@ def training_step_with_step_end(tmpdir): assert 'train_step_end' in train_step_end_out assert pbar_metrics['pbar_acc1'] == 19.0 assert pbar_metrics['pbar_acc2'] == 21.0 - - -def test_full_training_loop_dict(tmpdir): - """ - Checks train_step + training_step_end + training_epoch_end - """ - model = DeterministicModel() - model.training_step = model.training_step__for_step_end_dict - model.training_step_end = model.training_step_end__dict - model.training_epoch_end = model.training_epoch_end__dict - model.val_dataloader = None - - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - weights_summary=None, - ) - trainer.fit(model) - - # make sure correct steps were called - assert model.training_step_called - assert model.training_step_end_called - assert model.training_epoch_end_called - - # assert epoch end metrics were added - assert trainer.logger_connector.callback_metrics['epoch_end_log_1'] == 178 - assert trainer.logger_connector.progress_bar_metrics['epoch_end_pbar_1'] == 234 - - # make sure training outputs what is expected - batch_idx, batch = 0, next(iter(model.train_dataloader())) - - out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) - assert out.signal == 0 - assert trainer.logger_connector.logged_metrics['log_acc1'] == 14.0 - assert trainer.logger_connector.logged_metrics['log_acc2'] == 9.0 - - # get the output of the first optimizer - train_step_end_out = out.training_step_output_for_epoch_end - assert len(train_step_end_out) == 1 - train_step_end_out = train_step_end_out[0][0] - pbar_metrics = train_step_end_out['progress_bar'] - assert pbar_metrics['pbar_acc1'] == 19.0 - assert pbar_metrics['pbar_acc2'] == 21.0 - - -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) -def test_result_obj_lr_scheduler_epoch(tmpdir): - """ - test that the LR scheduler was called at the correct time with the correct metrics - """ - model = DeterministicModel() - model.training_step = model.training_step__for_step_end_dict - model.training_step_end = model.training_step_end__dict - model.training_epoch_end = model.training_epoch_end__dict - model.val_dataloader = None - model.configure_optimizers = model.configure_optimizers__lr_on_plateau_epoch - - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=3, - weights_summary=None, - ) - trainer.fit(model) - - assert len(trainer.dev_debugger.saved_lr_scheduler_updates) == 3 - - -def test_train_step_epoch_end(tmpdir): - """ - Checks train_step + training_epoch_end (NO training_step_end) - """ - model = DeterministicModel() - model.training_step = model.training_step__dict_return - model.training_step_end = None - model.training_epoch_end = model.training_epoch_end__dict - model.val_dataloader = None - - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - weights_summary=None, - ) - trainer.fit(model) - - # make sure correct steps were called - assert model.training_step_called - assert not model.training_step_end_called - assert model.training_epoch_end_called - - # assert epoch end metrics were added - assert trainer.logger_connector.callback_metrics['epoch_end_log_1'] == 178 - assert trainer.logger_connector.progress_bar_metrics['epoch_end_pbar_1'] == 234 - - # make sure training outputs what is expected - batch_idx, batch = 0, next(iter(model.train_dataloader())) - - out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) - assert out.signal == 0 - assert trainer.logger_connector.logged_metrics['log_acc1'] == 12.0 - assert trainer.logger_connector.logged_metrics['log_acc2'] == 7.0 - - # outputs are for 1 optimizer and no tbptt - train_step_end_out = out.training_step_output_for_epoch_end - assert len(train_step_end_out) == 1 - train_step_end_out = train_step_end_out[0][0] - - pbar_metrics = train_step_end_out['progress_bar'] - assert pbar_metrics['pbar_acc1'] == 17.0 - assert pbar_metrics['pbar_acc2'] == 19.0 From f03ec1230310eaf16e5aa62cbcad5e3795d59b39 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 30 Mar 2021 00:39:06 +0200 Subject: [PATCH 03/11] flake8 --- .../legacy_deprecate_flow_log/test_trainer_steps_dict_return.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py b/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py index 817d797e0dd52..b88788f3cc28b 100644 --- a/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py +++ b/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py @@ -14,8 +14,6 @@ """ Tests to ensure that the training loop works with a dict """ -import os -from unittest import mock from pytorch_lightning import Trainer from tests.helpers.deterministic_model import DeterministicModel From 1636e0eaa713d5a5f4a69ef7a06b1ed944c77f12 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 30 Mar 2021 01:12:25 +0200 Subject: [PATCH 04/11] Remove legacy code --- pytorch_lightning/core/step_result.py | 4 - .../logger_connector/epoch_result_store.py | 4 - .../logger_connector/logger_connector.py | 3 - pytorch_lightning/trainer/logging.py | 130 --------------- pytorch_lightning/trainer/training_loop.py | 57 +------ tests/helpers/deterministic_model.py | 63 ------- .../test_eval_loop_dict_return.py | 157 ------------------ .../test_trainer_steps_dict_return.py | 104 ------------ tests/trainer/test_trainer.py | 17 -- 9 files changed, 7 insertions(+), 532 deletions(-) delete mode 100644 tests/trainer/legacy_deprecate_flow_log/test_eval_loop_dict_return.py delete mode 100644 tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 1db9bd4927cea..eb0f26cec2bbc 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -526,10 +526,6 @@ def reduce_across_time(cls, time_outputs): # auto-reduce across time for tbptt meta = time_outputs[0]['meta'] - # in 1.0 the results have 'extra'. Once we deprecate 0.10.0 we may not need this - if 'extra' in time_outputs[0]: - [x.pop('extra', None) for x in time_outputs] - result = cls() result = recursive_gather(time_outputs, result) recursive_stack(result) diff --git a/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py b/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py index e2ce66c86ecff..04e3b1b5d1b00 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py @@ -394,12 +394,10 @@ def run_batch_from_func_name(self, func_name) -> Dict: def get_latest_batch_log_metrics(self) -> Dict: batch_log_metrics = self.run_batch_from_func_name("get_batch_log_metrics") - batch_log_metrics.update(self.legacy_batch_log_metrics) return batch_log_metrics def get_latest_batch_pbar_metrics(self) -> Dict: batch_pbar_metrics = self.run_batch_from_func_name("get_batch_pbar_metrics") - batch_pbar_metrics.update(self.legacy_batch_pbar_metrics) return batch_pbar_metrics @property @@ -451,8 +449,6 @@ def reset(self): self._opt_idx: Optional[int] = None self._batch_size: Optional[int] = None self._has_batch_loop_finished = False - self.legacy_batch_log_metrics = {} - self.legacy_batch_pbar_metrics = {} def __call__( self, diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index 6752411fcbfcd..8fd716ee64ad7 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -191,10 +191,7 @@ def cache_training_step_metrics(self, opt_closure_result): self.add_progress_bar_metrics(pbar_metrics_tmp) self._callback_metrics.update(callback_metrics_tmp) - - # save legacy log metrics self._logged_metrics.update(logged_metrics_tmp) - self.cached_results.legacy_batch_log_metrics.update(logged_metrics_tmp) def log_metrics(self, metrics, grad_norm_dic, step=None): """Logs the metric dict passed in. diff --git a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py index 8aaac0a659152..d53344a2f9eaf 100644 --- a/pytorch_lightning/trainer/logging.py +++ b/pytorch_lightning/trainer/logging.py @@ -26,11 +26,6 @@ class TrainerLoggingMixin(ABC): - # this is just a summary on variables used in this abstract class, - # the proper values/initialisation should be done in child class - _distrib_type: DistributedType - num_gpus: int - def metrics_to_scalars(self, metrics): new_metrics = {} # TODO: this is duplicated in MetricsHolder. should be unified @@ -49,128 +44,3 @@ def metrics_to_scalars(self, metrics): new_metrics[k] = v return new_metrics - - def process_dict_result(self, output, train=False): - """Reduces output according to the training mode. - - Separates loss from logging and progress bar metrics - """ - # -------------------- - # WARN DEPRECATED KEYS - # -------------------- - # TODO: 1.0.0 remove - if isinstance(output, dict): - for k, v in output.items(): - if k in ['log', 'progress_bar']: - m = inspect.cleandoc( - f"The {{{k}:dict keyword}} was deprecated in 0.9.1 and will be removed in 1.0.0\n" - " Please use self.log(...) inside the lightningModule instead.\n" - " # log on a step or aggregate epoch metric to the logger and/or progress bar" - " (inside LightningModule)\n" - " self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)" - ) - rank_zero_warn(m) - - # -------------------------- - # handle single scalar only - # -------------------------- - # single scalar returned from a xx_step - if isinstance(output, torch.Tensor): - return output, {}, {}, None - - # --------------- - # EXTRACT PROGRESS BAR KEYS - # --------------- - try: - progress_output = output['progress_bar'] - - # reduce progress metrics for progress bar when using dp - if train and self._distrib_type in (DistributedType.DP, DistributedType.DDP2): - num_gpus = self.num_gpus - progress_output = self.reduce_distributed_output(progress_output, num_gpus) - - progress_bar_metrics = progress_output - # todo: specify the possible exception - except Exception: - progress_bar_metrics = {} - - # --------------- - # EXTRACT LOGGING KEYS - # --------------- - # extract metrics to log to experiment - try: - log_output = output['log'] - - # reduce progress metrics for progress bar when using dp - if train and self._distrib_type in (DistributedType.DP, DistributedType.DDP2): - num_gpus = self.num_gpus - log_output = self.reduce_distributed_output(log_output, num_gpus) - - log_metrics = log_output - # todo: specify the possible exception - except Exception: - log_metrics = {} - - # --------------- - # EXTRACT LOSS - # --------------- - # if output dict doesn't have the keyword loss - # then assume the output=loss if scalar - loss = None - if train: - try: - loss = output['loss'] - # todo: specify the possible exception - except Exception as exp: - if isinstance(output, torch.Tensor): - loss = output - else: - raise RuntimeError( - 'No `loss` value in the dictionary returned from `model.training_step()`.' - ) from exp - - # when using dp need to reduce the loss - if self._distrib_type in (DistributedType.DP, DistributedType.DDP2): - loss = self.reduce_distributed_output(loss, self.num_gpus) - - # --------------- - # EXTRACT HIDDEN - # --------------- - hiddens = output.get('hiddens', None) if isinstance(output, Mapping) else None - if hiddens is not None: - hiddens = hiddens.detach() - - # detach all metrics for callbacks to prevent memory leaks - # no .item() because it will slow things down - progress_bar_metrics = recursive_detach(progress_bar_metrics) - log_metrics = recursive_detach(log_metrics) - - return loss, progress_bar_metrics, log_metrics, hiddens - - def reduce_distributed_output(self, output, num_gpus): - if num_gpus <= 1: - return output - - # when using DP, we get one output per gpu - # average outputs and return - if isinstance(output, torch.Tensor): - return output.mean() - - for k, v in output.items(): - # recurse on nested dics - if isinstance(output[k], dict): - output[k] = self.reduce_distributed_output(output[k], num_gpus) - - # compute the average of scalars - elif isinstance(output[k], list): - output[k] = sum(output[k]) / len(output[k]) - - # do nothing when there's a scalar - elif isinstance(output[k], torch.Tensor) and output[k].dim() == 0: - pass - - # do not reduce metrics that have batch size > num gpus - elif output[k].size(0) <= num_gpus: - output[k] = torch.mean(output[k]) - - return output diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 696f14742935c..ae88f2d983a2b 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -242,12 +242,7 @@ def get_optimizers_iterable(self): return [[opt_idx, self.trainer.optimizers[opt_idx]]] def on_after_backward(self, training_step_output, batch_idx, untouched_loss): - is_result_obj = isinstance(training_step_output, Result) - - if is_result_obj: - training_step_output = training_step_output.detach() - else: - training_step_output.batch_loss = training_step_output.batch_loss.detach() + training_step_output.detach() # insert after step hook self.trainer.call_hook("on_after_backward") @@ -284,24 +279,16 @@ def training_step(self, split_batch, batch_idx, opt_idx, hiddens): training_step_output_for_epoch_end, training_step_output = self._process_training_step_output( training_step_output, split_batch ) - is_result_obj = isinstance(training_step_output, Result) - if training_step_output_for_epoch_end is None: - return None + return # enable empty loss when using manual opt closure_loss = None untouched_loss = None if self.automatic_optimization: - # accumulate loss - # (if accumulate_grad_batches = 1 no effect) - if is_result_obj: - closure_loss = training_step_output.minimize - else: - closure_loss = training_step_output.batch_loss - - closure_loss = closure_loss / self.trainer.accumulate_grad_batches + # accumulate loss. if accumulate_grad_batches==1, no effect + closure_loss = training_step_output.minimize / self.trainer.accumulate_grad_batches # the loss will get scaled for amp. avoid any modifications to it untouched_loss = closure_loss.detach().clone() @@ -322,35 +309,6 @@ def _process_training_step_output(self, training_step_output, split_batch): if training_step_output_for_epoch_end is None: return None, None - # ----------------------------------------- - # process hybrid (1.0) - # ----------------------------------------- - # no need for these checks in 1.0.0 - # TODO: remove checks in 1.0.0 - is_tensor = isinstance(training_step_output_for_epoch_end, torch.Tensor) - is_1_0_output = is_tensor or ("log" not in training_step_output and "progress_bar" not in training_step_output) - if is_1_0_output: - return self._process_training_step_output_1_0(training_step_output, split_batch) - - # ----------------------------------------- - # process old dict (deprecate 1.0) - # ----------------------------------------- - training_step_output = self.trainer.process_dict_result(training_step_output, train=True) - - training_step_output = AttributeDict( - batch_loss=training_step_output[0], - pbar_on_batch_end=training_step_output[1], - log_metrics=training_step_output[2], - ) - # if the user decides to finally reduce things in epoch_end, save raw output without graphs - if isinstance(training_step_output_for_epoch_end, torch.Tensor): - training_step_output_for_epoch_end = training_step_output_for_epoch_end.detach() - else: - training_step_output_for_epoch_end = recursive_detach(training_step_output_for_epoch_end) - - return training_step_output_for_epoch_end, training_step_output - - def _process_training_step_output_1_0(self, training_step_output, split_batch): result = self.trainer.lightning_module._results loss = None @@ -361,6 +319,8 @@ def _process_training_step_output_1_0(self, training_step_output, split_batch): if isinstance(training_step_output, dict): loss = training_step_output.pop("loss", None) hiddens = training_step_output.pop("hiddens", None) + if hiddens is not None: + hiddens = hiddens.detach() result["extra"] = training_step_output # handle scalar return @@ -380,10 +340,7 @@ def _process_training_step_output_1_0(self, training_step_output, split_batch): if self.trainer.move_metrics_to_cpu: training_step_output_for_epoch_end = training_step_output_for_epoch_end.cpu() - # what flows back into the system - training_step_output = result - - return training_step_output_for_epoch_end, training_step_output + return training_step_output_for_epoch_end, result def optimizer_step(self, optimizer, opt_idx, batch_idx, train_step_and_backward_closure): model_ref = self.trainer.lightning_module diff --git a/tests/helpers/deterministic_model.py b/tests/helpers/deterministic_model.py index 20830a1bc6fc2..613c92a6c56e1 100644 --- a/tests/helpers/deterministic_model.py +++ b/tests/helpers/deterministic_model.py @@ -104,69 +104,6 @@ def training_epoch_end__scalar(self, outputs): assert batch_out.grad_fn is None assert isinstance(batch_out, torch.Tensor) - # -------------------------- - # dictionary returns - # -------------------------- - def training_step__dict_return(self, batch, batch_idx): - acc = self.step(batch, batch_idx) - - logs = {'log_acc1': torch.tensor(12).type_as(acc), 'log_acc2': torch.tensor(7).type_as(acc)} - pbar = {'pbar_acc1': torch.tensor(17).type_as(acc), 'pbar_acc2': torch.tensor(19).type_as(acc)} - - self.training_step_called = True - return {'loss': acc, 'log': logs, 'progress_bar': pbar, 'train_step_test': torch.tensor(549).type_as(acc)} - - def training_step_end__dict(self, output): - self.training_step_end_called = True - - # make sure loss has the grad - assert 'loss' in output - assert output['loss'].grad_fn is not None - - # make sure nothing else has grads - assert self.count_num_graphs(output) == 1 - - # make sure the other keys are there - assert 'log_acc1' in output - assert 'log_acc2' in output - assert 'pbar_acc1' in output - assert 'pbar_acc2' in output - - logs = {'log_acc1': output['log_acc1'] + 2, 'log_acc2': output['log_acc2'] + 2} - pbar = {'pbar_acc1': output['pbar_acc1'] + 2, 'pbar_acc2': output['pbar_acc2'] + 2} - - acc = output['loss'] - return {'loss': acc, 'log': logs, 'progress_bar': pbar, 'train_step_end': acc} - - def validation_step__no_return(self, batch, batch_idx): - self.validation_step_called = True - self.step(batch, batch_idx) - - def validation_step__scalar_return(self, batch, batch_idx): - self.validation_step_called = True - acc = self.step(batch, batch_idx) - return acc - - def validation_step__dummy_dict_return(self, batch, batch_idx): - self.validation_step_called = True - acc = self.step(batch, batch_idx) - return {'some': acc, 'value': 'a'} - - def validation_step__dict_return(self, batch, batch_idx): - self.validation_step_called = True - acc = self.step(batch, batch_idx) - - logs = {'log_acc1': torch.tensor(12 + batch_idx).type_as(acc), 'log_acc2': torch.tensor(7).type_as(acc)} - pbar = {'pbar_acc1': torch.tensor(17).type_as(acc), 'pbar_acc2': torch.tensor(19).type_as(acc)} - return {'val_loss': acc, 'log': logs, 'progress_bar': pbar} - - def validation_step_end__no_return(self, val_step_output): - assert len(val_step_output) == 3 - assert val_step_output['val_loss'] == 171 - assert val_step_output['log']['log_acc1'] >= 12 - assert val_step_output['progress_bar']['pbar_acc1'] == 17 - self.validation_step_end_called = True - def validation_step_end(self, val_step_output): assert len(val_step_output) == 3 assert val_step_output['val_loss'] == 171 diff --git a/tests/trainer/legacy_deprecate_flow_log/test_eval_loop_dict_return.py b/tests/trainer/legacy_deprecate_flow_log/test_eval_loop_dict_return.py deleted file mode 100644 index 0894acd5fe72d..0000000000000 --- a/tests/trainer/legacy_deprecate_flow_log/test_eval_loop_dict_return.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Tests to ensure that the training loop works with a dict -""" -import pytest - -from pytorch_lightning import Trainer -from pytorch_lightning.core.lightning import LightningModule -from tests.helpers.deterministic_model import DeterministicModel - - -def test_validation_step_no_return(tmpdir): - """ - Test that val step can return nothing - """ - - class TestModel(DeterministicModel): - - def backward(self, loss, optimizer, optimizer_idx): - return LightningModule.backward(self, loss, optimizer, optimizer_idx) - - model = TestModel() - model.training_step = model.training_step__dict_return - model.validation_step = model.validation_step__no_return - model.validation_step_end = None - model.validation_epoch_end = None - - trainer = Trainer( - default_root_dir=tmpdir, - fast_dev_run=True, - weights_summary=None, - ) - trainer.fit(model) - - # out are the results of the full loop - # eval_results are output of _evaluate - with pytest.warns(RuntimeWarning, match="the running stage is set to None"): - out, eval_results = trainer.run_evaluation() - assert len(out) == 1 - assert len(eval_results) == 0 - - # make sure correct steps were called - assert model.validation_step_called - assert not model.validation_step_end_called - assert not model.validation_epoch_end_called - - -def test_validation_step_scalar_return(tmpdir): - """ - Test that val step can return a scalar - """ - model = DeterministicModel() - model.training_step = model.training_step__dict_return - model.validation_step = model.validation_step__scalar_return - model.validation_step_end = None - model.validation_epoch_end = None - - trainer = Trainer( - default_root_dir=tmpdir, - weights_summary=None, - limit_train_batches=2, - limit_val_batches=2, - max_epochs=2, - ) - trainer.fit(model) - - # out are the results of the full loop - # eval_results are output of _evaluate - out, eval_results = trainer.run_evaluation() - assert len(out) == 1 - assert len(eval_results) == 2 - assert eval_results[0] == 171 and eval_results[1] == 171 - - # make sure correct steps were called - assert model.validation_step_called - assert not model.validation_step_end_called - assert not model.validation_epoch_end_called - - -def test_validation_step_arbitrary_dict_return(tmpdir): - """ - Test that val step can return an arbitrary dict - """ - model = DeterministicModel() - model.training_step = model.training_step__dict_return - model.validation_step = model.validation_step__dummy_dict_return - model.validation_step_end = None - model.validation_epoch_end = None - - trainer = Trainer( - default_root_dir=tmpdir, - weights_summary=None, - limit_train_batches=2, - limit_val_batches=2, - max_epochs=2, - ) - trainer.fit(model) - - # out are the results of the full loop - # eval_results are output of _evaluate - callback_metrics, eval_results = trainer.run_evaluation() - assert len(callback_metrics) == 1 - assert len(eval_results) == 2 - assert eval_results[0]['some'] == 171 - assert eval_results[1]['some'] == 171 - - assert eval_results[0]['value'] == 'a' - assert eval_results[1]['value'] == 'a' - - # make sure correct steps were called - assert model.validation_step_called - assert not model.validation_step_end_called - assert not model.validation_epoch_end_called - - -def test_val_step_step_end_no_return(tmpdir): - """ - Test that val step + val step end work (with no return in val step end) - """ - - model = DeterministicModel() - model.training_step = model.training_step__dict_return - model.validation_step = model.validation_step__dict_return - model.validation_step_end = model.validation_step_end__no_return - model.validation_epoch_end = None - - trainer = Trainer( - default_root_dir=tmpdir, - weights_summary=None, - limit_train_batches=2, - limit_val_batches=2, - max_epochs=2, - ) - trainer.fit(model) - - # out are the results of the full loop - # eval_results are output of _evaluate - callback_metrics, eval_results = trainer.run_evaluation() - assert len(callback_metrics) == 1 - assert len(eval_results) == 0 - - # make sure correct steps were called - assert model.validation_step_called - assert model.validation_step_end_called - assert not model.validation_epoch_end_called diff --git a/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py b/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py deleted file mode 100644 index b88788f3cc28b..0000000000000 --- a/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_dict_return.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Tests to ensure that the training loop works with a dict -""" - -from pytorch_lightning import Trainer -from tests.helpers.deterministic_model import DeterministicModel - - -def test_training_step_dict(tmpdir): - """ - Tests that only training_step can be used - """ - model = DeterministicModel() - model.training_step = model.training_step__dict_return - model.val_dataloader = None - - trainer = Trainer( - default_root_dir=tmpdir, - fast_dev_run=True, - weights_summary=None, - ) - trainer.fit(model) - - # make sure correct steps were called - assert model.training_step_called - assert not model.training_step_end_called - assert not model.training_epoch_end_called - - # make sure training outputs what is expected - for batch_idx, batch in enumerate(model.train_dataloader()): - break - - out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) - - assert out.signal == 0 - assert trainer.logger_connector.logged_metrics['log_acc1'] == 12.0 - assert trainer.logger_connector.logged_metrics['log_acc2'] == 7.0 - - train_step_out = out.training_step_output_for_epoch_end - assert len(train_step_out) == 1 - - train_step_out = train_step_out[0][0] - pbar_metrics = train_step_out['progress_bar'] - assert 'log' in train_step_out - assert 'progress_bar' in train_step_out - assert train_step_out['train_step_test'] == 549 - assert pbar_metrics['pbar_acc1'] == 17.0 - assert pbar_metrics['pbar_acc2'] == 19.0 - - # make sure the optimizer closure returns the correct things - opt_closure_result = trainer.train_loop.training_step_and_backward( - batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens - ) - assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) - - -def training_step_with_step_end(tmpdir): - """ - Checks train_step + training_step_end - """ - model = DeterministicModel() - model.training_step = model.training_step__dict_return - model.training_step_end = model.training_step_end__dict - model.val_dataloader = None - - trainer = Trainer( - default_root_dir=tmpdir, - fast_dev_run=True, - weights_summary=None, - ) - trainer.fit(model) - - # make sure correct steps were called - assert model.training_step_called - assert model.training_step_end_called - assert not model.training_epoch_end_called - - # make sure training outputs what is expected - for batch_idx, batch in enumerate(model.train_dataloader()): - break - - out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) - assert out.signal == 0 - assert trainer.logger_connector.logged_metrics['log_acc1'] == 14.0 - assert trainer.logger_connector.logged_metrics['log_acc2'] == 9.0 - - train_step_end_out = out.training_step_output_for_epoch_end - pbar_metrics = train_step_end_out['progress_bar'] - assert 'train_step_end' in train_step_end_out - assert pbar_metrics['pbar_acc1'] == 19.0 - assert pbar_metrics['pbar_acc2'] == 21.0 diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index ee93ca59eca76..6944ddf3c2f7e 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -302,23 +302,6 @@ def test_loading_yaml(tmpdir): assert tags["batch_size"] == 32 and tags["hidden_dim"] == 1000 -def test_dp_output_reduce(): - mixin = TrainerLoggingMixin() - - # test identity when we have a single gpu - out = torch.rand(3, 1) - assert mixin.reduce_distributed_output(out, num_gpus=1) is out - - # average when we have multiples - assert mixin.reduce_distributed_output(out, num_gpus=2) == out.mean() - - # when we have a dict of vals - out = {"a": out, "b": {"c": out}} - reduced = mixin.reduce_distributed_output(out, num_gpus=3) - assert reduced["a"] == out["a"] - assert reduced["b"]["c"] == out["b"]["c"] - - @pytest.mark.parametrize( "save_top_k,save_last,expected_files", [ From 8196fb92a6bdb12c5a376285b1d8d287b9a31273 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 30 Mar 2021 01:28:38 +0200 Subject: [PATCH 05/11] Update tests --- tests/helpers/deterministic_model.py | 39 --- ...oop_flow_1_0.py => test_eval_loop_flow.py} | 46 +++- tests/trainer/data_flow/test_flow_warnings.py | 3 - ...ct_1_0.py => test_train_loop_flow_dict.py} | 6 - ..._1_0.py => test_train_loop_flow_scalar.py} | 57 +++- .../legacy_deprecate_flow_log/__init__.py | 0 .../test_trainer_steps_scalar_return.py | 249 ------------------ ...gging_1_0.py => test_eval_loop_logging.py} | 0 ...ging_1_0.py => test_train_loop_logging.py} | 0 9 files changed, 90 insertions(+), 310 deletions(-) rename tests/trainer/data_flow/{test_eval_loop_flow_1_0.py => test_eval_loop_flow.py} (81%) rename tests/trainer/data_flow/{test_train_loop_flow_dict_1_0.py => test_train_loop_flow_dict.py} (96%) rename tests/trainer/data_flow/{test_train_loop_flow_scalar_1_0.py => test_train_loop_flow_scalar.py} (79%) delete mode 100644 tests/trainer/legacy_deprecate_flow_log/__init__.py delete mode 100644 tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_scalar_return.py rename tests/trainer/logging_/{test_eval_loop_logging_1_0.py => test_eval_loop_logging.py} (100%) rename tests/trainer/logging_/{test_train_loop_logging_1_0.py => test_train_loop_logging.py} (100%) diff --git a/tests/helpers/deterministic_model.py b/tests/helpers/deterministic_model.py index 613c92a6c56e1..9f884fe041cb0 100644 --- a/tests/helpers/deterministic_model.py +++ b/tests/helpers/deterministic_model.py @@ -65,45 +65,6 @@ def count_num_graphs(self, result, num_graphs=0): return num_graphs - # --------------------------- - # scalar return - # --------------------------- - def training_step__scalar_return(self, batch, batch_idx): - acc = self.step(batch, batch_idx) - self.training_step_called = True - return acc - - def training_step_end__scalar(self, output): - self.training_step_end_called = True - - # make sure loss has the grad - assert isinstance(output, torch.Tensor) - assert output.grad_fn is not None - - # make sure nothing else has grads - assert self.count_num_graphs({'loss': output}) == 1 - - assert output == 171 - - return output - - def training_epoch_end__scalar(self, outputs): - """ - There should be an array of scalars without graphs that are all 171 (4 of them) - """ - self.training_epoch_end_called = True - - if self._distrib_type in (DistributedType.DP, DistributedType.DDP2): - pass - else: - # only saw 4 batches - assert len(outputs) == 4 - for batch_out in outputs: - batch_out = batch_out['loss'] - assert batch_out == 171 - assert batch_out.grad_fn is None - assert isinstance(batch_out, torch.Tensor) - def validation_step_end(self, val_step_output): assert len(val_step_output) == 3 assert val_step_output['val_loss'] == 171 diff --git a/tests/trainer/data_flow/test_eval_loop_flow_1_0.py b/tests/trainer/data_flow/test_eval_loop_flow.py similarity index 81% rename from tests/trainer/data_flow/test_eval_loop_flow_1_0.py rename to tests/trainer/data_flow/test_eval_loop_flow.py index a6de667bf8c19..575de5727a21e 100644 --- a/tests/trainer/data_flow/test_eval_loop_flow_1_0.py +++ b/tests/trainer/data_flow/test_eval_loop_flow.py @@ -14,8 +14,6 @@ """ Tests to ensure that the training loop works with a dict (1.0) """ -import os -from unittest import mock import pytest import torch @@ -25,7 +23,6 @@ from tests.helpers.deterministic_model import DeterministicModel -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__eval_step__flow(tmpdir): """ Tests that only training_step can be used @@ -69,8 +66,27 @@ def backward(self, loss, optimizer, optimizer_idx): assert not model.validation_step_end_called assert not model.validation_epoch_end_called + # make sure training outputs what is expected + for batch_idx, batch in enumerate(model.train_dataloader()): + break + + out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) + assert out.signal == 0 + assert len(out.grad_norm_dic) == 0 and isinstance(out.grad_norm_dic, dict) + + train_step_out = out.training_step_output_for_epoch_end + assert len(train_step_out) == 1 + train_step_out = train_step_out[0][0] + assert isinstance(train_step_out['minimize'], torch.Tensor) + assert train_step_out['minimize'].item() == 171 + + # make sure the optimizer closure returns the correct things + opt_closure_result = trainer.train_loop.training_step_and_backward( + batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens + ) + assert opt_closure_result['loss'].item() == 171 + -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__eval_step__eval_step_end__flow(tmpdir): """ Tests that only training_step can be used @@ -119,8 +135,27 @@ def backward(self, loss, optimizer, optimizer_idx): assert model.validation_step_end_called assert not model.validation_epoch_end_called + # make sure training outputs what is expected + for batch_idx, batch in enumerate(model.train_dataloader()): + break + + out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) + assert out.signal == 0 + assert len(out.grad_norm_dic) == 0 and isinstance(out.grad_norm_dic, dict) + + train_step_out = out.training_step_output_for_epoch_end + assert len(train_step_out) == 1 + train_step_out = train_step_out[0][0] + assert isinstance(train_step_out['minimize'], torch.Tensor) + assert train_step_out['minimize'].item() == 171 + + # make sure the optimizer closure returns the correct things + opt_closure_result = trainer.train_loop.training_step_and_backward( + batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens + ) + assert opt_closure_result['loss'].item() == 171 + -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__eval_step__epoch_end__flow(tmpdir): """ Tests that only training_step can be used @@ -180,7 +215,6 @@ def backward(self, loss, optimizer, optimizer_idx): assert model.validation_epoch_end_called -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__validation_step__step_end__epoch_end__flow(tmpdir): """ Tests that only training_step can be used diff --git a/tests/trainer/data_flow/test_flow_warnings.py b/tests/trainer/data_flow/test_flow_warnings.py index d3280b8eb6a86..469ff648684f6 100644 --- a/tests/trainer/data_flow/test_flow_warnings.py +++ b/tests/trainer/data_flow/test_flow_warnings.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os import warnings -from unittest import mock from pytorch_lightning import Trainer from tests.helpers.boring_model import BoringModel @@ -26,7 +24,6 @@ def training_step(self, batch, batch_idx): return acc -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test_no_depre_without_epoch_end(tmpdir): """ Tests that only training_step can be used diff --git a/tests/trainer/data_flow/test_train_loop_flow_dict_1_0.py b/tests/trainer/data_flow/test_train_loop_flow_dict.py similarity index 96% rename from tests/trainer/data_flow/test_train_loop_flow_dict_1_0.py rename to tests/trainer/data_flow/test_train_loop_flow_dict.py index f38dda9c530ca..998e5958b9055 100644 --- a/tests/trainer/data_flow/test_train_loop_flow_dict_1_0.py +++ b/tests/trainer/data_flow/test_train_loop_flow_dict.py @@ -14,8 +14,6 @@ """ Tests to ensure that the training loop works with a dict (1.0) """ -import os -from unittest import mock import torch @@ -24,7 +22,6 @@ from tests.helpers.deterministic_model import DeterministicModel -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__training_step__flow_dict(tmpdir): """ Tests that only training_step can be used @@ -60,7 +57,6 @@ def backward(self, loss, optimizer, optimizer_idx): assert not model.training_epoch_end_called -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__training_step__tr_step_end__flow_dict(tmpdir): """ Tests that only training_step can be used @@ -103,7 +99,6 @@ def backward(self, loss, optimizer, optimizer_idx): assert not model.training_epoch_end_called -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__training_step__epoch_end__flow_dict(tmpdir): """ Tests that only training_step can be used @@ -152,7 +147,6 @@ def backward(self, loss, optimizer, optimizer_idx): assert model.training_epoch_end_called -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__training_step__step_end__epoch_end__flow_dict(tmpdir): """ Tests that only training_step can be used diff --git a/tests/trainer/data_flow/test_train_loop_flow_scalar_1_0.py b/tests/trainer/data_flow/test_train_loop_flow_scalar.py similarity index 79% rename from tests/trainer/data_flow/test_train_loop_flow_scalar_1_0.py rename to tests/trainer/data_flow/test_train_loop_flow_scalar.py index d5a4da79942ed..97fd9063456c0 100644 --- a/tests/trainer/data_flow/test_train_loop_flow_scalar_1_0.py +++ b/tests/trainer/data_flow/test_train_loop_flow_scalar.py @@ -14,8 +14,6 @@ """ Tests to ensure that the training loop works with a dict (1.0) """ -import os -from unittest import mock import pytest import torch @@ -27,7 +25,6 @@ from tests.helpers.utils import no_warning_call -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__training_step__flow_scalar(tmpdir): """ Tests that only training_step can be used @@ -63,7 +60,6 @@ def backward(self, loss, optimizer, optimizer_idx): assert not model.training_epoch_end_called -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__training_step__tr_step_end__flow_scalar(tmpdir): """ Tests that only training_step can be used @@ -106,7 +102,6 @@ def backward(self, loss, optimizer, optimizer_idx): assert not model.training_epoch_end_called -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__training_step__epoch_end__flow_scalar(tmpdir): """ Tests that only training_step can be used @@ -154,11 +149,35 @@ def backward(self, loss, optimizer, optimizer_idx): assert not model.training_step_end_called assert model.training_epoch_end_called + # assert epoch end metrics were added + assert len(trainer.logger_connector.callback_metrics) == 0 + assert len(trainer.logger_connector.progress_bar_metrics) == 0 + + # make sure training outputs what is expected + for batch_idx, batch in enumerate(model.train_dataloader()): + break + + out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) + assert out.signal == 0 + assert len(out.grad_norm_dic) == 0 and isinstance(out.grad_norm_dic, dict) + + train_step_out = out.training_step_output_for_epoch_end + assert len(train_step_out) == 1 + train_step_out = train_step_out[0][0] + assert isinstance(train_step_out['minimize'], torch.Tensor) + assert train_step_out['minimize'].item() == 171 + + # make sure the optimizer closure returns the correct things + opt_closure_result = trainer.train_loop.training_step_and_backward( + batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens + ) + assert opt_closure_result['loss'].item() == 171 + -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__training_step__step_end__epoch_end__flow_scalar(tmpdir): """ - Tests that only training_step can be used + Checks train_step + training_step_end + training_epoch_end + (all with scalar return from train_step) """ class TestModel(DeterministicModel): @@ -209,6 +228,30 @@ def backward(self, loss, optimizer, optimizer_idx): assert model.training_step_end_called assert model.training_epoch_end_called + # assert epoch end metrics were added + assert len(trainer.logger_connector.callback_metrics) == 0 + assert len(trainer.logger_connector.progress_bar_metrics) == 0 + + # make sure training outputs what is expected + for batch_idx, batch in enumerate(model.train_dataloader()): + break + + out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) + assert out.signal == 0 + assert len(out.grad_norm_dic) == 0 and isinstance(out.grad_norm_dic, dict) + + train_step_out = out.training_step_output_for_epoch_end + assert len(train_step_out) == 1 + train_step_out = train_step_out[0][0] + assert isinstance(train_step_out['minimize'], torch.Tensor) + assert train_step_out['minimize'].item() == 171 + + # make sure the optimizer closure returns the correct things + opt_closure_result = trainer.train_loop.training_step_and_backward( + batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens + ) + assert opt_closure_result['loss'].item() == 171 + def test_train_step_no_return(tmpdir): """ diff --git a/tests/trainer/legacy_deprecate_flow_log/__init__.py b/tests/trainer/legacy_deprecate_flow_log/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_scalar_return.py b/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_scalar_return.py deleted file mode 100644 index 5836251f2c92a..0000000000000 --- a/tests/trainer/legacy_deprecate_flow_log/test_trainer_steps_scalar_return.py +++ /dev/null @@ -1,249 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Tests to ensure that the training loop works with a scalar -""" -import os -from unittest import mock - -import torch - -from pytorch_lightning import Trainer -from tests.helpers import BoringModel -from tests.helpers.deterministic_model import DeterministicModel -from tests.helpers.runif import RunIf - - -def test_training_step_scalar(tmpdir): - """ - Tests that only training_step that returns a single scalar can be used - """ - model = DeterministicModel() - model.training_step = model.training_step__scalar_return - model.val_dataloader = None - - trainer = Trainer( - default_root_dir=tmpdir, - fast_dev_run=True, - weights_summary=None, - ) - trainer.fit(model) - - # make sure correct steps were called - assert model.training_step_called - assert not model.training_step_end_called - assert not model.training_epoch_end_called - - # make sure training outputs what is expected - for batch_idx, batch in enumerate(model.train_dataloader()): - break - - out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) - assert out.signal == 0 - assert len(out.grad_norm_dic) == 0 and isinstance(out.grad_norm_dic, dict) - - train_step_out = out.training_step_output_for_epoch_end - assert len(train_step_out) == 1 - train_step_out = train_step_out[0][0] - assert isinstance(train_step_out['minimize'], torch.Tensor) - assert train_step_out['minimize'].item() == 171 - - # make sure the optimizer closure returns the correct things - opt_closure_result = trainer.train_loop.training_step_and_backward( - batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens - ) - assert opt_closure_result['loss'].item() == 171 - - -def training_step_scalar_with_step_end(tmpdir): - """ - Checks train_step with scalar only + training_step_end - """ - model = DeterministicModel() - model.training_step = model.training_step__scalar_return - model.training_step_end = model.training_step_end__scalar - model.val_dataloader = None - - trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, weights_summary=None) - trainer.fit(model) - - # make sure correct steps were called - assert model.training_step_called - assert model.training_step_end_called - assert not model.training_epoch_end_called - - # make sure training outputs what is expected - for batch_idx, batch in enumerate(model.train_dataloader()): - break - - out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) - assert out.signal == 0 - assert len(out.grad_norm_dic) == 0 and isinstance(out.grad_norm_dic, dict) - - train_step_out = out.training_step_output_for_epoch_end - assert len(train_step_out) == 1 - train_step_out = train_step_out[0][0] - assert isinstance(train_step_out, torch.Tensor) - assert train_step_out.item() == 171 - - # make sure the optimizer closure returns the correct things - opt_closure_result = trainer.train_loop.training_step_and_backward( - batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens - ) - assert opt_closure_result['loss'].item() == 171 - - -def test_full_training_loop_scalar(tmpdir): - """ - Checks train_step + training_step_end + training_epoch_end - (all with scalar return from train_step) - """ - - model = DeterministicModel() - model.training_step = model.training_step__scalar_return - model.training_step_end = model.training_step_end__scalar - model.training_epoch_end = model.training_epoch_end__scalar - model.val_dataloader = None - - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - weights_summary=None, - ) - trainer.fit(model) - - # make sure correct steps were called - assert model.training_step_called - assert model.training_step_end_called - assert model.training_epoch_end_called - - # assert epoch end metrics were added - assert len(trainer.logger_connector.callback_metrics) == 0 - assert len(trainer.logger_connector.progress_bar_metrics) == 0 - - # make sure training outputs what is expected - for batch_idx, batch in enumerate(model.train_dataloader()): - break - - out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) - assert out.signal == 0 - assert len(out.grad_norm_dic) == 0 and isinstance(out.grad_norm_dic, dict) - - train_step_out = out.training_step_output_for_epoch_end - assert len(train_step_out) == 1 - train_step_out = train_step_out[0][0] - assert isinstance(train_step_out['minimize'], torch.Tensor) - assert train_step_out['minimize'].item() == 171 - - # make sure the optimizer closure returns the correct things - opt_closure_result = trainer.train_loop.training_step_and_backward( - batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens - ) - assert opt_closure_result['loss'].item() == 171 - - -def test_train_step_epoch_end_scalar(tmpdir): - """ - Checks train_step + training_epoch_end (NO training_step_end) - (with scalar return) - """ - - model = DeterministicModel() - model.training_step = model.training_step__scalar_return - model.training_step_end = None - model.training_epoch_end = model.training_epoch_end__scalar - model.val_dataloader = None - - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - weights_summary=None, - ) - trainer.fit(model) - - # make sure correct steps were called - assert model.training_step_called - assert not model.training_step_end_called - assert model.training_epoch_end_called - - # assert epoch end metrics were added - assert len(trainer.logger_connector.callback_metrics) == 0 - assert len(trainer.logger_connector.progress_bar_metrics) == 0 - - # make sure training outputs what is expected - for batch_idx, batch in enumerate(model.train_dataloader()): - break - - out = trainer.train_loop.run_training_batch(batch, batch_idx, 0) - assert out.signal == 0 - assert len(out.grad_norm_dic) == 0 and isinstance(out.grad_norm_dic, dict) - - train_step_out = out.training_step_output_for_epoch_end - assert len(train_step_out) == 1 - train_step_out = train_step_out[0][0] - assert isinstance(train_step_out['minimize'], torch.Tensor) - assert train_step_out['minimize'].item() == 171 - - # make sure the optimizer closure returns the correct things - opt_closure_result = trainer.train_loop.training_step_and_backward( - batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens - ) - assert opt_closure_result['loss'].item() == 171 - - -class DPPReduceMeanPbarModel(BoringModel): - - logged = [] - - def training_step(self, batch, batch_idx): - output = self.layer(batch) - loss = self.loss(batch, output) - loss /= loss.clone().detach() - self.log('self_log', loss, prog_bar=True, sync_dist=True) - return {"loss": loss, "progress_bar": {"loss_2": loss}} - - -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) -@RunIf(min_gpus=2) -def test_dpp_reduce_mean_pbar(tmpdir): - - model = DPPReduceMeanPbarModel() - model.training_step_end = None - model.training_epoch_end = None - - distributed_backend = "ddp_spawn" - - trainer = Trainer( - max_epochs=1, - default_root_dir=tmpdir, - limit_train_batches=10, - limit_test_batches=2, - limit_val_batches=2, - accelerator=distributed_backend, - gpus=2, - precision=32, - ) - - trainer.fit(model) - - # TODO: Move this test to DDP. pbar_added_metrics is empty with ddp_spawn for some reasons - - pbar_added_metrics = trainer.dev_debugger.pbar_added_metrics - is_in = False - for pbar_metrics in pbar_added_metrics: - if 'loss_2' in pbar_metrics: - is_in = True - assert pbar_metrics["loss_2"].item() == 1 - if distributed_backend == "ddp": - assert is_in is True diff --git a/tests/trainer/logging_/test_eval_loop_logging_1_0.py b/tests/trainer/logging_/test_eval_loop_logging.py similarity index 100% rename from tests/trainer/logging_/test_eval_loop_logging_1_0.py rename to tests/trainer/logging_/test_eval_loop_logging.py diff --git a/tests/trainer/logging_/test_train_loop_logging_1_0.py b/tests/trainer/logging_/test_train_loop_logging.py similarity index 100% rename from tests/trainer/logging_/test_train_loop_logging_1_0.py rename to tests/trainer/logging_/test_train_loop_logging.py From c841289dc56bcb1f0107e1fd0c76181e527522bf Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 30 Mar 2021 01:31:30 +0200 Subject: [PATCH 06/11] flake8 --- pytorch_lightning/trainer/logging.py | 5 ----- pytorch_lightning/trainer/training_loop.py | 1 - tests/helpers/deterministic_model.py | 1 - tests/trainer/test_trainer.py | 1 - 4 files changed, 8 deletions(-) diff --git a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py index d53344a2f9eaf..78a513153880b 100644 --- a/pytorch_lightning/trainer/logging.py +++ b/pytorch_lightning/trainer/logging.py @@ -12,16 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import inspect from abc import ABC -from collections import Mapping import torch -from pytorch_lightning.utilities import DistributedType -from pytorch_lightning.utilities.distributed import rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.memory import recursive_detach class TrainerLoggingMixin(ABC): diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index ae88f2d983a2b..ab00ff7a45f68 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -28,7 +28,6 @@ from pytorch_lightning.utilities import _TPU_AVAILABLE, AMPType, DeviceType, parsing from pytorch_lightning.utilities.distributed import rank_zero_info from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.memory import recursive_detach from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.parsing import AttributeDict from pytorch_lightning.utilities.warnings import WarningCache diff --git a/tests/helpers/deterministic_model.py b/tests/helpers/deterministic_model.py index 9f884fe041cb0..87f690e2643e0 100644 --- a/tests/helpers/deterministic_model.py +++ b/tests/helpers/deterministic_model.py @@ -16,7 +16,6 @@ from torch.utils.data import DataLoader, Dataset from pytorch_lightning.core.lightning import LightningModule -from pytorch_lightning.utilities import DistributedType class DeterministicModel(LightningModule): diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 6944ddf3c2f7e..92d8eb974fea7 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -33,7 +33,6 @@ from pytorch_lightning.core.saving import load_hparams_from_tags_csv, load_hparams_from_yaml, save_hparams_to_tags_csv from pytorch_lightning.loggers import TensorBoardLogger from pytorch_lightning.profiler import AdvancedProfiler, PassThroughProfiler, PyTorchProfiler, SimpleProfiler -from pytorch_lightning.trainer.logging import TrainerLoggingMixin from pytorch_lightning.trainer.states import TrainerState from pytorch_lightning.utilities.cloud_io import load as pl_load from pytorch_lightning.utilities.exceptions import MisconfigurationException From 27fca7fd1070b800b2f034601f2762fb6bc1d27e Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 30 Mar 2021 01:46:57 +0200 Subject: [PATCH 07/11] Unnecessary dev_debugger --- tests/checkpointing/test_model_checkpoint.py | 2 -- tests/plugins/test_rpc_sequential_plugin.py | 10 ++++------ tests/trainer/logging_/test_eval_loop_logging.py | 12 ------------ tests/trainer/logging_/test_logger_connector.py | 8 ++------ .../trainer/optimization/test_manual_optimization.py | 1 - 5 files changed, 6 insertions(+), 27 deletions(-) diff --git a/tests/checkpointing/test_model_checkpoint.py b/tests/checkpointing/test_model_checkpoint.py index 5e50543d37e5f..f58ff768759e8 100644 --- a/tests/checkpointing/test_model_checkpoint.py +++ b/tests/checkpointing/test_model_checkpoint.py @@ -876,7 +876,6 @@ def validation_epoch_end(self, outputs): assert trainer.dev_debugger.checkpoint_callback_history[-1]['epoch'] == len(monitor) - 1 -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test_checkpoint_repeated_strategy(tmpdir): """ This test validates that the checkpoint can be called when provided to callbacks list @@ -923,7 +922,6 @@ def validation_step(self, batch, batch_idx): assert set(os.listdir(tmpdir.join("lightning_logs"))) == {f'version_{i}' for i in range(4)} -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test_checkpoint_repeated_strategy_extended(tmpdir): """ This test validates checkpoint can be called several times without diff --git a/tests/plugins/test_rpc_sequential_plugin.py b/tests/plugins/test_rpc_sequential_plugin.py index 688424e0f74fb..00a6220036c3e 100644 --- a/tests/plugins/test_rpc_sequential_plugin.py +++ b/tests/plugins/test_rpc_sequential_plugin.py @@ -28,7 +28,7 @@ @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) @RunIf(min_gpus=2, special=True, fairscale_pipe=True) -def test_rpc_sequential_plugin_manual(tmpdir, args=None): +def test_rpc_sequential_plugin_manual(tmpdir): model = SequentialModelRPCManual() trainer = Trainer( max_epochs=2, @@ -50,9 +50,8 @@ def test_rpc_sequential_plugin_manual(tmpdir, args=None): trainer.accelerator.training_type_plugin.exit_rpc_process() -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) @RunIf(min_gpus=2, special=True, fairscale_pipe=True) -def test_rpc_sequential_plugin_manual_amp(tmpdir, args=None): +def test_rpc_sequential_plugin_manual_amp(tmpdir): model = SequentialModelRPCManual() trainer = Trainer( max_epochs=2, @@ -74,7 +73,7 @@ def test_rpc_sequential_plugin_manual_amp(tmpdir, args=None): @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) @RunIf(min_gpus=2, special=True, fairscale_pipe=True) -def test_rpc_sequential_plugin_automatic(tmpdir, args=None): +def test_rpc_sequential_plugin_automatic(tmpdir): model = SequentialModelRPCAutomatic() trainer = Trainer( max_epochs=2, @@ -96,9 +95,8 @@ def test_rpc_sequential_plugin_automatic(tmpdir, args=None): trainer.accelerator.training_type_plugin.exit_rpc_process() -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) @RunIf(min_gpus=2, special=True, fairscale_pipe=True) -def test_rpc_sequential_plugin_with_wrong_balance(tmpdir, args=None): +def test_rpc_sequential_plugin_with_wrong_balance(tmpdir): model = SequentialModelRPCAutomatic() trainer = Trainer( max_epochs=2, diff --git a/tests/trainer/logging_/test_eval_loop_logging.py b/tests/trainer/logging_/test_eval_loop_logging.py index 32bff96baf566..c91e8fec05cf7 100644 --- a/tests/trainer/logging_/test_eval_loop_logging.py +++ b/tests/trainer/logging_/test_eval_loop_logging.py @@ -32,7 +32,6 @@ from tests.helpers.deterministic_model import DeterministicModel -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__validation_step__log(tmpdir): """ Tests that validation_step can log @@ -88,12 +87,10 @@ def backward(self, loss, optimizer, optimizer_idx): # we don't want to enable val metrics during steps because it is not something that users should do # on purpose DO NOT allow step_b... it's silly to monitor val step metrics callback_metrics = set(trainer.callback_metrics.keys()) - callback_metrics.remove('debug_epoch') expected_cb_metrics = {'a', 'a2', 'b', 'a_epoch', 'b_epoch', 'a_step'} assert expected_cb_metrics == callback_metrics -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test__validation_step__step_end__epoch_end__log(tmpdir): """ Tests that validation_step can log @@ -119,8 +116,6 @@ def validation_step(self, batch, batch_idx): def validation_step_end(self, acc): self.validation_step_end_called = True - # self.log('e', acc) - # self.log('f', acc, on_step=True, on_epoch=True) return ['random_thing'] def validation_epoch_end(self, outputs): @@ -153,10 +148,6 @@ def backward(self, loss, optimizer, optimizer_idx): 'd_step/epoch_0', 'd_step/epoch_1', 'd_epoch', - # 'e', - # 'f_step/epoch_0', - # 'f_step/epoch_1', - # 'f_epoch', 'g', } assert expected_logged_metrics == logged_metrics @@ -167,9 +158,7 @@ def backward(self, loss, optimizer, optimizer_idx): # we don't want to enable val metrics during steps because it is not something that users should do callback_metrics = set(trainer.callback_metrics.keys()) - callback_metrics.remove('debug_epoch') expected_cb_metrics = {'a', 'b', 'b_epoch', 'c', 'd', 'd_epoch', 'g', 'b_step'} - # expected_cb_metrics = {'a', 'b', 'c', 'd', 'e', 'b_epoch', 'd_epoch', 'f_epoch', 'f', 'g', 'b_step'} assert expected_cb_metrics == callback_metrics @@ -223,7 +212,6 @@ def validation_epoch_end(self, outputs): assert len(trainer.dev_debugger.logged_metrics) == max_epochs -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test_eval_float_logging(tmpdir): """ Tests that only training_step can be used diff --git a/tests/trainer/logging_/test_logger_connector.py b/tests/trainer/logging_/test_logger_connector.py index c4ba371e6c561..0a8d51c62f09d 100644 --- a/tests/trainer/logging_/test_logger_connector.py +++ b/tests/trainer/logging_/test_logger_connector.py @@ -53,13 +53,11 @@ def wrapper(self, *args, **kwargs) -> Any: return decorator -def test__logger_connector__epoch_result_store__train(tmpdir, monkeypatch): +def test__logger_connector__epoch_result_store__train(tmpdir): """ Tests that LoggerConnector will properly capture logged information and reduce them """ - monkeypatch.setenv("PL_DEV_DEBUG", "1") - class TestModel(BoringModel): train_losses = [] @@ -208,12 +206,10 @@ def training_step_end(self, *_): @pytest.mark.parametrize('num_dataloaders', [1, 2]) -def test__logger_connector__epoch_result_store__test_multi_dataloaders(tmpdir, monkeypatch, num_dataloaders): +def test__logger_connector__epoch_result_store__test_multi_dataloaders(tmpdir, num_dataloaders): """ Tests that LoggerConnector will properly capture logged information in multi dataloaders scenario """ - monkeypatch.setenv("PL_DEV_DEBUG", "1") - class TestModel(BoringModel): test_losses = {dl_idx: [] for dl_idx in range(num_dataloaders)} diff --git a/tests/trainer/optimization/test_manual_optimization.py b/tests/trainer/optimization/test_manual_optimization.py index e197e9b35adc9..8ad603a7677ea 100644 --- a/tests/trainer/optimization/test_manual_optimization.py +++ b/tests/trainer/optimization/test_manual_optimization.py @@ -872,7 +872,6 @@ def configure_optimizers(self): step_mock.assert_has_calls(expected_calls) -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) @patch("torch.optim.Adam.step") @patch("torch.optim.SGD.step") def test_step_with_optimizer_closure_with_different_frequencies(mock_sgd_step, mock_adam_step, tmpdir): From 3eb001e1a754ec51d7ea78275a37c9f47493e7be Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 30 Mar 2021 02:02:07 +0200 Subject: [PATCH 08/11] Fix tests --- .../trainer/logging_/test_logger_connector.py | 27 +++++++++ .../trainer/test_correct_freq_accumulation.py | 58 ------------------- tests/trainer/test_trainer.py | 2 +- 3 files changed, 28 insertions(+), 59 deletions(-) delete mode 100644 tests/trainer/test_correct_freq_accumulation.py diff --git a/tests/trainer/logging_/test_logger_connector.py b/tests/trainer/logging_/test_logger_connector.py index 0a8d51c62f09d..923821a5e50e4 100644 --- a/tests/trainer/logging_/test_logger_connector.py +++ b/tests/trainer/logging_/test_logger_connector.py @@ -14,8 +14,10 @@ """ Tests to ensure that the training loop works with a dict (1.0) """ +import os from copy import deepcopy from typing import Any, Callable +from unittest import mock import pytest import torch @@ -557,3 +559,28 @@ def validation_step(self, *args, **kwargs): else: assert 'val_loss_custom_naming_0' in logged assert 'val_loss_custom_naming_1' in logged + + +@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) +def test_logged_metrics_steps(tmpdir): + class TestModel(BoringModel): + def validation_step(self, batch, batch_idx): + loss_val = torch.randn(1) + self.log('val_loss', loss_val) + return loss_val + + model = TestModel() + model.validation_epoch_end = None + + trainer = Trainer( + default_root_dir=tmpdir, + limit_train_batches=2, + limit_val_batches=2, + max_epochs=2, + log_every_n_steps=1, + weights_summary=None, + ) + trainer.fit(model) + + assert trainer.dev_debugger.logged_metrics[0]['global_step'] == 1 + assert trainer.dev_debugger.logged_metrics[1]['global_step'] == 3 diff --git a/tests/trainer/test_correct_freq_accumulation.py b/tests/trainer/test_correct_freq_accumulation.py deleted file mode 100644 index 77cc80407270f..0000000000000 --- a/tests/trainer/test_correct_freq_accumulation.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Tests to ensure that the training loop works with a dict -""" -import os -from unittest import mock - -from pytorch_lightning import Trainer -from tests.base.model_template import EvalModelTemplate - - -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) -def test_training_step_scalar(tmpdir): - """ - Tests that only training_step can be used - """ - - model = EvalModelTemplate() - model.validation_step = None - model.test_step = None - model.training_step_end = None - model.training_epoch_end = None - model.validation_step = model.validation_step__dp - model.validation_step_end = None - model.validation_epoch_end = None - model.test_dataloader = None - - trainer = Trainer( - default_root_dir=tmpdir, - limit_train_batches=2, - limit_val_batches=2, - max_epochs=2, - log_every_n_steps=1, - weights_summary=None, - ) - trainer.fit(model) - - # epoch 0 - assert trainer.dev_debugger.logged_metrics[0]['global_step'] == 0 - assert trainer.dev_debugger.logged_metrics[1]['global_step'] == 1 - assert trainer.dev_debugger.logged_metrics[2]['global_step'] == 1 - - # epoch 1 - assert trainer.dev_debugger.logged_metrics[3]['global_step'] == 2 - assert trainer.dev_debugger.logged_metrics[4]['global_step'] == 3 - assert trainer.dev_debugger.logged_metrics[5]['global_step'] == 3 diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 92d8eb974fea7..f7112ad17d4a0 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -1323,7 +1323,7 @@ def setup(self, model, stage): ) @patch("pytorch_lightning.loggers.tensorboard.TensorBoardLogger.log_metrics") def test_log_every_n_steps(log_metrics_mock, tmpdir, train_batches, max_steps, log_interval): - model = EvalModelTemplate() + model = BoringModel() trainer = Trainer( default_root_dir=tmpdir, log_every_n_steps=log_interval, From 549fdaff785cbb2a14b6c78d3cfae67b17ad5099 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 30 Mar 2021 02:17:00 +0200 Subject: [PATCH 09/11] Fix test --- tests/trainer/test_trainer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index f7112ad17d4a0..1f8768a1d2a23 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -1323,7 +1323,12 @@ def setup(self, model, stage): ) @patch("pytorch_lightning.loggers.tensorboard.TensorBoardLogger.log_metrics") def test_log_every_n_steps(log_metrics_mock, tmpdir, train_batches, max_steps, log_interval): - model = BoringModel() + class TestModel(BoringModel): + def training_step(self, *args, **kwargs): + self.log("foo", -1) + return super().training_step(*args, **kwargs) + + model = TestModel() trainer = Trainer( default_root_dir=tmpdir, log_every_n_steps=log_interval, From acdd3f5b9379aeae9eafe641f910f42cf2101a52 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 30 Mar 2021 13:24:38 +0200 Subject: [PATCH 10/11] Docs --- docs/source/ecosystem/asr_nlp_tts.rst | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/docs/source/ecosystem/asr_nlp_tts.rst b/docs/source/ecosystem/asr_nlp_tts.rst index af9a7084583f2..e1a94eda9e805 100644 --- a/docs/source/ecosystem/asr_nlp_tts.rst +++ b/docs/source/ecosystem/asr_nlp_tts.rst @@ -751,13 +751,8 @@ be customized with PyTorch Lightning since every NeMo model is a LightningModule l_mle, l_length, logdet, loss, _ = self.step(y, y_lengths, x, x_lengths) - output = { - "loss": loss, # required - "progress_bar": {"l_mle": l_mle, "l_length": l_length, "logdet": logdet}, - "log": {"loss": loss, "l_mle": l_mle, "l_length": l_length, "logdet": logdet}, - } - - return output + self.log_dict({"l_mle": l_mle, "l_length": l_length, "logdet": logdet}, prog_bar=True) + return loss ... Neural Types in NeMo TTS From ef9d8c2eee882d7790c3700dfa79df28a4b2e4f9 Mon Sep 17 00:00:00 2001 From: Carlos Mocholi Date: Tue, 30 Mar 2021 13:27:44 +0200 Subject: [PATCH 11/11] Update CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 375d6ee060d0c..b1ce691124894 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -165,6 +165,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Removed legacy code to include `step` dictionary returns in `callback_metrics`. Use `self.log_dict` instead. ([#6682](https://github.com/PyTorchLightning/pytorch-lightning/pull/6682)) +- Removed legacy code to log or include metrics in the progress bar by returning them in a dict with the `"log"/"progress_bar"` magic keys. Use `self.log` instead ([#6734](https://github.com/PyTorchLightning/pytorch-lightning/pull/6734)) + + - Removed `optimizer_idx` argument from `training_step` in manual optimization ([#6093](https://github.com/PyTorchLightning/pytorch-lightning/pull/6093))