diff --git a/tests/base/develop_pipelines.py b/tests/base/develop_pipelines.py
index 24535dc67da8e..b6289079a35ab 100644
--- a/tests/base/develop_pipelines.py
+++ b/tests/base/develop_pipelines.py
@@ -14,8 +14,8 @@
 import torch
 
 from pytorch_lightning import Trainer
-from tests.base.develop_utils import load_model_from_checkpoint, get_default_logger, \
-    reset_seed
+from tests.base import BoringModel
+from tests.base.develop_utils import get_default_logger, load_model_from_checkpoint, reset_seed
 
 
 def run_model_test_without_loggers(trainer_options, model, min_acc: float = 0.50):
@@ -31,6 +31,7 @@ def run_model_test_without_loggers(trainer_options, model, min_acc: float = 0.50
     pretrained_model = load_model_from_checkpoint(
         trainer.logger,
         trainer.checkpoint_callback.best_model_path,
+        type(model)
     )
 
     # test new model accuracy
@@ -39,7 +40,7 @@ def run_model_test_without_loggers(trainer_options, model, min_acc: float = 0.50
         test_loaders = [test_loaders]
 
     for dataloader in test_loaders:
-        run_prediction(dataloader, pretrained_model, min_acc=min_acc)
+        run_prediction(pretrained_model, dataloader, min_acc=min_acc)
 
     if trainer.use_ddp:
         # on hpc this would work fine... but need to hack it for the purpose of the test
@@ -47,7 +48,8 @@ def run_model_test_without_loggers(trainer_options, model, min_acc: float = 0.50
         trainer.optimizers, trainer.lr_schedulers = pretrained_model.configure_optimizers()
 
 
-def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, with_hpc: bool = True):
+def run_model_test(trainer_options, model, on_gpu: bool = True, version=None,
+                   with_hpc: bool = True, min_acc: float = 0.25):
     reset_seed()
     save_dir = trainer_options['default_root_dir']
 
@@ -56,9 +58,6 @@ def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, wi
     logger = get_default_logger(save_dir, version=version)
     trainer_options.update(logger=logger)
 
-    if 'checkpoint_callback' not in trainer_options:
-        trainer_options.update(checkpoint_callback=True)
-
     trainer = Trainer(**trainer_options)
     initial_values = torch.tensor([torch.sum(torch.abs(x)) for x in model.parameters()])
     result = trainer.fit(model)
@@ -66,10 +65,11 @@ def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, wi
     assert result == 1, 'trainer failed'
 
     # Check that the model is actually changed post-training
-    assert torch.norm(initial_values - post_train_values) > 0.1
+    change_ratio = torch.norm(initial_values - post_train_values)
+    assert change_ratio > 0.1, f"the model is changed of {change_ratio}"
 
     # test model loading
-    pretrained_model = load_model_from_checkpoint(logger, trainer.checkpoint_callback.best_model_path)
+    pretrained_model = load_model_from_checkpoint(logger, trainer.checkpoint_callback.best_model_path, type(model))
 
     # test new model accuracy
     test_loaders = model.test_dataloader()
@@ -77,14 +77,15 @@ def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, wi
         test_loaders = [test_loaders]
 
     for dataloader in test_loaders:
-        run_prediction(dataloader, pretrained_model)
+        run_prediction(pretrained_model, dataloader, min_acc=min_acc)
 
     if with_hpc:
         if trainer.use_ddp or trainer.use_ddp2:
             # on hpc this would work fine... but need to hack it for the purpose of the test
             trainer.model = pretrained_model
-            trainer.optimizers, trainer.lr_schedulers, trainer.optimizer_frequencies = \
-                trainer.init_optimizers(pretrained_model)
+            trainer.optimizers, trainer.lr_schedulers, trainer.optimizer_frequencies = trainer.init_optimizers(
+                pretrained_model
+            )
 
         # test HPC saving
         trainer.checkpoint_connector.hpc_save(save_dir, logger)
@@ -93,7 +94,14 @@ def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, wi
         trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=on_gpu)
 
 
-def run_prediction(dataloader, trained_model, dp=False, min_acc=0.50):
+def run_prediction(trained_model, dataloader, dp=False, min_acc=0.25):
+    if isinstance(trained_model, BoringModel):
+        return _boring_model_run_prediction(trained_model, dataloader, dp, min_acc)
+    else:
+        return _eval_model_template_run_prediction(trained_model, dataloader, dp, min_acc)
+
+
+def _eval_model_template_run_prediction(trained_model, dataloader, dp=False, min_acc=0.50):
     # run prediction on 1 batch
     batch = next(iter(dataloader))
     x, y = batch
@@ -102,7 +110,7 @@ def run_prediction(dataloader, trained_model, dp=False, min_acc=0.50):
     if dp:
         with torch.no_grad():
             output = trained_model(batch, 0)
-        acc = output['val_acc']
+            acc = output['val_acc']
         acc = torch.mean(acc).item()
 
     else:
@@ -119,3 +127,13 @@ def run_prediction(dataloader, trained_model, dp=False, min_acc=0.50):
         acc = acc.item()
 
     assert acc >= min_acc, f"This model is expected to get > {min_acc} in test set (it got {acc})"
+
+
+def _boring_model_run_prediction(trained_model, dataloader, dp=False, min_acc=0.25):
+    # run prediction on 1 batch
+    batch = next(iter(dataloader))
+    with torch.no_grad():
+        output = trained_model(batch)
+        acc = trained_model.loss(batch, output)
+
+    assert acc >= min_acc, f"This model is expected to get, {min_acc} in test set but got {acc}"
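Not part of the patch — a minimal usage sketch of the reordered helper. `run_prediction` now takes the model first and dispatches on its type: `BoringModel` instances are checked via `trained_model.loss(...)`, everything else via the old `val_acc` path. The snippet assumes `tests.base` also exports the `RandomDataset` helper used by the boring models (that export and the sizes are assumptions, not confirmed by this diff):

```python
from torch.utils.data import DataLoader

from tests.base import BoringModel, RandomDataset  # RandomDataset export is assumed
from tests.base.develop_pipelines import run_prediction

model = BoringModel()
model.eval()

# the model is now the first positional argument, the dataloader the second
dataloader = DataLoader(RandomDataset(32, 64), batch_size=2)
run_prediction(model, dataloader, min_acc=0.01)
```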
diff --git a/tests/models/data/horovod/train_default_model.py b/tests/models/data/horovod/train_default_model.py
index 62f874902b094..c38b5b4efafe8 100644
--- a/tests/models/data/horovod/train_default_model.py
+++ b/tests/models/data/horovod/train_default_model.py
@@ -72,7 +72,7 @@ def run_test_from_config(trainer_options):
         test_loaders = [test_loaders]
 
     for dataloader in test_loaders:
-        run_prediction(dataloader, pretrained_model)
+        run_prediction(pretrained_model, dataloader)
 
     # test HPC saving
     trainer.checkpoint_connector.hpc_save(ckpt_path, trainer.logger)
diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py
index 892077ccdb1be..cc24f6f187502 100644
--- a/tests/models/test_cpu.py
+++ b/tests/models/test_cpu.py
@@ -21,15 +21,14 @@
 import tests.base.develop_pipelines as tpipes
 import tests.base.develop_utils as tutils
 from pytorch_lightning import Trainer
-from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
-from tests.base import EvalModelTemplate
+from pytorch_lightning.callbacks import Callback, EarlyStopping, ModelCheckpoint
+from tests.base import BoringModel
 
 
 @pytest.mark.parametrize("enable_pl_optimizer", [False, True])
 def test_cpu_slurm_save_load(enable_pl_optimizer, tmpdir):
     """Verify model save/load/checkpoint on CPU."""
-    hparams = EvalModelTemplate.get_default_hparams()
-    model = EvalModelTemplate(**hparams)
+    model = BoringModel()
 
     # logger file to get meta
     logger = tutils.get_default_logger(tmpdir)
@@ -61,11 +60,8 @@ def test_cpu_slurm_save_load(enable_pl_optimizer, tmpdir):
     for batch in dataloader:
         break
 
-    x, y = batch
-    x = x.view(x.size(0), -1)
-
     model.eval()
-    pred_before_saving = model(x)
+    pred_before_saving = model(batch)
 
     # test HPC saving
     # simulate snapshot on slurm
@@ -75,26 +71,26 @@ def test_cpu_slurm_save_load(enable_pl_optimizer, tmpdir):
     # new logger file to get meta
     logger = tutils.get_default_logger(tmpdir, version=version)
 
+    model = BoringModel()
+
+    class _StartCallback(Callback):
+        # set the epoch start hook so we can predict before the model does the full training
+        def on_train_epoch_start(self, trainer, model):
+            assert trainer.global_step == real_global_step and trainer.global_step > 0
+            # predict with loaded model to make sure answers are the same
+            mode = model.training
+            model.eval()
+            new_pred = model(batch)
+            assert torch.eq(pred_before_saving, new_pred).all()
+            model.train(mode)
+
     trainer = Trainer(
         default_root_dir=tmpdir,
         max_epochs=1,
         logger=logger,
-        callbacks=[ModelCheckpoint(dirpath=tmpdir)],
         enable_pl_optimizer=enable_pl_optimizer,
+        callbacks=[_StartCallback(), ModelCheckpoint(dirpath=tmpdir)],
     )
-    model = EvalModelTemplate(**hparams)
-
-    # set the epoch start hook so we can predict before the model does the full training
-    def assert_pred_same():
-        assert trainer.global_step == real_global_step and trainer.global_step > 0
-
-        # predict with loaded model to make sure answers are the same
-        trainer.model.eval()
-        new_pred = trainer.model(x)
-        assert torch.all(torch.eq(pred_before_saving, new_pred)).item() == 1
-
-    model.on_epoch_start = assert_pred_same
-
     # by calling fit again, we trigger training, loading weights from the cluster
     # and our hook to predict using current model before any more weight updates
     trainer.fit(model)
@@ -102,21 +98,26 @@ def assert_pred_same():
 
 @pytest.mark.parametrize("enable_pl_optimizer", [False, True])
 def test_early_stopping_cpu_model(enable_pl_optimizer, tmpdir):
-    """Test each of the trainer options."""
-    stopping = EarlyStopping(monitor='early_stop_on', min_delta=0.1)
+    class ModelTrainVal(BoringModel):
+        def validation_epoch_end(self, outputs) -> None:
+            val_loss = torch.stack([x["x"] for x in outputs]).mean()
+            self.log('val_loss', val_loss)
+
+    stopping = EarlyStopping(monitor="val_loss", min_delta=0.1)
     trainer_options = dict(
-        default_root_dir=tmpdir,
         callbacks=[stopping],
-        max_epochs=2,
+        default_root_dir=tmpdir,
         gradient_clip_val=1.0,
         overfit_batches=0.20,
         track_grad_norm=2,
+        enable_pl_optimizer=enable_pl_optimizer,
+        progress_bar_refresh_rate=0,
+        accumulate_grad_batches=2,
         limit_train_batches=0.1,
         limit_val_batches=0.1,
-        enable_pl_optimizer=enable_pl_optimizer,
     )
 
-    model = EvalModelTemplate()
+    model = ModelTrainVal()
     tpipes.run_model_test(trainer_options, model, on_gpu=False)
 
     # test freeze on cpu
@@ -146,26 +147,29 @@ def test_multi_cpu_model_ddp(enable_pl_optimizer, tmpdir):
         enable_pl_optimizer=enable_pl_optimizer,
     )
 
-    model = EvalModelTemplate()
-    tpipes.run_model_test(trainer_options, model, on_gpu=False)
+    model = BoringModel()
+    tpipes.run_model_test(trainer_options, model, on_gpu=False, min_acc=0.05)
 
 
 def test_lbfgs_cpu_model(tmpdir):
-    """Test each of the trainer options."""
+    """Test each of the trainer options. Testing LBFGS optimizer"""
+    class ModelSpecifiedOptimizer(BoringModel):
+        def __init__(self, optimizer_name, learning_rate):
+            super().__init__()
+            self.optimizer_name = optimizer_name
+            self.learning_rate = learning_rate
+            self.save_hyperparameters()
+
     trainer_options = dict(
         default_root_dir=tmpdir,
         max_epochs=1,
         progress_bar_refresh_rate=0,
-        weights_summary='top',
+        weights_summary="top",
        limit_train_batches=0.2,
        limit_val_batches=0.2,
     )
 
-    hparams = EvalModelTemplate.get_default_hparams()
-    hparams.update(optimizer_name='lbfgs',
-                   learning_rate=0.004)
-    model = EvalModelTemplate(**hparams)
-    model.configure_optimizers = model.configure_optimizers__lbfgs
+    model = ModelSpecifiedOptimizer(optimizer_name="LBFGS", learning_rate=0.004)
 
     tpipes.run_model_test_without_loggers(trainer_options, model, min_acc=0.25)
 
@@ -181,8 +185,8 @@ def test_default_logger_callbacks_cpu_model(tmpdir):
         limit_val_batches=0.01,
     )
 
-    model = EvalModelTemplate()
-    tpipes.run_model_test_without_loggers(trainer_options, model)
+    model = BoringModel()
+    tpipes.run_model_test_without_loggers(trainer_options, model, min_acc=0.01)
 
     # test freeze on cpu
     model.freeze()
@@ -191,7 +195,17 @@
 
 def test_running_test_after_fitting(tmpdir):
     """Verify test() on fitted model."""
-    model = EvalModelTemplate()
+    class ModelTrainValTest(BoringModel):
+
+        def validation_epoch_end(self, outputs) -> None:
+            val_loss = torch.stack([x["x"] for x in outputs]).mean()
+            self.log('val_loss', val_loss)
+
+        def test_epoch_end(self, outputs) -> None:
+            test_loss = torch.stack([x["y"] for x in outputs]).mean()
+            self.log('test_loss', test_loss)
+
+    model = ModelTrainValTest()
 
     # logger file to get meta
     logger = tutils.get_default_logger(tmpdir)
@@ -217,12 +231,22 @@
     trainer.test()
 
     # test we have good test accuracy
-    tutils.assert_ok_model_acc(trainer, thr=0.5)
+    tutils.assert_ok_model_acc(trainer, key='test_loss', thr=0.5)
 
 
 def test_running_test_no_val(tmpdir):
-    """Verify `test()` works on a model with no `val_loader`."""
-    model = EvalModelTemplate()
+    """Verify `test()` works on a model with no `val_dataloader`. It performs
+    train and test only"""
+    class ModelTrainTest(BoringModel):
+
+        def val_dataloader(self):
+            pass
+
+        def test_epoch_end(self, outputs) -> None:
+            test_loss = torch.stack([x["y"] for x in outputs]).mean()
+            self.log('test_loss', test_loss)
+
+    model = ModelTrainTest()
 
     # logger file to get meta
     logger = tutils.get_default_logger(tmpdir)
@@ -248,12 +272,12 @@
     trainer.test()
 
     # test we have good test accuracy
-    tutils.assert_ok_model_acc(trainer)
+    tutils.assert_ok_model_acc(trainer, key='test_loss')
 
 
 def test_simple_cpu(tmpdir):
     """Verify continue training session on CPU."""
-    model = EvalModelTemplate()
+    model = BoringModel()
 
     # fit model
     trainer = Trainer(
@@ -275,32 +299,12 @@ def test_cpu_model(tmpdir):
         progress_bar_refresh_rate=0,
         max_epochs=1,
         limit_train_batches=0.4,
-        limit_val_batches=0.4
-    )
-
-    model = EvalModelTemplate()
-
-    tpipes.run_model_test(trainer_options, model, on_gpu=False)
-
-
-@pytest.mark.parametrize("enable_pl_optimizer", [False, True])
-def test_all_features_cpu_model(enable_pl_optimizer, tmpdir):
-    """Test each of the trainer options."""
-    trainer_options = dict(
-        default_root_dir=tmpdir,
-        gradient_clip_val=1.0,
-        overfit_batches=0.20,
-        track_grad_norm=2,
-        progress_bar_refresh_rate=0,
-        accumulate_grad_batches=2,
-        max_epochs=1,
-        limit_train_batches=0.4,
         limit_val_batches=0.4,
-        enable_pl_optimizer=enable_pl_optimizer,
     )
 
-    model = EvalModelTemplate()
-    tpipes.run_model_test(trainer_options, model, on_gpu=False)
+    model = BoringModel()
+
+    tpipes.run_model_test(trainer_options, model, on_gpu=False, min_acc=0.01)
 
 
 def test_tbptt_cpu_model(tmpdir):
@@ -319,10 +323,12 @@ def __getitem__(self, i):
         def __len__(self):
             return 1
 
-    class BpttTestModel(EvalModelTemplate):
-        def __init__(self, *args, **kwargs):
+    class BpttTestModel(BoringModel):
+        def __init__(self, batch_size, in_features, out_features, *args, **kwargs):
             super().__init__(*args, **kwargs)
             self.test_hidden = None
+            self.batch_size = batch_size
+            self.layer = torch.nn.Linear(in_features, out_features)
 
         def training_step(self, batch, batch_idx, hiddens):
             assert hiddens == self.test_hidden, "Hidden state not persistent between tbptt steps"
@@ -335,18 +341,17 @@ def training_step(self, batch, batch_idx, hiddens):
             assert y_tensor.shape[1] == truncated_bptt_steps, "tbptt split list failed"
 
             pred = self(x_tensor.view(batch_size, truncated_bptt_steps))
-            loss_val = torch.nn.functional.mse_loss(
-                pred, y_tensor.view(batch_size, truncated_bptt_steps))
+            loss_val = torch.nn.functional.mse_loss(pred, y_tensor.view(batch_size, truncated_bptt_steps))
             return {
-                'loss': loss_val,
-                'hiddens': self.test_hidden,
+                "loss": loss_val,
+                "hiddens": self.test_hidden,
             }
 
         def training_epoch_end(self, training_step_outputs):
             training_step_outputs = training_step_outputs[0]
             assert len(training_step_outputs) == (sequence_size / truncated_bptt_steps)
-            loss = torch.stack([x['loss'] for x in training_step_outputs]).mean()
-            self.log('train_loss', loss)
+            loss = torch.stack([x["loss"] for x in training_step_outputs]).mean()
+            self.log("train_loss", loss)
 
         def train_dataloader(self):
             return torch.utils.data.DataLoader(
@@ -356,15 +361,8 @@ def train_dataloader(self):
             sampler=None,
         )
 
-    hparams = EvalModelTemplate.get_default_hparams()
-    hparams.update(
-        batch_size=batch_size,
-        in_features=truncated_bptt_steps,
-        hidden_dim=truncated_bptt_steps,
-        out_features=truncated_bptt_steps
-    )
-
-    model = BpttTestModel(**hparams)
+    model = BpttTestModel(batch_size=batch_size,
+                          in_features=truncated_bptt_steps, out_features=truncated_bptt_steps)
     model.example_input_array = torch.randn(5, truncated_bptt_steps)
 
     # fit model
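Not part of the patch — a rough, self-contained sketch of why the new subclasses above stack `x["x"]` and `x["y"]`: those are the keys the boring model's `validation_step` and `test_step` return, and its `loss` is a trivial constant-target MSE, which is why loss-style `min_acc` thresholds (0.01/0.05/0.20) replace the old accuracy checks. Layer sizes, the loss target, and the dataloaders here are illustrative assumptions, not the exact `tests.base.BoringModel` implementation:

```python
import torch
from torch.utils.data import DataLoader, Dataset
from pytorch_lightning import LightningModule


class RandomDataset(Dataset):
    def __init__(self, size, length):
        self.data = torch.randn(length, size)

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return len(self.data)


class BoringModelSketch(LightningModule):
    """Minimal stand-in for tests.base.BoringModel, to illustrate the step outputs."""

    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

    def forward(self, x):
        return self.layer(x)

    def loss(self, batch, prediction):
        # trivial loss against a constant target, enough to drive the test assertions
        return torch.nn.functional.mse_loss(prediction, torch.ones_like(prediction))

    def training_step(self, batch, batch_idx):
        return {"loss": self.loss(batch, self(batch))}

    def validation_step(self, batch, batch_idx):
        return {"x": self.loss(batch, self(batch))}  # stacked as x["x"] in validation_epoch_end

    def test_step(self, batch, batch_idx):
        return {"y": self.loss(batch, self(batch))}  # stacked as x["y"] in test_epoch_end

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)

    def train_dataloader(self):
        return DataLoader(RandomDataset(32, 64), batch_size=2)

    def val_dataloader(self):
        return DataLoader(RandomDataset(32, 64), batch_size=2)

    def test_dataloader(self):
        return DataLoader(RandomDataset(32, 64), batch_size=2)
```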
diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 169552ce1bd75..7cfeb8f0ae53e 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -21,11 +21,10 @@
 import tests.base.develop_pipelines as tpipes
 import tests.base.develop_utils as tutils
 from pytorch_lightning import Trainer
+from pytorch_lightning.accelerators.gpu_accelerator import GPUAccelerator
 from pytorch_lightning.utilities import device_parser
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
-from tests.base import EvalModelTemplate
-from pytorch_lightning.accelerators.gpu_accelerator import GPUAccelerator
-
+from tests.base import BoringModel
 
 PRETEND_N_OF_GPUS = 16
 
@@ -43,8 +42,8 @@ def test_multi_gpu_none_backend(tmpdir):
         gpus=2,
     )
 
-    model = EvalModelTemplate()
-    tpipes.run_model_test(trainer_options, model)
+    model = BoringModel()
+    tpipes.run_model_test(trainer_options, model, min_acc=0.20)
 
 
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@@ -60,7 +59,7 @@ def test_single_gpu_model(tmpdir, gpus):
         gpus=gpus
     )
 
-    model = EvalModelTemplate()
+    model = BoringModel()
     tpipes.run_model_test(trainer_options, model)
diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py
index 5e5fab7d0a0b4..7081d450ee256 100644
--- a/tests/models/test_hparams.py
+++ b/tests/models/test_hparams.py
@@ -20,14 +20,14 @@
 import pytest
 import torch
 from fsspec.implementations.local import LocalFileSystem
-from omegaconf import OmegaConf, Container
+from omegaconf import Container, OmegaConf
 from torch.nn import functional as F
 from torch.utils.data import DataLoader
 
-from pytorch_lightning import Trainer, LightningModule
-from pytorch_lightning.core.saving import save_hparams_to_yaml, load_hparams_from_yaml
+from pytorch_lightning import LightningModule, Trainer
+from pytorch_lightning.core.saving import load_hparams_from_yaml, save_hparams_to_yaml
 from pytorch_lightning.utilities import AttributeDict, is_picklable
-from tests.base import EvalModelTemplate, TrialMNIST, BoringModel
+from tests.base import BoringModel, EvalModelTemplate, TrialMNIST
 
 
 class SaveHparamsModel(BoringModel):
@@ -595,13 +595,7 @@ def __init__(self, **kwargs):
         self.save_hyperparameters()
 
 
-class RuntimeParamChangeModelAssign(BoringModel):
-    def __init__(self, **kwargs):
-        super().__init__()
-        self.hparams = kwargs
-
-
-@pytest.mark.parametrize("cls", [RuntimeParamChangeModelSaving, RuntimeParamChangeModelAssign])
+@pytest.mark.parametrize("cls", [RuntimeParamChangeModelSaving])
 def test_init_arg_with_runtime_change(tmpdir, cls):
     """Test that we save/export only the initial hparams, no other runtime change allowed"""
     model = cls(running_arg=123)
diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py
index a2a9aa6b9042c..6ee5d362ffcaa 100644
--- a/tests/models/test_restore.py
+++ b/tests/models/test_restore.py
@@ -161,6 +161,7 @@ def test_callbacks_references_resume_from_checkpoint(enable_pl_optimizer, tmpdir
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_running_test_pretrained_model_distrib_dp(tmpdir):
     """Verify `test()` on pretrained model."""
+
     tutils.set_random_master_port()
 
     model = EvalModelTemplate()
@@ -205,7 +206,7 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir):
         dataloaders = [dataloaders]
 
     for dataloader in dataloaders:
-        tpipes.run_prediction(dataloader, pretrained_model)
+        tpipes.run_prediction(pretrained_model, dataloader)
 
 
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@@ -256,7 +257,7 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir):
         dataloaders = [dataloaders]
 
     for dataloader in dataloaders:
-        tpipes.run_prediction(dataloader, pretrained_model)
+        tpipes.run_prediction(pretrained_model, dataloader)
 
 
 def test_running_test_pretrained_model_cpu(tmpdir):
@@ -398,7 +399,7 @@ def assert_good_acc():
         dp_model.eval()
 
         dataloader = trainer.train_dataloader
-        tpipes.run_prediction(dataloader, dp_model, dp=True)
+        tpipes.run_prediction(dp_model, dataloader, dp=True)
 
     # new model
     model = EvalModelTemplate(**hparams)
diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py
index 9b42aa98c9dd0..614b2a8e66ab8 100644
--- a/tests/trainer/test_dataloaders.py
+++ b/tests/trainer/test_dataloaders.py
@@ -128,7 +128,7 @@ def test_multiple_val_dataloader(tmpdir):
 
     # make sure predictions are good for each val set
     for dataloader in trainer.val_dataloaders:
-        tpipes.run_prediction(dataloader, trainer.model)
+        tpipes.run_prediction(trainer.model, dataloader)
 
 
 @pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific'])
@@ -164,7 +164,7 @@ def test_step(self, batch, batch_idx, *args, **kwargs):
 
     # make sure predictions are good for each test set
     for dataloader in trainer.test_dataloaders:
-        tpipes.run_prediction(dataloader, trainer.model)
+        tpipes.run_prediction(trainer.model, dataloader)
 
     # run the test method
     trainer.test(ckpt_path=ckpt_path)
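Not part of the patch — the TBPTT assertions in `tests/models/test_cpu.py` above hinge on Lightning splitting the time dimension into `sequence_size / truncated_bptt_steps` chunks and calling `training_step` once per chunk with a persistent `hiddens` object. A small sketch of that splitting arithmetic with illustrative sizes (the concrete `sequence_size` and `batch_size` values sit outside the hunks shown and are assumptions here):

```python
import torch

sequence_size, batch_size, truncated_bptt_steps = 30, 30, 2

# a (batch, time, feature) sequence batch like the one the tbptt test builds
x = torch.rand(batch_size, sequence_size, 1)

# Lightning splits along the time dimension and calls training_step once per chunk
chunks = torch.split(x, truncated_bptt_steps, dim=1)

assert len(chunks) == sequence_size // truncated_bptt_steps      # 15 calls per batch
assert chunks[0].shape == (batch_size, truncated_bptt_steps, 1)  # what each call sees
```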