-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Closed
Description
🐛 Bug
To Reproduce
import os
import torch
from torch.utils.data import DataLoader, Dataset
from pytorch_lightning import LightningModule, Trainer, seed_everything
class RandomDataset(Dataset):
    """Map-style dataset backed by a single tensor of random values.

    Args:
        size: Number of features in each sample.
        length: Number of samples in the dataset.
    """

    def __init__(self, size, length):
        self.len = length
        # One row per sample: the tensor has shape (length, size).
        self.data = torch.randn(length, size)

    def __getitem__(self, index):
        """Return the sample stored at row ``index``."""
        return self.data[index]

    def __len__(self):
        """Return the number of samples given at construction time."""
        return self.len
class BoringModel(LightningModule):
    """Minimal single-layer module used to reproduce the LR finder failure."""

    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)
        # Read by configure_optimizers; the report's log shows the tuner
        # overwriting the model's learning rate (it ends up set to None).
        self.learning_rate = 1

    def forward(self, x):
        """Apply the linear layer to ``x``."""
        return self.layer(x)

    def training_step(self, batch, batch_idx):
        """Compute, log and return the loss for one batch."""
        # The loss is the plain sum of the layer outputs.
        loss = self(batch).sum()
        self.log("train_loss", loss)
        return {"loss": loss}

    def configure_optimizers(self):
        """Return an SGD optimizer over the layer using ``self.learning_rate``."""
        return torch.optim.SGD(self.layer.parameters(), lr=self.learning_rate)
def run():
    """Build the data and model, then run the tuner with ``auto_lr_find=True``.

    This is the call sequence that produces the output shown in the report.
    """
    train_data = DataLoader(RandomDataset(32, 64), batch_size=2)
    model = BoringModel()
    trainer = Trainer(
        default_root_dir=os.getcwd(),
        enable_model_summary=False,
        auto_lr_find=True,
    )
    trainer.tune(model, train_dataloaders=train_data)
if __name__ == "__main__":
    seed_everything(1)
    run()

The output is:
Finding best initial lr: 2%|▏ | 2/100 [00:00<00:00, 393.63it/s]
`Trainer.fit` stopped: `max_steps=2` reached.
LR finder stopped early after 2 steps due to diverging loss.
Failed to compute suggesting for `lr`. There might not be enough points.
Traceback (most recent call last):
File "/Users/adrian/repositories/lightning/src/pytorch_lightning/tuner/lr_finder.py", line 188, in suggestion
min_grad = np.gradient(loss).argmin()
File "<__array_function__ internals>", line 180, in gradient
File "/Users/adrian/miniconda3/envs/lightning/lib/python3.10/site-packages/numpy/lib/function_base.py", line 1219, in gradient
raise ValueError(
ValueError: Shape of array too small to calculate a numerical gradient, at least (edge_order + 1) elements are required.
Learning rate set to None
Restoring states from the checkpoint path at /Users/adrian/repositories/lightning/examples/pl_bug_report/.lr_find_4abd548b-20eb-4784-9141-be67af274177.ckpt
Expected behavior
The error message should contain the relevant information instead of a mix of different errors, and propose an action for the user if possible. Instead, we get
- A value error from numpy
- A message "LR finder stopped early after 2 steps due to diverging loss."
- A message "Failed to compute suggesting for `lr`. There might not be enough points."
- The learning rate on the model gets set to None. This is undesired.
Environment
Latest everything.