Merged

Commits (26)
b716212
Add trainer argument for detect_anomaly.
yopknopixx Aug 28, 2021
60d0535
Deprecate `terminate_on_nan` trainer argument.
yopknopixx Aug 28, 2021
577c301
Minor Changes for deprecation warnings.
yopknopixx Aug 29, 2021
2f3632b
Fix PEP8 errors
yopknopixx Aug 29, 2021
48fea2c
Merge branch 'master' into feature/8313_detect_anomaly
yopknopixx Aug 29, 2021
0eb3a28
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 29, 2021
b426095
Fix error with test_detect_anomaly_nan.
yopknopixx Aug 29, 2021
62a8962
Merge branch 'feature/8313_detect_anomaly' of https://github.com/yopk…
yopknopixx Aug 29, 2021
85bf40e
Update pytorch_lightning/trainer/trainer.py
yopknopixx Aug 29, 2021
a62a07a
Recommended Changes
yopknopixx Sep 2, 2021
a6315b3
Merge branch 'master' into feature/8313_detect_anomaly
yopknopixx Sep 2, 2021
ae7daac
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 2, 2021
0f1a7e6
Merge branch 'master' of https://github.com/PyTorchLightning/pytorch-…
yopknopixx Sep 8, 2021
085c0a2
Recommended changes
yopknopixx Sep 8, 2021
5ba2c7d
Update pytorch_lightning/loops/utilities.py
yopknopixx Sep 9, 2021
a34f8d9
Merge branch 'master' into feature/8313_detect_anomaly
awaelchli Oct 11, 2021
9772e7d
update tests
awaelchli Oct 11, 2021
132446b
reset
awaelchli Oct 11, 2021
5a31539
reset _notebooks
awaelchli Oct 11, 2021
9fa71cd
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 11, 2021
650dc30
undo empty line
awaelchli Oct 11, 2021
2ce4c79
Merge remote-tracking branch 'yopknopixx/feature/8313_detect_anomaly'…
awaelchli Oct 11, 2021
75c76f5
extend test
awaelchli Oct 11, 2021
944694c
fix merge error
awaelchli Oct 11, 2021
8e8a9e9
swap order to support positional args
awaelchli Oct 11, 2021
9de66ea
add additional asserts for trainer property
awaelchli Oct 11, 2021
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -271,6 +271,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Deprecated
 
+- Deprecated trainer argument `terminate_on_nan` in favour of `detect_anomaly` ([#9175](https://github.com/PyTorchLightning/pytorch-lightning/pull/9175))
+
+
 - Deprecated `LightningModule.summarize()` in favor of `pytorch_lightning.utilities.model_summary.summarize()`
 
 
@@ -11,9 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Union
+from typing import Optional, Union
 
-from pytorch_lightning.utilities import GradClipAlgorithmType
+from pytorch_lightning.utilities import GradClipAlgorithmType, rank_zero_deprecation
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 
 
@@ -26,10 +26,15 @@ def on_trainer_init(
         gradient_clip_val: Union[int, float],
         gradient_clip_algorithm: str,
         track_grad_norm: Union[int, float, str],
-        terminate_on_nan: bool,
+        terminate_on_nan: Optional[bool],
     ):
-        if not isinstance(terminate_on_nan, bool):
-            raise TypeError(f"`terminate_on_nan` should be a bool, got {terminate_on_nan}.")
+        if terminate_on_nan is not None:
+            rank_zero_deprecation(
+                "Trainer argument `terminate_on_nan` was deprecated in v1.5 and will be removed in 1.7."
+                " Please use `Trainer(detect_anomaly=True)` instead."
+            )
+            if not isinstance(terminate_on_nan, bool):
+                raise TypeError(f"`terminate_on_nan` should be a bool, got {terminate_on_nan}.")
 
         # gradient clipping
         if not isinstance(gradient_clip_val, (int, float)):
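The guard above warns only when the flag is actually passed and still honours it for the remainder of the deprecation cycle. A minimal sketch of how this surfaces to users — a hypothetical script, assuming a Lightning build containing this patch, where `rank_zero_deprecation` emits a standard Python deprecation warning:

import warnings

from pytorch_lightning import Trainer

# Passing the deprecated flag still configures NaN termination, but now
# also emits a deprecation warning pointing at `detect_anomaly`.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    trainer = Trainer(terminate_on_nan=True)

assert any("terminate_on_nan" in str(w.message) for w in caught)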
10 changes: 8 additions & 2 deletions pytorch_lightning/trainer/trainer.py
@@ -167,7 +167,7 @@ def __init__(
         reload_dataloaders_every_epoch: bool = False,
         auto_lr_find: Union[bool, str] = False,
         replace_sampler_ddp: bool = True,
-        terminate_on_nan: bool = False,
+        detect_anomaly: bool = False,
         auto_scale_batch_size: Union[str, bool] = False,
         prepare_data_per_node: Optional[bool] = None,
         plugins: Optional[Union[PLUGIN_INPUT, List[PLUGIN_INPUT]]] = None,
@@ -177,7 +177,7 @@ def __init__(
         move_metrics_to_cpu: bool = False,
         multiple_trainloader_mode: str = "max_size_cycle",
         stochastic_weight_avg: bool = False,
-        detect_anomaly: bool = False,
+        terminate_on_nan: Optional[bool] = None,
     ):
         r"""
         Customize every aspect of training via flags.
@@ -351,6 +351,12 @@ def __init__(
             terminate_on_nan: If set to True, will terminate training (by raising a `ValueError`) at the
                 end of each training batch, if any of the parameters or the loss are NaN or +/-inf.
 
+                .. deprecated:: v1.5
+                    Trainer argument ``terminate_on_nan`` was deprecated in v1.5 and will be removed in 1.7.
+                    Please use ``detect_anomaly`` instead.
+
+            detect_anomaly: Enable anomaly detection for the autograd engine.
+
             tpu_cores: How many TPU cores to train on (1 or 8) / Single TPU to train on [1]
 
             ipus: How many IPUs to train on.
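For reference, `detect_anomaly=True` defers to PyTorch's autograd anomaly mode rather than Lightning-side parameter checks. A standalone sketch of that underlying mechanism, using plain `torch` with no Lightning involved:

import torch

# Anomaly mode records the forward-pass traceback of every op and raises
# a RuntimeError as soon as a backward computation returns NaN, instead
# of letting the NaN propagate silently into the weights.
with torch.autograd.set_detect_anomaly(True):
    x = torch.tensor([-1.0], requires_grad=True)
    y = torch.sqrt(x)  # sqrt of a negative number produces NaN
    try:
        y.backward()   # SqrtBackward returns NaN -> RuntimeError here
    except RuntimeError as err:
        print(f"anomaly detected: {err}")

The trade-off is runtime cost: anomaly mode slows training considerably, which is why it stays opt-in, mirroring the old `terminate_on_nan=False` default.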
10 changes: 10 additions & 0 deletions tests/deprecated_api/test_remove_1-7.py
@@ -122,6 +122,16 @@ def test_v1_7_0_stochastic_weight_avg_trainer_constructor(tmpdir):
         _ = Trainer(stochastic_weight_avg=True)
 
 
+@pytest.mark.parametrize("terminate_on_nan", [True, False])
+def test_v1_7_0_trainer_terminate_on_nan(tmpdir, terminate_on_nan):
+    with pytest.deprecated_call(
+        match="Trainer argument `terminate_on_nan` was deprecated in v1.5 and will be removed in 1.7"
+    ):
+        trainer = Trainer(terminate_on_nan=terminate_on_nan)
+        assert trainer.terminate_on_nan is terminate_on_nan
+        assert trainer._detect_anomaly is False
+
+
 def test_v1_7_0_deprecated_on_task_dataloader(tmpdir):
     class CustomBoringModel(BoringModel):
         def on_train_dataloader(self):
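Taken together, the migration the deprecation warning asks of downstream code is a one-line change; a sketch, using the flag names as introduced in this PR:

from pytorch_lightning import Trainer

# Before (deprecated in v1.5, scheduled for removal in v1.7): checks
# parameters and loss for NaN or +/-inf after every training batch.
trainer = Trainer(terminate_on_nan=True)

# After: lets the autograd engine pinpoint the op that produced the NaN.
trainer = Trainer(detect_anomaly=True)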