File tree Expand file tree Collapse file tree 1 file changed +9
-2
lines changed
pytorch_lightning/utilities Expand file tree Collapse file tree 1 file changed +9
-2
lines changed Original file line number Diff line number Diff line change @@ -53,7 +53,8 @@ def is_oom_error(exception):
def is_cuda_out_of_memory(exception):
    """Return whether ``exception`` looks like a CUDA out-of-memory error.

    The check passes only for a ``RuntimeError`` carrying exactly one
    argument, where that argument is a string containing both ``"CUDA"``
    and ``"out of memory"``. The two substrings are matched independently
    rather than as the single literal ``"CUDA out of memory."``, so other
    wordings of the message are recognised as well.

    Args:
        exception: The exception instance to inspect.

    Returns:
        bool: ``True`` if the exception matches, ``False`` otherwise.
    """
    if not isinstance(exception, RuntimeError) or len(exception.args) != 1:
        return False
    message = exception.args[0]
    # A RuntimeError may carry a non-string payload. Applying ``in`` to it
    # would raise TypeError (e.g. an int) or test container membership
    # (e.g. a list), so anything that is not a string is rejected up front.
    if not isinstance(message, str):
        return False
    return "CUDA" in message and "out of memory" in message
5758
5859
5960# based on https://github.com/BlackHC/toma/blob/master/toma/torch_cuda_memory.py
def garbage_collection_cuda():
    """Garbage collection Torch (CUDA) memory.

    Runs Python's garbage collector and then, if CUDA is available, asks
    Torch to empty its CUDA cache. A ``RuntimeError`` raised while emptying
    the cache is suppressed when ``is_oom_error`` classifies it as an
    out-of-memory error.

    Raises:
        RuntimeError: If emptying the CUDA cache fails with an error that
            ``is_oom_error`` does not classify as out-of-memory.
    """
    gc.collect()
    if torch.cuda.is_available():
        try:
            # This is the last thing that should cause an OOM error, but seemingly it can.
            torch.cuda.empty_cache()
        except RuntimeError as exception:
            if not is_oom_error(exception):
                # Only handle OOM errors
                raise
You can’t perform that action at this time.
0 commit comments