From f8be17b55a0d72b027058466573a2d6954b70e6d Mon Sep 17 00:00:00 2001 From: Liu Date: Tue, 28 Jun 2022 11:21:37 -0700 Subject: [PATCH] add a check to determine if horovod.torch import succeeds --- smdebug/core/utils.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/smdebug/core/utils.py b/smdebug/core/utils.py index 0557072b4..99f229391 100644 --- a/smdebug/core/utils.py +++ b/smdebug/core/utils.py @@ -64,14 +64,20 @@ class FRAMEWORK(Enum): except (ImportError, ModuleNotFoundError): _torch_dist_imported = None - +logger = get_logger() try: import horovod.torch as hvd # This redundant import is necessary because horovod does not raise an ImportError if the library is not present import torch # noqa + #make sure the library is correctly imported + hvd.init() _hvd_imported = hvd +except AttributeError: + _hvd_imported = None + logger.error("horovod.torch is not correctly imported.") + raise except (ModuleNotFoundError, ImportError): try: import horovod.tensorflow as hvd @@ -79,9 +85,8 @@ class FRAMEWORK(Enum): _hvd_imported = hvd except (ModuleNotFoundError, ImportError): _hvd_imported = None + raise - -logger = get_logger() error_handling_agent = ( ErrorHandlingAgent.get_error_handling_agent() ) # set up error handler to wrap smdebug functions