From 2d3de7995dc511798d91fdedd8c90e3cb9c70b60 Mon Sep 17 00:00:00 2001 From: gianscarpe Date: Thu, 5 Nov 2020 10:55:29 +0100 Subject: [PATCH 1/3] Fixed PYTHONPATH for ddp test model --- tests/backends/ddp_model.py | 27 +++++++++++++++++++++------ tests/backends/test_ddp.py | 4 +++- tests/utilities/dist.py | 7 +++---- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/tests/backends/ddp_model.py b/tests/backends/ddp_model.py index b625d8cc985fc..3a00130d599f7 100644 --- a/tests/backends/ddp_model.py +++ b/tests/backends/ddp_model.py @@ -14,20 +14,23 @@ """ Runs either `.fit()` or `.test()` on a single node across multiple gpus. """ -from argparse import ArgumentParser - +import os +import tests as pl_tests from pytorch_lightning import Trainer, seed_everything +from argparse import ArgumentParser from tests.base import EvalModelTemplate -import os + import torch def main(): seed_everything(1234) + parser = ArgumentParser(add_help=False) parser = Trainer.add_argparse_args(parser) parser.add_argument('--trainer_method', default='fit') parser.add_argument('--tmpdir') + parser.add_argument('--workdir') parser.set_defaults(gpus=2) parser.set_defaults(distributed_backend="ddp") args = parser.parse_args() @@ -38,14 +41,26 @@ def main(): result = {} if args.trainer_method == 'fit': trainer.fit(model) - result = {'status': 'complete', 'method': args.trainer_method, 'result': None} + result = { + 'status': 'complete', + 'method': args.trainer_method, + 'result': None + } if args.trainer_method == 'test': result = trainer.test(model) - result = {'status': 'complete', 'method': args.trainer_method, 'result': result} + result = { + 'status': 'complete', + 'method': args.trainer_method, + 'result': result + } if args.trainer_method == 'fit_test': trainer.fit(model) result = trainer.test(model) - result = {'status': 'complete', 'method': args.trainer_method, 'result': result} + result = { + 'status': 'complete', + 'method': args.trainer_method, + 'result': result + } if len(result) > 0: file_path = os.path.join(args.tmpdir, 'ddp.result') diff --git a/tests/backends/test_ddp.py b/tests/backends/test_ddp.py index a2517a2f976cb..6888daae0cd47 100644 --- a/tests/backends/test_ddp.py +++ b/tests/backends/test_ddp.py @@ -11,9 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os + import pytest import torch -import os + from tests.backends import ddp_model from tests.utilities.dist import call_training_script diff --git a/tests/utilities/dist.py b/tests/utilities/dist.py index b17805c77f18a..cd081b3ee0c87 100644 --- a/tests/utilities/dist.py +++ b/tests/utilities/dist.py @@ -13,9 +13,9 @@ # limitations under the License. import os import subprocess -from subprocess import TimeoutExpired import sys from pathlib import Path +from subprocess import TimeoutExpired import pytorch_lightning @@ -29,11 +29,10 @@ def call_training_script(module_file, cli_args, method, tmpdir, timeout=60): # need to set the PYTHONPATH in case pytorch_lightning was not installed into the environment env = os.environ.copy() - env['PYTHONPATH'] = f'{pytorch_lightning.__file__}:' + env.get('PYTHONPATH', '') + env['PYTHONPATH'] = env.get('PYTHONPATH', '') + f'{pytorch_lightning.__file__}:' # for running in ddp mode, we need to lauch it's own process or pytest will get stuck p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) - try: std, err = p.communicate(timeout=timeout) err = str(err.decode("utf-8")) @@ -42,5 +41,5 @@ def call_training_script(module_file, cli_args, method, tmpdir, timeout=60): except TimeoutExpired: p.kill() std, err = p.communicate() - + print(err) return std, err From 096f457d2585681f9e5cfe44f3905e10c385580e Mon Sep 17 00:00:00 2001 From: gianscarpe Date: Thu, 5 Nov 2020 11:26:38 +0100 Subject: [PATCH 2/3] Removed debug calls --- tests/utilities/dist.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/utilities/dist.py b/tests/utilities/dist.py index cd081b3ee0c87..80c0246ce6c57 100644 --- a/tests/utilities/dist.py +++ b/tests/utilities/dist.py @@ -41,5 +41,4 @@ def call_training_script(module_file, cli_args, method, tmpdir, timeout=60): except TimeoutExpired: p.kill() std, err = p.communicate() - print(err) return std, err From 75504203ec2b7760e949847da409300c06ac8d9c Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 16 Nov 2020 17:34:13 +0100 Subject: [PATCH 3/3] Apply suggestions from code review --- tests/backends/ddp_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/backends/ddp_model.py b/tests/backends/ddp_model.py index 3a00130d599f7..32b30c05538be 100644 --- a/tests/backends/ddp_model.py +++ b/tests/backends/ddp_model.py @@ -15,9 +15,10 @@ Runs either `.fit()` or `.test()` on a single node across multiple gpus. """ import os +from argparse import ArgumentParser + import tests as pl_tests from pytorch_lightning import Trainer, seed_everything -from argparse import ArgumentParser from tests.base import EvalModelTemplate import torch