From 8f9996a64ef4fe180404922ef6d7c46a2c237b82 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Fri, 22 Apr 2022 13:16:26 +0900 Subject: [PATCH 01/28] Use new rank_zero_debug --- pytorch_lightning/utilities/distributed.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/utilities/distributed.py b/pytorch_lightning/utilities/distributed.py index 86449db1190a9..39d7654ffb291 100644 --- a/pytorch_lightning/utilities/distributed.py +++ b/pytorch_lightning/utilities/distributed.py @@ -290,14 +290,16 @@ def register_ddp_comm_hook( if ddp_comm_wrapper is not None: if not _TORCH_GREATER_EQUAL_1_9: - rank_zero_warn("Not applying DDP comm wrapper. To use communication wrapper, please use pytorch>=1.9.0.") + new_rank_zero_warn( + "Not applying DDP comm wrapper. To use communication wrapper, please use pytorch>=1.9.0." + ) else: new_rank_zero_info( f"DDP comm wrapper is provided, apply {ddp_comm_wrapper.__qualname__}({ddp_comm_hook.__qualname__})." ) ddp_comm_hook = ddp_comm_wrapper(ddp_comm_hook) - rank_zero_debug(f"Registering DDP comm hook: {ddp_comm_hook.__qualname__}.") + new_rank_zero_debug(f"Registering DDP comm hook: {ddp_comm_hook.__qualname__}.") model.register_comm_hook(state=ddp_comm_state, hook=ddp_comm_hook) From d9a42146d0272d2d9fa815f714034aa9999405a8 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Fri, 22 Apr 2022 13:35:49 +0900 Subject: [PATCH 02/28] Fix and move import statement to the top --- pytorch_lightning/utilities/distributed.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pytorch_lightning/utilities/distributed.py b/pytorch_lightning/utilities/distributed.py index 39d7654ffb291..cd839f7fd5465 100644 --- a/pytorch_lightning/utilities/distributed.py +++ b/pytorch_lightning/utilities/distributed.py @@ -25,6 +25,7 @@ from pytorch_lightning.utilities.rank_zero import rank_zero_only # noqa: F401 from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation from pytorch_lightning.utilities.rank_zero import rank_zero_info as new_rank_zero_info +from pytorch_lightning.utilities.rank_zero import rank_zero_warn as new_rank_zero_warn if _TPU_AVAILABLE: import torch_xla.core.xla_model as xm @@ -281,8 +282,6 @@ def register_ddp_comm_hook( ... ddp_comm_wrapper=default.fp16_compress_wrapper, ... ) """ - from pytorch_lightning.utilities import rank_zero_warn - if ddp_comm_hook is None: return # inform mypy that ddp_comm_hook is callable From 2bec3394e1c6d6419acd5f4e73395925e77b3229 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 23 Apr 2022 08:54:26 +0900 Subject: [PATCH 03/28] Fix deepspeed installation --- .azure-pipelines/gpu-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index 0a2465b85c484..5bb1fb432e9f4 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -53,7 +53,7 @@ jobs: - bash: | python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)" pip install fairscale>=0.4.5 - pip install deepspeed>=0.6.0 + pip install "deepspeed>=0.6.0" CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))") pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0" pip install . --requirement requirements/devel.txt From aef821fa857b0aec371edfac600d445839fc9272 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Fri, 22 Apr 2022 20:35:54 -0400 Subject: [PATCH 04/28] Adapt to deepspeed>=0.5.9 --- pytorch_lightning/utilities/imports.py | 1 + tests/strategies/test_deepspeed_strategy.py | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/utilities/imports.py b/pytorch_lightning/utilities/imports.py index 232c51ad636ee..569e3530201d2 100644 --- a/pytorch_lightning/utilities/imports.py +++ b/pytorch_lightning/utilities/imports.py @@ -98,6 +98,7 @@ def _compare_version(package: str, op: Callable, version: str, use_base_version: _APEX_AVAILABLE = _module_available("apex.amp") _BAGUA_AVAILABLE = _package_available("bagua") _DEEPSPEED_AVAILABLE = _package_available("deepspeed") +_DEEPSPEED_GREATER_EQUAL_0_5_9 = _DEEPSPEED_AVAILABLE and _compare_version("deepspeed", operator.ge, "0.5.9") _DEEPSPEED_GREATER_EQUAL_0_6 = _DEEPSPEED_AVAILABLE and _compare_version("deepspeed", operator.ge, "0.6.0") _FAIRSCALE_AVAILABLE = not _IS_WINDOWS and _module_available("fairscale.nn") _FAIRSCALE_OSS_FP16_BROADCAST_AVAILABLE = _FAIRSCALE_AVAILABLE and _compare_version("fairscale", operator.ge, "0.3.3") diff --git a/tests/strategies/test_deepspeed_strategy.py b/tests/strategies/test_deepspeed_strategy.py index e2be98b970967..a84fa33bd7a28 100644 --- a/tests/strategies/test_deepspeed_strategy.py +++ b/tests/strategies/test_deepspeed_strategy.py @@ -32,7 +32,7 @@ from pytorch_lightning.strategies import DeepSpeedStrategy from pytorch_lightning.strategies.deepspeed import LightningDeepSpeedModule from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE, _DEEPSPEED_GREATER_EQUAL_0_6 +from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE, _DEEPSPEED_GREATER_EQUAL_0_5_9, _DEEPSPEED_GREATER_EQUAL_0_6 from pytorch_lightning.utilities.meta import init_meta_context from tests.helpers.boring_model import BoringModel, RandomDataset, RandomIterableDataset from tests.helpers.datamodules import ClassifDataModule @@ -42,6 +42,10 @@ import deepspeed from deepspeed.utils.zero_to_fp32 import convert_zero_checkpoint_to_fp32_state_dict + if _DEEPSPEED_GREATER_EQUAL_0_5_9: + from deepspeed.runtime.zero.stage_1_and_2 import DeepSpeedZeroOptimizer + else: + from deepspeed.runtime.zero.stage2 import FP16_DeepSpeedZeroOptimizer as DeepSpeedZeroOptimizer class ModelParallelBoringModel(BoringModel): def __init__(self): @@ -296,9 +300,7 @@ def test_deepspeed_run_configure_optimizers(tmpdir): class TestCB(Callback): def on_train_start(self, trainer, pl_module) -> None: - from deepspeed.runtime.zero.stage2 import FP16_DeepSpeedZeroOptimizer - - assert isinstance(trainer.optimizers[0], FP16_DeepSpeedZeroOptimizer) + assert isinstance(trainer.optimizers[0], DeepSpeedZeroOptimizer) assert isinstance(trainer.optimizers[0].optimizer, torch.optim.SGD) assert isinstance(trainer.lr_scheduler_configs[0].scheduler, torch.optim.lr_scheduler.StepLR) # check that the lr_scheduler config was preserved @@ -337,9 +339,8 @@ def test_deepspeed_config(tmpdir, deepspeed_zero_config): class TestCB(Callback): def on_train_start(self, trainer, pl_module) -> None: from deepspeed.runtime.lr_schedules import WarmupLR - from deepspeed.runtime.zero.stage2 import FP16_DeepSpeedZeroOptimizer - assert isinstance(trainer.optimizers[0], FP16_DeepSpeedZeroOptimizer) + assert isinstance(trainer.optimizers[0], DeepSpeedZeroOptimizer) assert isinstance(trainer.optimizers[0].optimizer, torch.optim.SGD) assert isinstance(trainer.lr_scheduler_configs[0].scheduler, WarmupLR) assert trainer.lr_scheduler_configs[0].interval == "step" From b1624100b4d45e796544b0d99d2ecae6ec329f9f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 23 Apr 2022 00:37:21 +0000 Subject: [PATCH 05/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/strategies/test_deepspeed_strategy.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/strategies/test_deepspeed_strategy.py b/tests/strategies/test_deepspeed_strategy.py index a84fa33bd7a28..319289d200f4f 100644 --- a/tests/strategies/test_deepspeed_strategy.py +++ b/tests/strategies/test_deepspeed_strategy.py @@ -32,7 +32,11 @@ from pytorch_lightning.strategies import DeepSpeedStrategy from pytorch_lightning.strategies.deepspeed import LightningDeepSpeedModule from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE, _DEEPSPEED_GREATER_EQUAL_0_5_9, _DEEPSPEED_GREATER_EQUAL_0_6 +from pytorch_lightning.utilities.imports import ( + _DEEPSPEED_AVAILABLE, + _DEEPSPEED_GREATER_EQUAL_0_5_9, + _DEEPSPEED_GREATER_EQUAL_0_6, +) from pytorch_lightning.utilities.meta import init_meta_context from tests.helpers.boring_model import BoringModel, RandomDataset, RandomIterableDataset from tests.helpers.datamodules import ClassifDataModule @@ -47,6 +51,7 @@ else: from deepspeed.runtime.zero.stage2 import FP16_DeepSpeedZeroOptimizer as DeepSpeedZeroOptimizer + class ModelParallelBoringModel(BoringModel): def __init__(self): super().__init__() From ef5b5d7979dba25039ab9dd173997329c507a496 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 23 Apr 2022 09:53:18 +0900 Subject: [PATCH 06/28] Fix fairscale installation --- .azure-pipelines/gpu-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index 0a2465b85c484..7c617d3ffbe02 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -52,7 +52,7 @@ jobs: - bash: | python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)" - pip install fairscale>=0.4.5 + pip install "fairscale>=0.4.5" pip install deepspeed>=0.6.0 CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))") pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0" From cb5a0ce321519136917330e021e8b079e570d647 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 23 Apr 2022 21:21:25 +0900 Subject: [PATCH 07/28] Don't collect test_warnings.py --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 95612febfae77..729fa31f51db2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,6 +24,7 @@ addopts = --doctest-modules --color=yes --disable-pytest-warnings + --ignore=tests/utilities/test_warnings.py filterwarnings = # error out on our deprecation warnings - ensures the code and tests are kept up-to-date error::pytorch_lightning.utilities.rank_zero.LightningDeprecationWarning From 238659d7d11b79b858a67a63c94cbc75c33a3861 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 23 Apr 2022 21:40:01 +0900 Subject: [PATCH 08/28] Temporarily skip standard testing --- .azure-pipelines/gpu-tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index d8ada86bed767..8f669c78bb559 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -74,9 +74,9 @@ jobs: ls -l legacy/checkpoints/ displayName: 'Get legacy checkpoints' - - bash: | - python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests --ignore tests/benchmarks -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50 - displayName: 'Testing: standard' + # - bash: | + # python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests --ignore tests/benchmarks -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50 + # displayName: 'Testing: standard' - bash: | bash tests/standalone_tests.sh From 79dd1d1e3271d1cbbfc729368cd7c1c99b58a3fe Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 23 Apr 2022 21:54:00 +0900 Subject: [PATCH 09/28] Add testpaths for pytest --- setup.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.cfg b/setup.cfg index 729fa31f51db2..87705f3e81b3e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,6 +13,8 @@ # limitations under the License. [tool:pytest] +testpaths = + tests norecursedirs = .git .github From f7f8e200b4ca8bdfd255e420da06a54afba959c4 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 23 Apr 2022 22:01:32 +0900 Subject: [PATCH 10/28] Specify dir to run pytest on --- tests/standalone_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/standalone_tests.sh b/tests/standalone_tests.sh index 7b7dd361ab0b1..a3946779303ad 100755 --- a/tests/standalone_tests.sh +++ b/tests/standalone_tests.sh @@ -17,7 +17,7 @@ set -e # this environment variable allows special tests to run export PL_RUN_STANDALONE_TESTS=1 # python arguments -defaults='-m coverage run --source pytorch_lightning --append -m pytest --capture=no' +defaults='-m coverage run --source pytorch_lightning --append -m pytest --capture=no tests' # find tests marked as `@RunIf(standalone=True)`. done manually instead of with pytest because it is faster grep_output=$(grep --recursive --word-regexp 'tests' --regexp 'standalone=True' --include '*.py' --exclude 'tests/conftest.py') From b0f103a4e82b32bb559ea0a19e6a3291f2fe5b29 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 23 Apr 2022 22:10:37 +0900 Subject: [PATCH 11/28] Show files in standalone tests --- tests/standalone_tests.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/standalone_tests.sh b/tests/standalone_tests.sh index a3946779303ad..bbe4318faa2a2 100755 --- a/tests/standalone_tests.sh +++ b/tests/standalone_tests.sh @@ -25,6 +25,9 @@ grep_output=$(grep --recursive --word-regexp 'tests' --regexp 'standalone=True' # file paths, remove duplicates files=$(echo "$grep_output" | cut -f1 -d: | sort | uniq) +echo "asdfasdfasdf" +echo $files + # get the list of parametrizations. we need to call them separately. the last two lines are removed. # note: if there's a syntax error, this will fail with some garbled output if [[ "$OSTYPE" == "darwin"* ]]; then From 06768933f34ed2f30a2d7e84e08ce966df973baf Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 23 Apr 2022 22:21:34 +0900 Subject: [PATCH 12/28] asdf --- tests/standalone_tests.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/standalone_tests.sh b/tests/standalone_tests.sh index bbe4318faa2a2..0a837341bfe9e 100755 --- a/tests/standalone_tests.sh +++ b/tests/standalone_tests.sh @@ -33,16 +33,21 @@ echo $files if [[ "$OSTYPE" == "darwin"* ]]; then parametrizations=$(pytest $files --collect-only --quiet "$@" | tail -r | sed -e '1,3d' | tail -r) else + echo "asdfasdfasdf" parametrizations=$(pytest $files --collect-only --quiet "$@" | head -n -2) fi parametrizations_arr=($parametrizations) +echo "assdafsdaf" + # tests to skip - space separated blocklist='tests/profiler/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx' report='' for i in "${!parametrizations_arr[@]}"; do parametrization=${parametrizations_arr[$i]} + echo "assdafsdaf" $i $parametrization + # check blocklist if echo $blocklist | grep -F "${parametrization}"; then From 162810e4813660e2f40563170722ea18101166ed Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 23 Apr 2022 22:32:52 +0900 Subject: [PATCH 13/28] asdf --- tests/standalone_tests.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/standalone_tests.sh b/tests/standalone_tests.sh index 0a837341bfe9e..3c8e066cb3380 100755 --- a/tests/standalone_tests.sh +++ b/tests/standalone_tests.sh @@ -39,6 +39,7 @@ fi parametrizations_arr=($parametrizations) echo "assdafsdaf" +echo $parametrizations # tests to skip - space separated blocklist='tests/profiler/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx' From d7e273dad97d2062e04b50108681477dd6b1033a Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 23 Apr 2022 23:21:34 +0900 Subject: [PATCH 14/28] Don't fail fast --- tests/standalone_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/standalone_tests.sh b/tests/standalone_tests.sh index 3c8e066cb3380..429ce32cbdc81 100755 --- a/tests/standalone_tests.sh +++ b/tests/standalone_tests.sh @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -set -e +# set -e # FIXME # this environment variable allows special tests to run export PL_RUN_STANDALONE_TESTS=1 From 51b3062194ec84dc3c5daff2c47e9fff8bc8693d Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sun, 24 Apr 2022 00:00:55 +0900 Subject: [PATCH 15/28] Show exact command running --- tests/standalone_tests.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/standalone_tests.sh b/tests/standalone_tests.sh index 429ce32cbdc81..c4db81ca9b8bb 100755 --- a/tests/standalone_tests.sh +++ b/tests/standalone_tests.sh @@ -58,6 +58,7 @@ for i in "${!parametrizations_arr[@]}"; do # run the test echo "Running ${parametrization}" + echo Running python ${defaults} "${parametrization}" python ${defaults} "${parametrization}" report+="Ran\t$parametrization\n" From 3e1c0ea76ffebb576fb4c4a46924ffed31e87bce Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sun, 24 Apr 2022 00:11:09 +0900 Subject: [PATCH 16/28] Revert "Specify dir to run pytest on" This reverts commit f7f8e200b4ca8bdfd255e420da06a54afba959c4. --- tests/standalone_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/standalone_tests.sh b/tests/standalone_tests.sh index c4db81ca9b8bb..b793ba8cb568c 100755 --- a/tests/standalone_tests.sh +++ b/tests/standalone_tests.sh @@ -17,7 +17,7 @@ # this environment variable allows special tests to run export PL_RUN_STANDALONE_TESTS=1 # python arguments -defaults='-m coverage run --source pytorch_lightning --append -m pytest --capture=no tests' +defaults='-m coverage run --source pytorch_lightning --append -m pytest --capture=no' # find tests marked as `@RunIf(standalone=True)`. done manually instead of with pytest because it is faster grep_output=$(grep --recursive --word-regexp 'tests' --regexp 'standalone=True' --include '*.py' --exclude 'tests/conftest.py') From b423b50c7c73c7a0519beac7531a9117e8236381 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sun, 24 Apr 2022 00:11:14 +0900 Subject: [PATCH 17/28] Revert "Add testpaths for pytest" This reverts commit 79dd1d1e3271d1cbbfc729368cd7c1c99b58a3fe. --- setup.cfg | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index 87705f3e81b3e..729fa31f51db2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,8 +13,6 @@ # limitations under the License. [tool:pytest] -testpaths = - tests norecursedirs = .git .github From 0a2e22848e51217c07f1317a7f9e14fdcefe5d5a Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sun, 24 Apr 2022 00:26:56 +0900 Subject: [PATCH 18/28] Revert "asdf" This reverts commit 162810e4813660e2f40563170722ea18101166ed. --- tests/standalone_tests.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/standalone_tests.sh b/tests/standalone_tests.sh index b793ba8cb568c..a1ce73c76a4ea 100755 --- a/tests/standalone_tests.sh +++ b/tests/standalone_tests.sh @@ -39,7 +39,6 @@ fi parametrizations_arr=($parametrizations) echo "assdafsdaf" -echo $parametrizations # tests to skip - space separated blocklist='tests/profiler/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx' From 7fe5faca00f5051bf2491040a9ccca393a927ca7 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sun, 24 Apr 2022 00:27:06 +0900 Subject: [PATCH 19/28] Revert "asdf" This reverts commit 06768933f34ed2f30a2d7e84e08ce966df973baf. --- tests/standalone_tests.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/standalone_tests.sh b/tests/standalone_tests.sh index a1ce73c76a4ea..e05fb3c81460d 100755 --- a/tests/standalone_tests.sh +++ b/tests/standalone_tests.sh @@ -33,21 +33,16 @@ echo $files if [[ "$OSTYPE" == "darwin"* ]]; then parametrizations=$(pytest $files --collect-only --quiet "$@" | tail -r | sed -e '1,3d' | tail -r) else - echo "asdfasdfasdf" parametrizations=$(pytest $files --collect-only --quiet "$@" | head -n -2) fi parametrizations_arr=($parametrizations) -echo "assdafsdaf" - # tests to skip - space separated blocklist='tests/profiler/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx' report='' for i in "${!parametrizations_arr[@]}"; do parametrization=${parametrizations_arr[$i]} - echo "assdafsdaf" $i $parametrization - # check blocklist if echo $blocklist | grep -F "${parametrization}"; then From 0cc407d4a91a0f09f0b3cf2871c797130442c5cb Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sun, 24 Apr 2022 00:27:14 +0900 Subject: [PATCH 20/28] Revert "Don't fail fast" This reverts commit d7e273dad97d2062e04b50108681477dd6b1033a. --- tests/standalone_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/standalone_tests.sh b/tests/standalone_tests.sh index e05fb3c81460d..5873c9e3e396a 100755 --- a/tests/standalone_tests.sh +++ b/tests/standalone_tests.sh @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# set -e # FIXME +set -e # this environment variable allows special tests to run export PL_RUN_STANDALONE_TESTS=1 From 1a333097b8e048a823ca355362e7b06db1165912 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sun, 24 Apr 2022 00:27:49 +0900 Subject: [PATCH 21/28] Revert "Show exact command running" This reverts commit 51b3062194ec84dc3c5daff2c47e9fff8bc8693d. --- tests/standalone_tests.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/standalone_tests.sh b/tests/standalone_tests.sh index 5873c9e3e396a..5824c1c693b74 100755 --- a/tests/standalone_tests.sh +++ b/tests/standalone_tests.sh @@ -52,7 +52,6 @@ for i in "${!parametrizations_arr[@]}"; do # run the test echo "Running ${parametrization}" - echo Running python ${defaults} "${parametrization}" python ${defaults} "${parametrization}" report+="Ran\t$parametrization\n" From a3614fbe63ac915dc5cff4a0a0111e4e7fb27799 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sun, 24 Apr 2022 00:28:35 +0900 Subject: [PATCH 22/28] Revert "Show files in standalone tests" This reverts commit b0f103a4e82b32bb559ea0a19e6a3291f2fe5b29. --- tests/standalone_tests.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/standalone_tests.sh b/tests/standalone_tests.sh index 5824c1c693b74..7b7dd361ab0b1 100755 --- a/tests/standalone_tests.sh +++ b/tests/standalone_tests.sh @@ -25,9 +25,6 @@ grep_output=$(grep --recursive --word-regexp 'tests' --regexp 'standalone=True' # file paths, remove duplicates files=$(echo "$grep_output" | cut -f1 -d: | sort | uniq) -echo "asdfasdfasdf" -echo $files - # get the list of parametrizations. we need to call them separately. the last two lines are removed. # note: if there's a syntax error, this will fail with some garbled output if [[ "$OSTYPE" == "darwin"* ]]; then From 67ea34576b0d911d8dc36d1029e1dd25eb33ef05 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sun, 24 Apr 2022 00:32:10 +0900 Subject: [PATCH 23/28] Skip deepspeed==0.6.{0,1} --- .azure-pipelines/gpu-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index 8f669c78bb559..9edcc98106419 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -53,7 +53,7 @@ jobs: - bash: | python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)" pip install "fairscale>=0.4.5" - pip install "deepspeed>=0.6.0" + pip install "deepspeed<0.6.0,>=0.6.2" # https://github.com/microsoft/DeepSpeed/issues/1878 CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))") pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0" pip install . --requirement requirements/devel.txt From e15b21d4debf5692e31e7dab199875d3c053b28d Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sun, 24 Apr 2022 01:05:29 +0900 Subject: [PATCH 24/28] Revert "Temporarily skip standard testing" This reverts commit 238659d7d11b79b858a67a63c94cbc75c33a3861. --- .azure-pipelines/gpu-tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index 9edcc98106419..e7a6a59206c75 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -74,9 +74,9 @@ jobs: ls -l legacy/checkpoints/ displayName: 'Get legacy checkpoints' - # - bash: | - # python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests --ignore tests/benchmarks -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50 - # displayName: 'Testing: standard' + - bash: | + python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests --ignore tests/benchmarks -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50 + displayName: 'Testing: standard' - bash: | bash tests/standalone_tests.sh From cf722c3b8748b7f5ecfffee8124dae94028b2586 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sun, 24 Apr 2022 01:07:42 +0900 Subject: [PATCH 25/28] Revert "Fix and move import statement to the top" This reverts commit d9a42146d0272d2d9fa815f714034aa9999405a8. --- pytorch_lightning/utilities/distributed.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/utilities/distributed.py b/pytorch_lightning/utilities/distributed.py index cd839f7fd5465..39d7654ffb291 100644 --- a/pytorch_lightning/utilities/distributed.py +++ b/pytorch_lightning/utilities/distributed.py @@ -25,7 +25,6 @@ from pytorch_lightning.utilities.rank_zero import rank_zero_only # noqa: F401 from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation from pytorch_lightning.utilities.rank_zero import rank_zero_info as new_rank_zero_info -from pytorch_lightning.utilities.rank_zero import rank_zero_warn as new_rank_zero_warn if _TPU_AVAILABLE: import torch_xla.core.xla_model as xm @@ -282,6 +281,8 @@ def register_ddp_comm_hook( ... ddp_comm_wrapper=default.fp16_compress_wrapper, ... ) """ + from pytorch_lightning.utilities import rank_zero_warn + if ddp_comm_hook is None: return # inform mypy that ddp_comm_hook is callable From fb9d7c2f3b302187188cea2de68247b764bf5b47 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sun, 24 Apr 2022 01:07:58 +0900 Subject: [PATCH 26/28] Revert "Use new rank_zero_debug" This reverts commit 8f9996a64ef4fe180404922ef6d7c46a2c237b82. --- pytorch_lightning/utilities/distributed.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/utilities/distributed.py b/pytorch_lightning/utilities/distributed.py index 39d7654ffb291..86449db1190a9 100644 --- a/pytorch_lightning/utilities/distributed.py +++ b/pytorch_lightning/utilities/distributed.py @@ -290,16 +290,14 @@ def register_ddp_comm_hook( if ddp_comm_wrapper is not None: if not _TORCH_GREATER_EQUAL_1_9: - new_rank_zero_warn( - "Not applying DDP comm wrapper. To use communication wrapper, please use pytorch>=1.9.0." - ) + rank_zero_warn("Not applying DDP comm wrapper. To use communication wrapper, please use pytorch>=1.9.0.") else: new_rank_zero_info( f"DDP comm wrapper is provided, apply {ddp_comm_wrapper.__qualname__}({ddp_comm_hook.__qualname__})." ) ddp_comm_hook = ddp_comm_wrapper(ddp_comm_hook) - new_rank_zero_debug(f"Registering DDP comm hook: {ddp_comm_hook.__qualname__}.") + rank_zero_debug(f"Registering DDP comm hook: {ddp_comm_hook.__qualname__}.") model.register_comm_hook(state=ddp_comm_state, hook=ddp_comm_hook) From 88776161dca7216e75436ccc0a6aaaea8d3c20b1 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sun, 24 Apr 2022 01:35:20 +0900 Subject: [PATCH 27/28] Pin deepspeed<0.6.0 --- .azure-pipelines/gpu-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index e7a6a59206c75..68206ad8e980c 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -53,7 +53,7 @@ jobs: - bash: | python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)" pip install "fairscale>=0.4.5" - pip install "deepspeed<0.6.0,>=0.6.2" # https://github.com/microsoft/DeepSpeed/issues/1878 + pip install "deepspeed<0.6.0" # https://github.com/microsoft/DeepSpeed/issues/1878 CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))") pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0" pip install . --requirement requirements/devel.txt From 3b36af0dea2cb6a23619a0cb2deb7c4c9e626856 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Apr 2022 01:04:14 +0900 Subject: [PATCH 28/28] Revert "Don't collect test_warnings.py" This reverts commit cb5a0ce321519136917330e021e8b079e570d647. --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 729fa31f51db2..95612febfae77 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,7 +24,6 @@ addopts = --doctest-modules --color=yes --disable-pytest-warnings - --ignore=tests/utilities/test_warnings.py filterwarnings = # error out on our deprecation warnings - ensures the code and tests are kept up-to-date error::pytorch_lightning.utilities.rank_zero.LightningDeprecationWarning