Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
8f9996a
Use new rank_zero_debug
akihironitta Apr 22, 2022
d9a4214
Fix and move import statement to the top
akihironitta Apr 22, 2022
2bec339
Fix deepspeed installation
akihironitta Apr 22, 2022
aef821f
Adapt to deepspeed>=0.5.9
Apr 23, 2022
b162410
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 23, 2022
ef5b5d7
Fix fairscale installation
akihironitta Apr 23, 2022
5344948
Merge branch 'bugfix/use-new-rank-zero' into bugfix/fairscale-version
akihironitta Apr 23, 2022
0a83cee
Merge branch 'bugfix/use-new-rank-zero' into bugfix/deepspeed-0.6.0
akihironitta Apr 23, 2022
4cf2585
Merge branch 'bugfix/fairscale-version' into bugfix/deepspeed-0.6.0
akihironitta Apr 23, 2022
cb5a0ce
Don't collect test_warnings.py
akihironitta Apr 23, 2022
238659d
Temporarily skip standard testing
akihironitta Apr 23, 2022
79dd1d1
Add testpaths for pytest
akihironitta Apr 23, 2022
f7f8e20
Specify dir to run pytest on
akihironitta Apr 23, 2022
b0f103a
Show files in standalone tests
akihironitta Apr 23, 2022
0676893
asdf
akihironitta Apr 23, 2022
162810e
asdf
akihironitta Apr 23, 2022
d7e273d
Don't fail fast
akihironitta Apr 23, 2022
51b3062
Show exact command running
akihironitta Apr 23, 2022
3e1c0ea
Revert "Specify dir to run pytest on"
akihironitta Apr 23, 2022
b423b50
Revert "Add testpaths for pytest"
akihironitta Apr 23, 2022
0a2e228
Revert "asdf"
akihironitta Apr 23, 2022
7fe5fac
Revert "asdf"
akihironitta Apr 23, 2022
0cc407d
Revert "Don't fail fast"
akihironitta Apr 23, 2022
1a33309
Revert "Show exact command running"
akihironitta Apr 23, 2022
a3614fb
Revert "Show files in standalone tests"
akihironitta Apr 23, 2022
67ea345
Skip deepspeed==0.6.{0,1}
akihironitta Apr 23, 2022
e15b21d
Revert "Temporarily skip standard testing"
akihironitta Apr 23, 2022
cf722c3
Revert "Fix and move import statement to the top"
akihironitta Apr 23, 2022
fb9d7c2
Revert "Use new rank_zero_debug"
akihironitta Apr 23, 2022
8877616
Pin deepspeed<0.6.0
akihironitta Apr 23, 2022
3b36af0
Revert "Don't collect test_warnings.py"
akihironitta Apr 25, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .azure-pipelines/gpu-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ jobs:

- bash: |
python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
pip install fairscale>=0.4.5
pip install deepspeed>=0.6.0
pip install "fairscale>=0.4.5"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For people dealing with CI/CD:

The command here was not appropriate! Without quotes, the shell treats `>=0.4.5` as an output redirection: it runs `pip install fairscale` (no version constraint) with stdout redirected into a file named `=0.4.5`, which can silently install the wrong version — so make sure to quote specifiers like `"fairscale>=0.4.5"` when pinning versions :)

- pip install fairscale>=0.4.5
+ pip install "fairscale>=0.4.5"

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, I know, it just creates a file named `=0.4.5` :D

pip install "deepspeed<0.6.0" # https://github.com/microsoft/DeepSpeed/issues/1878
CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0"
pip install . --requirement requirements/devel.txt
Expand Down
1 change: 1 addition & 0 deletions pytorch_lightning/utilities/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def _compare_version(package: str, op: Callable, version: str, use_base_version:
_APEX_AVAILABLE = _module_available("apex.amp")
_BAGUA_AVAILABLE = _package_available("bagua")
_DEEPSPEED_AVAILABLE = _package_available("deepspeed")
_DEEPSPEED_GREATER_EQUAL_0_5_9 = _DEEPSPEED_AVAILABLE and _compare_version("deepspeed", operator.ge, "0.5.9")
_DEEPSPEED_GREATER_EQUAL_0_6 = _DEEPSPEED_AVAILABLE and _compare_version("deepspeed", operator.ge, "0.6.0")
_FAIRSCALE_AVAILABLE = not _IS_WINDOWS and _module_available("fairscale.nn")
_FAIRSCALE_OSS_FP16_BROADCAST_AVAILABLE = _FAIRSCALE_AVAILABLE and _compare_version("fairscale", operator.ge, "0.3.3")
Expand Down
18 changes: 12 additions & 6 deletions tests/strategies/test_deepspeed_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@
from pytorch_lightning.strategies import DeepSpeedStrategy
from pytorch_lightning.strategies.deepspeed import LightningDeepSpeedModule
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE, _DEEPSPEED_GREATER_EQUAL_0_6
from pytorch_lightning.utilities.imports import (
_DEEPSPEED_AVAILABLE,
_DEEPSPEED_GREATER_EQUAL_0_5_9,
_DEEPSPEED_GREATER_EQUAL_0_6,
)
from pytorch_lightning.utilities.meta import init_meta_context
from tests.helpers.boring_model import BoringModel, RandomDataset, RandomIterableDataset
from tests.helpers.datamodules import ClassifDataModule
Expand All @@ -42,6 +46,11 @@
import deepspeed
from deepspeed.utils.zero_to_fp32 import convert_zero_checkpoint_to_fp32_state_dict

if _DEEPSPEED_GREATER_EQUAL_0_5_9:
from deepspeed.runtime.zero.stage_1_and_2 import DeepSpeedZeroOptimizer
else:
from deepspeed.runtime.zero.stage2 import FP16_DeepSpeedZeroOptimizer as DeepSpeedZeroOptimizer


class ModelParallelBoringModel(BoringModel):
def __init__(self):
Expand Down Expand Up @@ -296,9 +305,7 @@ def test_deepspeed_run_configure_optimizers(tmpdir):

class TestCB(Callback):
def on_train_start(self, trainer, pl_module) -> None:
from deepspeed.runtime.zero.stage2 import FP16_DeepSpeedZeroOptimizer

assert isinstance(trainer.optimizers[0], FP16_DeepSpeedZeroOptimizer)
assert isinstance(trainer.optimizers[0], DeepSpeedZeroOptimizer)
assert isinstance(trainer.optimizers[0].optimizer, torch.optim.SGD)
assert isinstance(trainer.lr_scheduler_configs[0].scheduler, torch.optim.lr_scheduler.StepLR)
# check that the lr_scheduler config was preserved
Expand Down Expand Up @@ -337,9 +344,8 @@ def test_deepspeed_config(tmpdir, deepspeed_zero_config):
class TestCB(Callback):
def on_train_start(self, trainer, pl_module) -> None:
from deepspeed.runtime.lr_schedules import WarmupLR
from deepspeed.runtime.zero.stage2 import FP16_DeepSpeedZeroOptimizer

assert isinstance(trainer.optimizers[0], FP16_DeepSpeedZeroOptimizer)
assert isinstance(trainer.optimizers[0], DeepSpeedZeroOptimizer)
assert isinstance(trainer.optimizers[0].optimizer, torch.optim.SGD)
assert isinstance(trainer.lr_scheduler_configs[0].scheduler, WarmupLR)
assert trainer.lr_scheduler_configs[0].interval == "step"
Expand Down