Skip to content

Commit e41999e

Browse files
akihironitta (Akihiro Nitta)
authored and committed
Update deepspeed and fairscale versions (#12860)
* Fix deepspeed installation * Adapt to deepspeed>=0.5.9 * Fix fairscale installation Co-authored-by: Akihiro Nitta <[email protected]>
1 parent eea6b44 commit e41999e

File tree

3 files changed

+12
-8
lines changed

3 files changed

+12
-8
lines changed

.azure-pipelines/gpu-tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ jobs:
5252
5353
- bash: |
5454
python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
55-
pip install fairscale>=0.4.5
56-
pip install deepspeed>=0.6.0
55+
pip install "fairscale>=0.4.5"
56+
pip install "deepspeed<0.6.0" # https://github.com/microsoft/DeepSpeed/issues/1878
5757
CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
5858
pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0"
5959
pip install . --requirement requirements/devel.txt

pytorch_lightning/utilities/imports.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ def _compare_version(package: str, op: Callable, version: str, use_base_version:
9898
_APEX_AVAILABLE = _module_available("apex.amp")
9999
_BAGUA_AVAILABLE = _package_available("bagua")
100100
_DEEPSPEED_AVAILABLE = _package_available("deepspeed")
101+
_DEEPSPEED_GREATER_EQUAL_0_5_9 = _DEEPSPEED_AVAILABLE and _compare_version("deepspeed", operator.ge, "0.5.9")
102+
_DEEPSPEED_GREATER_EQUAL_0_6 = _DEEPSPEED_AVAILABLE and _compare_version("deepspeed", operator.ge, "0.6.0")
101103
_FAIRSCALE_AVAILABLE = not _IS_WINDOWS and _module_available("fairscale.nn")
102104
_FAIRSCALE_OSS_FP16_BROADCAST_AVAILABLE = _FAIRSCALE_AVAILABLE and _compare_version("fairscale", operator.ge, "0.3.3")
103105
_FAIRSCALE_FULLY_SHARDED_AVAILABLE = _FAIRSCALE_AVAILABLE and _compare_version("fairscale", operator.ge, "0.3.4")

tests/strategies/test_deepspeed_strategy.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from pytorch_lightning.strategies import DeepSpeedStrategy
2020
from pytorch_lightning.strategies.deepspeed import LightningDeepSpeedModule
2121
from pytorch_lightning.utilities.exceptions import MisconfigurationException
22-
from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE
22+
from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE, _DEEPSPEED_GREATER_EQUAL_0_5_9
2323
from pytorch_lightning.utilities.meta import init_meta_context
2424
from tests.helpers.boring_model import BoringModel, RandomDataset, RandomIterableDataset
2525
from tests.helpers.datamodules import ClassifDataModule
@@ -29,6 +29,11 @@
2929
import deepspeed
3030
from deepspeed.utils.zero_to_fp32 import convert_zero_checkpoint_to_fp32_state_dict
3131

32+
if _DEEPSPEED_GREATER_EQUAL_0_5_9:
33+
from deepspeed.runtime.zero.stage_1_and_2 import DeepSpeedZeroOptimizer
34+
else:
35+
from deepspeed.runtime.zero.stage2 import FP16_DeepSpeedZeroOptimizer as DeepSpeedZeroOptimizer
36+
3237

3338
class ModelParallelBoringModel(BoringModel):
3439
def __init__(self):
@@ -280,9 +285,7 @@ def test_deepspeed_run_configure_optimizers(tmpdir):
280285

281286
class TestCB(Callback):
282287
def on_train_start(self, trainer, pl_module) -> None:
283-
from deepspeed.runtime.zero.stage2 import FP16_DeepSpeedZeroOptimizer
284-
285-
assert isinstance(trainer.optimizers[0], FP16_DeepSpeedZeroOptimizer)
288+
assert isinstance(trainer.optimizers[0], DeepSpeedZeroOptimizer)
286289
assert isinstance(trainer.optimizers[0].optimizer, torch.optim.SGD)
287290
assert isinstance(trainer.lr_scheduler_configs[0].scheduler, torch.optim.lr_scheduler.StepLR)
288291
# check that the lr_scheduler config was preserved
@@ -319,9 +322,8 @@ def test_deepspeed_config(tmpdir, deepspeed_zero_config):
319322
class TestCB(Callback):
320323
def on_train_start(self, trainer, pl_module) -> None:
321324
from deepspeed.runtime.lr_schedules import WarmupLR
322-
from deepspeed.runtime.zero.stage2 import FP16_DeepSpeedZeroOptimizer
323325

324-
assert isinstance(trainer.optimizers[0], FP16_DeepSpeedZeroOptimizer)
326+
assert isinstance(trainer.optimizers[0], DeepSpeedZeroOptimizer)
325327
assert isinstance(trainer.optimizers[0].optimizer, torch.optim.SGD)
326328
assert isinstance(trainer.lr_scheduler_configs[0].scheduler, WarmupLR)
327329
assert trainer.lr_scheduler_configs[0].interval == "step"

0 commit comments

Comments (0)