4 files changed: +13 -19 lines changed
Original file line number | Diff line number | Diff line change
@@ -28,18 +28,12 @@ jobs:
2828 cancelTimeoutInMinutes : " 2"
2929 pool : azure-gpus-spot
3030 container :
31- image : "pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.8"
31+ image : "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.11"
3232 options : " --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g"
3333 workspace :
3434 clean : all
3535
3636 steps :
37- - bash : |
38- # TODO: Prepare a docker image with 1.8.2 (LTS) installed and remove manual installation.
39- pip install torch==1.8.2+cu102 torchvision==0.9.2+cu102 torchtext==0.9.2 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
40- pip list
41- displayName: 'Install PyTorch LTS'
42-
4337 - bash : |
4438 python -m pytest tests/benchmarks -v --durations=0
4539 displayName: 'Testing: benchmarks'
Original file line number Diff line number Diff line change 1818
1919jobs :
2020 - job : pytest
21+ strategy :
22+ matrix :
23+ ' PyTorch - LTS ' :
24+ image : " pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.8"
25+ ' PyTorch - stable ' :
26+ image : " pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.11"
2127 # how long to run the job before automatically cancelling
2228 timeoutInMinutes : " 65"
2329 # how much time to give 'run always even if cancelled tasks' before stopping them
2430 cancelTimeoutInMinutes : " 2"
2531
2632 pool : azure-gpus-spot
2733
28- # ToDo: this need to have installed docker in the base image...
2934 container :
30- # base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
31- # run on torch 1.8 as it's the LTS version
32- image : " pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.8"
35+ image : $(image)
3336 # default shm size is 64m. Increase it to avoid:
3437 # 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8'
3538 options : " --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=512m"
5255 - bash : |
5356 python -c "fname = 'requirements/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
5457 CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
55- # TODO: Prepare a docker image with 1.8.2 (LTS) installed and remove manual installation.
56- pip install torch==1.8.2+cu102 torchvision==0.9.2+cu102 torchtext==0.9.2 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
5758 pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0"
5859 pip install . --requirement requirements/devel.txt
5960 pip install . --requirement requirements/strategies.txt
Original file line number Diff line number Diff line change 1616 pip install "check-jsonschema>=0.10"
1717
1818 - name : GH Workflows
19- run : |
20- check-jsonschema .github/workflows/*.yml --builtin-schema "github-workflows"
19+ run : check-jsonschema .github/workflows/*.yml --builtin-schema "github-workflows"
2120
2221 - name : Azure Pipelines
23- run : |
24- check-jsonschema .azure-pipelines/*.yml --schemafile "https://raw.githubusercontent.com/microsoft/azure-pipelines-vscode/v1.188.1/service-schema.json"
22+ env :
23+ SCHEMA_FILE : https://raw.githubusercontent.com/microsoft/azure-pipelines-vscode/v1.204.0/service-schema.json
24+ run : check-jsonschema .azure-pipelines/*.yml --schemafile "$SCHEMA_FILE"
Original file line number Diff line number Diff line change 1212# See the License for the specific language governing permissions and
1313# limitations under the License.
1414
15- ARG CUDA_VERSION=11.3.1
1615ARG UBUNTU_VERSION=20.04
16+ ARG CUDA_VERSION=11.3.1
1717
18- # TODO: Remove OS arg to always use ubuntu20.04 when dropping CUDA 10.2
1918FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
2019
2120ARG PYTHON_VERSION=3.9
You can’t perform that action at this time.
0 commit comments