Skip to content

Commit 460147d

Browse files
Borda, akihironitta, carmocca
committed
CI: Azure - multiple configs (#12984)
* CI: Azure - multiple configs
* names
* benchmark
* Apply suggestions from code review

Co-authored-by: Akihiro Nitta <[email protected]>
Co-authored-by: Carlos Mocholí <[email protected]>
1 parent 27db899 commit 460147d

File tree

4 files changed

+13
-19
lines changed

4 files changed

+13
-19
lines changed

.azure-pipelines/gpu-benchmark.yml

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,18 +28,12 @@ jobs:
2828
cancelTimeoutInMinutes: "2"
2929
pool: azure-gpus-spot
3030
container:
31-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.8"
31+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.11"
3232
options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g"
3333
workspace:
3434
clean: all
3535

3636
steps:
37-
- bash: |
38-
# TODO: Prepare a docker image with 1.8.2 (LTS) installed and remove manual installation.
39-
pip install torch==1.8.2+cu102 torchvision==0.9.2+cu102 torchtext==0.9.2 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
40-
pip list
41-
displayName: 'Install PyTorch LTS'
42-
4337
- bash: |
4438
python -m pytest tests/benchmarks -v --durations=0
4539
displayName: 'Testing: benchmarks'

.azure-pipelines/gpu-tests.yml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,21 @@ pr:
1818

1919
jobs:
2020
- job: pytest
21+
strategy:
22+
matrix:
23+
'PyTorch - LTS':
24+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.8"
25+
'PyTorch - stable':
26+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.11"
2127
# how long to run the job before automatically cancelling
2228
timeoutInMinutes: "65"
2329
# how much time to give 'run always even if cancelled tasks' before stopping them
2430
cancelTimeoutInMinutes: "2"
2531

2632
pool: azure-gpus-spot
2733

28-
# ToDo: this need to have installed docker in the base image...
2934
container:
30-
# base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
31-
# run on torch 1.8 as it's the LTS version
32-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.8"
35+
image: $(image)
3336
# default shm size is 64m. Increase it to avoid:
3437
# 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8'
3538
options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=512m"
@@ -52,8 +55,6 @@ jobs:
5255
- bash: |
5356
python -c "fname = 'requirements/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
5457
CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
55-
# TODO: Prepare a docker image with 1.8.2 (LTS) installed and remove manual installation.
56-
pip install torch==1.8.2+cu102 torchvision==0.9.2+cu102 torchtext==0.9.2 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
5758
pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0"
5859
pip install . --requirement requirements/devel.txt
5960
pip install . --requirement requirements/strategies.txt

.github/workflows/ci_schema.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ jobs:
1616
pip install "check-jsonschema>=0.10"
1717
1818
- name: GH Workflows
19-
run: |
20-
check-jsonschema .github/workflows/*.yml --builtin-schema "github-workflows"
19+
run: check-jsonschema .github/workflows/*.yml --builtin-schema "github-workflows"
2120

2221
- name: Azure Pipelines
23-
run: |
24-
check-jsonschema .azure-pipelines/*.yml --schemafile "https://raw.githubusercontent.com/microsoft/azure-pipelines-vscode/v1.188.1/service-schema.json"
22+
env:
23+
SCHEMA_FILE: https://raw.githubusercontent.com/microsoft/azure-pipelines-vscode/v1.204.0/service-schema.json
24+
run: check-jsonschema .azure-pipelines/*.yml --schemafile "$SCHEMA_FILE"

dockers/base-cuda/Dockerfile

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,9 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
ARG CUDA_VERSION=11.3.1
1615
ARG UBUNTU_VERSION=20.04
16+
ARG CUDA_VERSION=11.3.1
1717

18-
# TODO: Remove OS arg to always use ubuntu20.04 when dropping CUDA 10.2
1918
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
2019

2120
ARG PYTHON_VERSION=3.9

0 commit comments

Comments
 (0)