Skip to content

Commit 470842f

Browse files
Borda and carmocca authored
CI: validate JSON & fix benchmark (#8567)
* CI: validate JSON
* as GHA
* PT1.8
* 32g

Co-authored-by: Carlos Mocholí <[email protected]>
1 parent 0a71fe2 commit 470842f

File tree

5 files changed

+51
-67
lines changed

5 files changed

+51
-67
lines changed

.azure-pipelines/gpu_benchmark.yml renamed to .azure-pipelines/gpu-benchmark.yml

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,19 @@
1-
name: GPU Parity testing
2-
3-
on:
4-
schedule:
5-
- cron: "0 0 * * *" # At the end of every day
1+
schedules:
2+
- cron: "0 0 * * *" # At the end of every day
3+
displayName: Daily midnight benchmark
4+
branches:
5+
include:
6+
- "master"
67

78
jobs:
8-
parity-test:
9-
timeoutInMinutes: 120
10-
11-
cancelTimeoutInMinutes: 2
12-
9+
- job: benchmarks
10+
timeoutInMinutes: "90"
11+
cancelTimeoutInMinutes: "2"
1312
pool: gridai-spot-pool
14-
1513
container:
1614
# base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
17-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.6"
18-
15+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.8"
16+
options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g"
1917
workspace:
2018
clean: all
2119

.azure-pipelines/gpu-tests.yml

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,19 @@ trigger:
99
- '*'
1010
branches:
1111
include:
12-
- master
13-
- release/*
14-
- refs/tags/*
12+
- "master"
13+
- "release/*"
14+
- "refs/tags/*"
1515
pr:
16-
- master
17-
- release/*
16+
- "master"
17+
- "release/*"
1818

1919
jobs:
2020
- job: pytest
2121
# how long to run the job before automatically cancelling
22-
timeoutInMinutes: 45
22+
timeoutInMinutes: "45"
2323
# how much time to give 'run always even if cancelled tasks' before stopping them
24-
cancelTimeoutInMinutes: 2
24+
cancelTimeoutInMinutes: "2"
2525

2626
pool: gridai-spot-pool
2727

@@ -92,14 +92,15 @@ jobs:
9292
testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)'
9393
condition: succeededOrFailed()
9494

95-
- task: PublishCodeCoverageResults@1
96-
displayName: 'Publish coverage report'
97-
inputs:
98-
codeCoverageTool: 'cobertura'
99-
summaryFileLocation: 'coverage.xml'
100-
reportDirectory: '$(Build.SourcesDirectory)/htmlcov'
101-
testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)'
102-
condition: succeededOrFailed()
95+
# todo: re-enable once the schema check passes; at the moment it also seems to have no effect
96+
#- task: PublishCodeCoverageResults@2
97+
# displayName: 'Publish coverage report'
98+
# inputs:
99+
# codeCoverageTool: 'Cobertura'
100+
# summaryFileLocation: 'coverage.xml'
101+
# reportDirectory: '$(Build.SourcesDirectory)/htmlcov'
102+
# testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)'
103+
# condition: succeededOrFailed()
103104

104105
- script: |
105106
set -e

.github/workflows/ci_dockers.yml

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -123,21 +123,6 @@ jobs:
123123
push: false
124124
timeout-minutes: 50
125125

126-
build-nvidia:
127-
runs-on: ubuntu-20.04
128-
# todo: temporarily skip as the base container does not fit to agent
129-
if: false
130-
steps:
131-
- name: Checkout
132-
uses: actions/checkout@v2
133-
134-
- name: Build NVIDIA Docker
135-
uses: docker/build-push-action@v2
136-
with:
137-
file: dockers/nvidia/Dockerfile
138-
push: false
139-
timeout-minutes: 50
140-
141126
build-ipu:
142127
runs-on: ubuntu-20.04
143128
strategy:

.github/workflows/ci_schema.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
name: CI action schema
2+
on: # Trigger the workflow on every push, and on pull requests targeting master or release/* branches
3+
push: {}
4+
pull_request:
5+
branches: [master, "release/*"]
6+
7+
jobs:
8+
validate-schema:
9+
runs-on: ubuntu-20.04
10+
steps:
11+
- name: Checkout
12+
uses: actions/checkout@v2
13+
14+
- name: Install pkg
15+
run: |
16+
pip install check-jsonschema
17+
18+
- name: GH Workflows
19+
run: |
20+
check-jsonschema .github/workflows/*.yml --schemafile "https://json.schemastore.org/github-workflow"
21+
22+
- name: Azure Pipelines
23+
run: |
24+
check-jsonschema .azure-pipelines/*.yml --schemafile "https://raw.githubusercontent.com/microsoft/azure-pipelines-vscode/v1.188.1/service-schema.json"

.github/workflows/events-nightly.yml

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -153,30 +153,6 @@ jobs:
153153
tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
154154
timeout-minutes: 55
155155

156-
docker-NVIDIA:
157-
runs-on: ubuntu-20.04
158-
# todo: temporarily skip as the base container does not fit to agent
159-
if: false
160-
steps:
161-
- name: Checkout
162-
uses: actions/checkout@v2
163-
164-
# https://github.com/docker/setup-buildx-action
165-
# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
166-
- uses: docker/setup-buildx-action@v1
167-
- name: Login to DockerHub
168-
uses: docker/login-action@v1
169-
with:
170-
username: ${{ secrets.DOCKER_USERNAME }}
171-
password: ${{ secrets.DOCKER_PASSWORD }}
172-
173-
- name: Publish NVIDIA to Docker Hub
174-
uses: docker/build-push-action@v2
175-
with:
176-
file: dockers/nvidia/Dockerfile
177-
tags: nvcr.io/pytorchlightning/pytorch_lightning:latest
178-
timeout-minutes: 55
179-
180156
docker-IPU:
181157
runs-on: ubuntu-20.04
182158
strategy:

0 commit comments

Comments
 (0)