Skip to content

Commit fcfa7fa

Browse files
authored
move TPU cleaning to GH actions (#5991)
* move TPU cleaning to GH actions * test * .
1 parent 27ab769 commit fcfa7fa

File tree

3 files changed

+42
-38
lines changed

3 files changed

+42
-38
lines changed

.circleci/config.yml

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -85,19 +85,6 @@ references:
8585
sudo pip install pycobertura
8686
pycobertura show coverage.xml
8787
88-
delete_gke_jobs: &delete_gke_jobs
89-
run:
90-
name: Delete GKE Jobs
91-
command: |
92-
# Match jobs whose age matches patterns like '1h' or '1d', i.e. any job
93-
# that has been around longer than 1hr. First print all columns for
94-
# matches, then execute the delete.
95-
jobs_to_delete=$(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $0}')
96-
echo $jobs_to_delete
97-
if [ ${#jobs_to_delete} -gt 1 ];
98-
then kubectl delete job $(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $1}');
99-
fi
100-
10188
jobs:
10289

10390
TPU-tests:
@@ -138,33 +125,9 @@ jobs:
138125
path: docs/build/html/
139126
destination: html
140127

141-
cleanup-gke-jobs:
142-
docker:
143-
- image: circleci/python:3.7
144-
steps:
145-
- gcp-gke/install
146-
- gcp-gke/update-kubeconfig-with-credentials:
147-
cluster: $GKE_CLUSTER
148-
perform-login: true
149-
- *delete_gke_jobs
150-
151-
152128
workflows:
153129
version: 2
154130
tpu-tests:
155131
jobs:
156132
- build-Docs
157133
- TPU-tests
158-
tpu-cleanup:
159-
triggers:
160-
- schedule:
161-
# The cron format is:
162-
# min (0-59) hour (0-23) monthday (1-31) month (1-12) weekday (0-6, 0=Sun)
163-
# Set to run at the first minute of every hour.
164-
cron: "0 * * * *"
165-
filters:
166-
branches:
167-
only:
168-
- master
169-
jobs:
170-
- cleanup-gke-jobs

.github/workflows/ci_test-tpu.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ on:
99
# - master
1010

1111
env:
12-
PROJECT_ID: ${{ secrets.GKE_PROJECT }}
1312
GKE_CLUSTER: lightning-cluster
1413
GKE_ZONE: us-central1-a
1514
IMAGE: gcr.io/${{ secrets.GKE_PROJECT }}/tpu-testing-image
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
name: Recurent events
2+
3+
# https://jasonet.co/posts/scheduled-actions/
4+
# https://github.community/t/distinct-job-for-each-schedule/17811/2
5+
on:
6+
push:
7+
branches: [ master ]
8+
schedule:
9+
- cron: "*/20 * * * *"  # every 20 minutes
10+
11+
env:
12+
GKE_CLUSTER: lightning-cluster
13+
GKE_ZONE: us-central1-a
14+
15+
jobs:
16+
tpu-cleanup:
17+
name: TPU cleaning
18+
runs-on: ubuntu-20.04
19+
20+
steps:
21+
- name: Setup gcloud CLI
22+
uses: GoogleCloudPlatform/github-actions/setup-gcloud@master
23+
with:
24+
version: '290.0.1'
25+
service_account_key: ${{ secrets.GKE_SA_KEY_BASE64 }}
26+
project_id: ${{ secrets.GKE_PROJECT }}
27+
export_default_credentials: true
28+
# Get the GKE credentials so we can deploy to the cluster; Use either zone or region depending on cluster setup.
29+
- run: |-
30+
gcloud container clusters get-credentials "$GKE_CLUSTER" --zone "$GKE_ZONE"
31+
shell: bash
32+
33+
- name: Clean all mong hanging jobs
34+
run: |
35+
# Match jobs whose age matches patterns like '1h' or '1d', i.e. any job
36+
# that has been around longer than 1hr. First print all columns for
37+
# matches, then execute the delete.
38+
jobs_to_delete=$(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $0}')
39+
echo $jobs_to_delete
40+
if [ ${#jobs_to_delete} -gt 1 ];
41+
then kubectl delete job $(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $1}');
42+
fi

0 commit comments

Comments
 (0)