Skip to content

Commit 580a5bd

Browse files
authored
Use kubectl to get logs from TPU CI instead of gcloud logging. (#2918)
* Use kubectl to get logs from TPU CI instead of gcloud logging.
* Update Github Action to read logs from kubectl rather than gcloud logging.
1 parent 69d241c commit 580a5bd

File tree

2 files changed

+6
-10
lines changed

2 files changed

+6
-10
lines changed

.circleci/config.yml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,9 @@ references:
6363
printf "Waiting for job to finish: " && \
6464
while [ $i -lt $MAX_CHECKS ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else printf "."; fi; sleep $CHECK_SPEEP; done && \
6565
echo "Done waiting. Job status code: $status_code" && \
66-
# Allow time for logs to flush.
67-
sleep 30 && \
68-
echo "JOB_NAME: $job_name" && \
69-
gcloud logging read "resource.type=k8s_container resource.labels.project_id=$GOOGLE_PROJECT_ID resource.labels.location=$GOOGLE_COMPUTE_ZONE resource.labels.cluster_name=$GKE_CLUSTER resource.labels.namespace_name=default resource.labels.pod_name:$job_name" --limit 10000000 --order asc --format 'value(textPayload)' --project=$GOOGLE_PROJECT_ID > /tmp/full_output.txt && \
66+
pod_name=$(kubectl get po -l controller-uid=`kubectl get job $job_name -o "jsonpath={.metadata.labels.controller-uid}"` | awk 'match($0,!/NAME/) {print $1}') && \
67+
echo "GKE pod name: $pod_name" && \
68+
kubectl logs -f $pod_name --container=train > /tmp/full_output.txt
7069
if grep -q '<?xml version="1.0" ?>' /tmp/full_output.txt ; then csplit /tmp/full_output.txt '/<?xml version="1.0" ?>/'; else mv /tmp/full_output.txt xx00; fi && \
7170
# First portion is the test logs. Print these to Github Action stdout.
7271
cat xx00 && \

.github/workflows/tpu-testing.yml

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,12 +93,9 @@ jobs:
9393
printf "Waiting for job to finish: " && \
9494
while [ $i -lt $MAX_CHECKS ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else printf "." ; fi; sleep $CHECK_SPEEP; done && \
9595
echo "Done waiting. Job status code: $status_code" && \
96-
# Allow time for logs to flush.
97-
sleep 30 && \
98-
echo "JOB_NAME: $job_name" && \
99-
echo "GKE_CLUSTER: $GKE_CLUSTER" && \
100-
echo "GKE_ZONE: $GKE_ZONE" && \
101-
gcloud logging read "resource.type=k8s_container resource.labels.project_id=$PROJECT_ID resource.labels.location=$GKE_ZONE resource.labels.cluster_name=$GKE_CLUSTER resource.labels.namespace_name=default resource.labels.pod_name:$job_name" --limit 10000000 --order asc --format 'value(textPayload)' --project=$PROJECT_ID > /tmp/full_output.txt && \
96+
pod_name=$(kubectl get po -l controller-uid=`kubectl get job $job_name -o "jsonpath={.metadata.labels.controller-uid}"` | awk 'match($0,!/NAME/) {print $1}') && \
97+
echo "GKE pod name: $pod_name" && \
98+
kubectl logs -f $pod_name --container=train > /tmp/full_output.txt
10299
if grep -q '<?xml version="1.0" ?>' /tmp/full_output.txt ; then csplit /tmp/full_output.txt '/<?xml version="1.0" ?>/'; else mv /tmp/full_output.txt xx00; fi && \
103100
# First portion is the test logs. Print these to Github Action stdout.
104101
cat xx00 && \

0 commit comments

Comments
 (0)