@@ -91,25 +91,26 @@ spec:
9191 expr : |
9292 sum(time() - kube_pod_deletion_timestamp{namespace="default", pod=~"^ws-.*", cluster!~"ephemeral.*"}) by (pod) > 24 * 60 * 60
9393
94- - alert : GitpodImagebuildSuccessRate
94+ - alert : GitpodImagebuildDoneSuccess
9595 labels :
96- severity : warning
96+ severity : critical
9797 team : workspace
98+ for : 4h
9899 annotations :
99- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodImagebuildSuccessRate .md
100- summary : imagebuild success rate is low in cluster {{ $labels.cluster }}.
101- description : imagebuild are failing at too high of a rate in cluster {{ $labels.cluster }}.
100+ runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodImagebuildDoneSuccess.md
101+ summary : imagebuilds done are failing at a high rate in cluster {{ $labels.cluster }}.
102+ description : imagebuilds that reach done are failing at too high of a rate in cluster {{ $labels.cluster }}.
102103 expr : |
103- (1 - (sum(rate(gitpod_image_builder_builds_done_total{success="false", cluster!~"ephemeral.*"}[4h])) / sum(rate(gitpod_image_builder_builds_done_total{cluster!~"ephemeral.*"}[4h])))) < 0.75
104+ (1 - (sum(rate(gitpod_image_builder_builds_done_total{success="false", cluster!~"ephemeral.*"}[4h])) / sum(rate(gitpod_image_builder_builds_done_total{cluster!~"ephemeral.*"}[4h])))) < 0.60
104105
105- - alert : GitpodImagebuildSuccessFailing
106+ - alert : GitpodImagebuildStartSuccess
106107 labels :
107108 severity : critical
108109 team : workspace
109- for : 3h
110+ for : 2h
110111 annotations :
111- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodImagebuildSuccessRate .md
112- summary : imagebuild success rate is failing in cluster {{ $labels.cluster }}.
113- description : imagebuild are failing at too high of a rate in cluster {{ $labels.cluster }}.
112+ runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodImagebuildStartSuccess.md
113+ summary : imagebuild start success rate is failing in cluster {{ $labels.cluster }}.
114+ description : imagebuild starts are failing at too high of a rate in cluster {{ $labels.cluster }}.
114115 expr : |
115- (1 - (sum(rate(gitpod_image_builder_builds_done_total{success="false", cluster!~"ephemeral.*"}[4h])) / sum(rate(gitpod_image_builder_builds_done_total{ cluster!~"ephemeral.*"}[4h])))) < 0.50
116+ (1 - (sum(rate(gitpod_ws_manager_workspace_starts_failure_total{type="IMAGEBUILD", cluster!~"ephemeral.*"}[4h])) / sum(rate(gitpod_ws_manager_workspace_starts_total{type="IMAGEBUILD", cluster!~"ephemeral.*"}[4h])))) < 0.99
0 commit comments