Skip to content

Commit 1894e32

Browse files
authored
Merge pull request #399 from grafana/karsten/postgres
Import Postgres Mixin
2 parents 9c3fb80 + 6369860 commit 1894e32

File tree

5 files changed

+1530
-0
lines changed

5 files changed

+1530
-0
lines changed

postgres-mixin/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
/alerts.yaml
2+
/rules.yaml
3+
dashboards_out

postgres-mixin/README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Postgres Mixin
2+
3+
_This is a work in progress. We aim for it to become a good role model for alerts
4+
and dashboards eventually, but it is not quite there yet._
5+
6+
The Postgres Mixin is a set of configurable, reusable, and extensible alerts and
7+
dashboards based on the metrics exported by the Postgres Exporter. The mixin creates
8+
recording and alerting rules for Prometheus and suitable dashboard descriptions
9+
for Grafana.
10+
11+
To use them, you need to have `mixtool` and `jsonnetfmt` installed. If you
12+
have a working Go development environment, it's easiest to run the following:
13+
```bash
14+
$ go install github.com/monitoring-mixins/mixtool/cmd/mixtool@main
15+
$ go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest
16+
```
17+
18+
For more advanced uses of mixins, see
19+
https://github.com/monitoring-mixins/docs.

postgres-mixin/alerts/alerts.yaml

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
---
2+
# Prometheus rule file: a single rule group named "PostgreSQL" whose `rules`
# list holds the alerting rules that follow.
groups:
3+
- name: PostgreSQL
4+
rules:
5+
# Fires when, for 1m, the per-instance connection count is at or above
# max_connections minus superuser_reserved_connections.
- alert: PostgreSQLMaxConnectionsReached
  # Folded scalar: the lines below are joined with single spaces into one
  # PromQL expression. Arithmetic binds tighter than the comparison, so the
  # right-hand side is (max_connections - superuser_reserved_connections).
  expr: >-
    sum by (instance) (pg_stat_activity_count)
    >=
    sum by (instance) (pg_settings_max_connections)
    - sum by (instance) (pg_settings_superuser_reserved_connections)
  for: 1m
  labels:
    severity: email
  annotations:
    summary: "{{ $labels.instance }} has maxed out Postgres connections."
    # $value is the left-hand side of the comparison (a connection count),
    # so it is printed without a unit suffix.
    description: >-
      {{ $labels.instance }} is exceeding the currently configured maximum
      Postgres connection limit (current value: {{ $value }}). Services may be
      degraded - please take immediate action (you probably need to increase
      max_connections in the Docker image and re-deploy).
13+
14+
# Early warning: fires when, for 10m, the per-instance connection count is
# above 80% of (max_connections - superuser_reserved_connections).
- alert: PostgreSQLHighConnections
  # Folded scalar: the lines below are joined with single spaces into one
  # PromQL expression.
  expr: >-
    sum by (instance) (pg_stat_activity_count)
    >
    (sum by (instance) (pg_settings_max_connections)
    - sum by (instance) (pg_settings_superuser_reserved_connections)) * 0.8
  for: 10m
  labels:
    severity: email
  annotations:
    summary: "{{ $labels.instance }} is over 80% of max Postgres connections."
    # $value is the left-hand side of the comparison (a connection count),
    # so it is printed without a unit suffix.
    description: >-
      {{ $labels.instance }} is exceeding 80% of the currently configured
      maximum Postgres connection limit (current value: {{ $value }}). Please
      check utilization graphs and confirm if this is normal service growth,
      abuse or an otherwise temporary condition or if new resources need to be
      provisioned (or the limits increased, which is most likely).
22+
23+
# Fires when pg_up has been anything other than 1 for 1m.
- alert: PostgreSQLDown
  expr: pg_up != 1
  for: 1m
  labels:
    severity: email
  annotations:
    summary: "PostgreSQL is not processing queries: {{ $labels.instance }}"
    # The description is limited to what this rule observes; the earlier
    # wording referred to DNS requests, which nothing in this rule measures.
    description: >-
      {{ $labels.instance }} is rejecting query requests from the exporter.
      User services should not be affected provided at least 1 node is still
      alive.
31+
32+
# Fires when, for 2m, the averaged longest transaction duration of a database
# stays above 2 * 60 (presumably seconds - confirm against the exporter).
# Databases whose name matches "template.*" are excluded.
- alert: PostgreSQLSlowQueries
  # pg_stat_activity_max_tx_duration is exported as a gauge by
  # postgres_exporter, so it is compared to the threshold directly; rate() is
  # only meaningful for counters and would treat every drop of the gauge as a
  # counter reset.
  # `cluster` is part of the grouping because the annotations template it;
  # grouping by datname alone discards the label and renders it empty.
  expr: >-
    avg by (cluster, datname)
    (pg_stat_activity_max_tx_duration{datname!~"template.*"})
    > 2 * 60
  for: 2m
  labels:
    severity: email
  annotations:
    summary: >-
      PostgreSQL high number of slow queries on {{ $labels.cluster }} for
      database {{ $labels.datname }}
    description: >-
      PostgreSQL high number of slow queries on {{ $labels.cluster }} for
      database {{ $labels.datname }} with a value of {{ $value }}
40+
41+
# Fires when, for 5m, the averaged per-database transaction rate (commits plus
# rollbacks) exceeds 10000 per second. Databases whose name matches
# "template.*" are excluded.
- alert: PostgreSQLQPS
  # rate() is used instead of irate(): irate() only looks at the last two
  # samples, so a brief dip can reset the `for` timer of an alert.
  # `cluster` is part of the grouping because the annotations template it;
  # grouping by datname alone discards the label and renders it empty.
  expr: >-
    avg by (cluster, datname) (
    rate(pg_stat_database_xact_commit{datname!~"template.*"}[5m])
    + rate(pg_stat_database_xact_rollback{datname!~"template.*"}[5m])
    ) > 10000
  for: 5m
  labels:
    severity: email
  annotations:
    summary: >-
      PostgreSQL high number of queries per second on {{ $labels.cluster }}
      for database {{ $labels.datname }}
    description: >-
      PostgreSQL high number of queries per second on {{ $labels.cluster }}
      for database {{ $labels.datname }} with a value of {{ $value }}
49+
50+
# Fires when, for 5m, the averaged per-database cache hit ratio
# blks_hit / (blks_hit + blks_read) is below 0.98. Databases whose name
# matches "template.*" are excluded.
- alert: PostgreSQLCacheHitRatio
  # `cluster` is part of the grouping because the annotations template it;
  # grouping by datname alone discards the label and renders it empty.
  expr: >-
    avg by (cluster, datname) (
    rate(pg_stat_database_blks_hit{datname!~"template.*"}[5m])
    /
    (rate(pg_stat_database_blks_hit{datname!~"template.*"}[5m])
    + rate(pg_stat_database_blks_read{datname!~"template.*"}[5m]))
    ) < 0.98
  for: 5m
  labels:
    severity: email
  annotations:
    summary: >-
      PostgreSQL low cache hit rate on {{ $labels.cluster }} for database
      {{ $labels.datname }}
    description: >-
      PostgreSQL low cache hit rate on {{ $labels.cluster }} for database
      {{ $labels.datname }} with a value of {{ $value }}

0 commit comments

Comments
 (0)