Skip to content

Commit 93e037b

Browse files
committed
chore: more lsn checkpoint handling
1 parent 185c98e commit 93e037b

File tree

3 files changed

+48
-14
lines changed

3 files changed

+48
-14
lines changed

docker/all-in-one/entrypoint.sh

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ SUPERVISOR_CONF=/etc/supervisor/supervisord.conf
77
DATA_VOLUME_MOUNTPOINT=${DATA_VOLUME_MOUNTPOINT:-/data}
88
export CONFIGURED_FLAG_PATH=${CONFIGURED_FLAG_PATH:-$DATA_VOLUME_MOUNTPOINT/machine.configured}
99

10+
LSN_CHECKPOINT_FILE_PATH="${DATA_VOLUME_MOUNTPOINT}/latest-lsn-checkpoint"
11+
1012
# Ref: https://gist.github.com/sj26/88e1c6584397bb7c13bd11108a579746
1113
function retry {
1214
# Pass 0 for unlimited retries
@@ -51,22 +53,40 @@ function enable_swap {
5153
}
5254

5355
# Seed the LSN checkpoint bookkeeping files on the data volume.
#
# Creates "${LSN_CHECKPOINT_FILE_PATH}" with the placeholder LSN "0/0" when it
# does not exist yet, and makes sure the "${LSN_CHECKPOINT_FILE_PATH}.previous"
# companion file (compared against the current LSN before pushing) exists too.
function create_lsn_checkpoint_file {
    if [ ! -f "${LSN_CHECKPOINT_FILE_PATH}" ]; then
        echo -n "0/0" > "${LSN_CHECKPOINT_FILE_PATH}"
        chown postgres:postgres "${LSN_CHECKPOINT_FILE_PATH}"
        chmod 0300 "${LSN_CHECKPOINT_FILE_PATH}"
    fi

    # Checked separately from the checkpoint file: a volume that already holds
    # a checkpoint file but no ".previous" file would otherwise never get one,
    # and every later read of the ".previous" file would fail.
    if [ ! -f "${LSN_CHECKPOINT_FILE_PATH}.previous" ]; then
        touch "${LSN_CHECKPOINT_FILE_PATH}.previous"
    fi
}
6064

65+
# Push the current LSN checkpoint via admin-mgr, unless there is nothing new.
#
# The push is skipped when the checkpoint file is missing, when it still holds
# the placeholder "0/0", or when its content equals the ".previous" file.
# After a successful push the pushed LSN is written to the ".previous" file so
# the same value is not pushed again. A failed push is only logged; this
# function never fails its caller.
function push_lsn_checkpoint_file {
    if [ ! -f "${LSN_CHECKPOINT_FILE_PATH}" ]; then
        echo "LSN checkpoint file is missing. Skipping push."
        return
    fi

    LSN_CHECKPOINT=$(cat "${LSN_CHECKPOINT_FILE_PATH}")

    # A missing ".previous" file is treated as "nothing pushed yet" instead of
    # letting `cat` fail.
    PREVIOUS_LSN_CHECKPOINT=""
    if [ -f "${LSN_CHECKPOINT_FILE_PATH}.previous" ]; then
        PREVIOUS_LSN_CHECKPOINT=$(cat "${LSN_CHECKPOINT_FILE_PATH}.previous")
    fi

    if [ "${LSN_CHECKPOINT}" == "${PREVIOUS_LSN_CHECKPOINT}" ]; then
        echo "LSN checkpoint file has not changed. Skipping push."
        return
    fi

    if [ "${LSN_CHECKPOINT}" == "0/0" ]; then
        echo "LSN checkpoint file is empty. Skipping push."
        return
    fi

    if /usr/bin/admin-mgr lsn-checkpoint-push; then
        # Remember what was pushed so the "has not changed" check above works.
        echo -n "${LSN_CHECKPOINT}" > "${LSN_CHECKPOINT_FILE_PATH}.previous" \
            || echo "Failed to record pushed LSN checkpoint"
    else
        echo "Failed to push LSN checkpoint"
    fi
}
81+
6182
# Shutdown handler: stops postgresql through supervisorctl, pushes the LSN
# checkpoint, then sends TERM to the supervisord process.
function graceful_shutdown {
6283
echo "$(date): Received SIGINT. Shutting down."
6384
supervisorctl stop postgresql
6485

6586
# Postgres ships the latest WAL file using archive_command during shutdown, in a blocking operation
6687
# This is to ensure that the WAL file is shipped, just in case
6788
sleep 0.2
68-
69-
/usr/bin/admin-mgr lsn-checkpoint-push || echo "Failed to push LSN checkpoint"
89+
push_lsn_checkpoint_file
7090

7191
# Terminate the process whose pid is reported by `supervisorctl pid`
kill -s TERM "$(supervisorctl pid)"
7292
}
@@ -180,6 +200,8 @@ function start_supervisor {
180200

181201
# Start supervisord
182202
/usr/bin/supervisord -c $SUPERVISOR_CONF
203+
204+
183205
}
184206

185207
# Increase max number of open connections
@@ -272,10 +294,6 @@ if [ "${PLATFORM_DEPLOYMENT:-}" ]; then
272294
trap graceful_shutdown SIGINT
273295
fi
274296

275-
if [ "${SHUTDOWN_GRACEFULLY:-}" ]; then
276-
/usr/bin/admin-mgr lsn-checkpoint-push || echo "Failed to push LSN checkpoint"
277-
exit 0
278-
fi
279-
280297
touch "$CONFIGURED_FLAG_PATH"
281298
start_supervisor
299+
push_lsn_checkpoint_file

docker/all-in-one/etc/supervisor/db-only/supa-shutdown.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ stdout_logfile=/var/log/services/supa-shutdown.log
77
redirect_stderr=true
88
stdout_logfile_maxbytes=10MB
99
priority=50
10+
environment=MAX_IDLE_TIME_MINUTES="%(ENV_MAX_IDLE_TIME_MINUTES)s"

docker/all-in-one/shutdown_event_listener.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
import sys
33
import subprocess
44

5+
LSN_CHECKPOINT_SHIP_INTERVAL = 10
6+
7+
checkpointFile = '/data/latest-lsn-checkpoint'
8+
checkpointFilePrevious = '/data/latest-lsn-checkpoint.previous'
9+
510
def write_stdout(s):
611
sys.stdout.write(s)
712
sys.stdout.flush()
@@ -23,10 +28,20 @@ def main():
2328
write_stderr(data)
2429

2530
if headers['eventname'] == 'TICK_60':
26-
with open('/data/latest-lsn-checkpoint') as f:
27-
contents = f.read()
28-
if contents != '0/0':
31+
if os.path.getmtime(checkpointFile) < time.time() - 60 * LSN_CHECKPOINT_SHIP_INTERVAL:
32+
break
33+
34+
previousLSN = ''
35+
with open(checkpointFilePrevious) as f:
36+
previousLSN = f.read()
37+
38+
# If the current LSN is different from the previous one, persist it remotely
39+
with open(checkpointFile) as f:
40+
currentLSN = f.read()
41+
if currentLSN != '0/0' and currentLSN != previousLSN:
2942
subprocess.run(["/usr/bin/admin-mgr", "lsn-checkpoint-push"])
43+
with open(previousLSN, 'w') as f:
44+
f.write(currentLSN)
3045

3146
write_stdout('RESULT 2\nOK')
3247

0 commit comments

Comments
 (0)