Skip to content

Commit 918765f

Browse files
committed
update: graceful shutdown for recording sidecar container in K8s
Signed-off-by: Viet Nguyen Duc <[email protected]>
1 parent 75fb697 commit 918765f

21 files changed, +229 -65 lines changed

.github/workflows/docker-test.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -135,7 +135,7 @@ jobs:
135135
uses: nick-invision/retry@master
136136
if: matrix.build-all != true
137137
with:
138-
timeout_minutes: 20
138+
timeout_minutes: 30
139139
max_attempts: 3
140140
retry_wait_seconds: 60
141141
command: |
@@ -156,7 +156,7 @@ jobs:
156156
- name: Run Docker Compose to ${{ matrix.test-strategy }}
157157
uses: nick-invision/retry@master
158158
with:
159-
timeout_minutes: 20
159+
timeout_minutes: 40
160160
max_attempts: 2
161161
retry_wait_seconds: 60
162162
command: |

Base/check-grid.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,4 +27,4 @@ do
2727
esac
2828
done
2929

30-
curl -sSL http://${HOST}:${PORT}/wd/hub/status | jq -r '.value.ready' | grep -q "true" || exit 1
30+
curl -skSL ${SE_SERVER_PROTOCOL:-"http"}://${HOST}:${PORT}/wd/hub/status | jq -r '.value.ready' | grep -q "true" || exit 1

Base/entry_point.sh

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,11 @@ function shutdown {
2222
echo "Waiting for Selenium Node to shutdown gracefully..."
2323
bash ${NODE_CONFIG_DIRECTORY}/nodePreStop.sh
2424
fi
25+
if [ -n "${SE_VIDEO_CONTAINER_NAME}" ]; then
26+
# For K8s, when video sidecar container and shareProcessNamespace are enabled in pod spec
27+
echo "Shutting down ${SE_VIDEO_CONTAINER_NAME} container..."
28+
pkill -f "${SE_VIDEO_CONTAINER_NAME}"
29+
fi
2530
kill -s SIGTERM ${SUPERVISOR_PID}
2631
wait ${SUPERVISOR_PID}
2732
echo "Shutdown complete"

NodeBase/selenium.conf

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ priority=0
77
command=/opt/bin/start-xvfb.sh
88
autostart=true
99
autorestart=true
10+
killasgroup=true
1011

1112
;Logs
1213
redirect_stderr=false
@@ -24,6 +25,7 @@ priority=5
2425
command=/opt/bin/start-vnc.sh
2526
autostart=true
2627
autorestart=true
28+
killasgroup=true
2729

2830
;Logs
2931
redirect_stderr=false
@@ -41,6 +43,7 @@ priority=10
4143
command=/opt/bin/start-novnc.sh
4244
autostart=true
4345
autorestart=true
46+
killasgroup=true
4447

4548
;Logs
4649
redirect_stderr=false

Video/entry_point.sh

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,6 @@ SUPERVISOR_PID=$!
1717
function shutdown {
1818
echo "Trapped SIGTERM/SIGINT/x so shutting down supervisord..."
1919
kill -s SIGTERM ${SUPERVISOR_PID}
20-
wait `pgrep -f ffmpeg | tr '\n' ' '`
21-
wait `pgrep -f rclone | tr '\n' ' '`
2220
wait ${SUPERVISOR_PID}
2321
echo "Shutdown complete"
2422
}

Video/supervisord.conf

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@ minprocs=200 ; (min. avail process descriptors;
1414
[program:video-recording]
1515
priority=0
1616
command=/opt/bin/video.sh
17-
stopasgroup = true
1817
killasgroup=true
1918
autostart=true
2019
startsecs=0
@@ -28,7 +27,6 @@ stdout_logfile_maxbytes=0
2827
[program:video-ready]
2928
priority=5
3029
command=python3 /opt/bin/video_ready.py
31-
stopasgroup = true
3230
killasgroup=true
3331
autostart=true
3432
autorestart=true
@@ -41,7 +39,6 @@ stdout_logfile_maxbytes=0
4139
[program:video-upload]
4240
priority=10
4341
command=/opt/bin/upload.sh
44-
stopasgroup = true
4542
killasgroup=true
4643
autostart=%(ENV_SE_VIDEO_INTERNAL_UPLOAD)s
4744
autorestart=%(ENV_SE_VIDEO_INTERNAL_UPLOAD)s

Video/upload.sh

Lines changed: 21 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@ UPLOAD_OPTS=${SE_UPLOAD_OPTS:-"-P --cutoff-mode SOFT --metadata"}
88
UPLOAD_RETAIN_LOCAL_FILE=${SE_UPLOAD_RETAIN_LOCAL_FILE:-"false"}
99
UPLOAD_PIPE_FILE_NAME=${SE_UPLOAD_PIPE_FILE_NAME:-"uploadpipe"}
1010
SE_VIDEO_INTERNAL_UPLOAD=${SE_VIDEO_INTERNAL_UPLOAD:-"false"}
11-
VIDEO_UPLOAD_ENABLED=${SE_VIDEO_UPLOAD_ENABLED:-"false"}
1211
VIDEO_UPLOAD_BATCH_CHECK=${SE_VIDEO_UPLOAD_BATCH_CHECK:-"10"}
1312
process_name="video.uploader"
1413

@@ -44,24 +43,6 @@ function rename_rclone_env() {
4443
done
4544
}
4645

47-
function consume_pipe_file() {
48-
while read FILE DESTINATION < ${UPLOAD_PIPE_FILE};
49-
do
50-
if [ "${FILE}" = "exit" ];
51-
then
52-
FORCE_EXIT=true
53-
exit
54-
elif [ "$FILE" != "" ] && [ "$DESTINATION" != "" ];
55-
then
56-
rclone_upload "${FILE}" "${DESTINATION}"
57-
elif [ -f ${FORCE_EXIT_FILE} ];
58-
then
59-
echo "$(date +%FT%T%Z) [${process_name}] - Force exit signal detected"
60-
exit
61-
fi
62-
done
63-
}
64-
6546
list_rclone_pid=()
6647
function check_and_clear_background() {
6748
# Wait for a batch rclone processes to finish
@@ -79,7 +60,27 @@ function rclone_upload() {
7960
local source=$1
8061
local target=$2
8162
echo "$(date +%FT%T%Z) [${process_name}] - Uploading ${source} to ${target}"
82-
exec rclone --config ${UPLOAD_CONFIG_DIRECTORY}/${UPLOAD_CONFIG_FILE_NAME} ${UPLOAD_COMMAND} ${UPLOAD_OPTS} "${source}" "${target}" &
63+
rclone --config ${UPLOAD_CONFIG_DIRECTORY}/${UPLOAD_CONFIG_FILE_NAME} ${UPLOAD_COMMAND} ${UPLOAD_OPTS} "${source}" "${target}" &
64+
list_rclone_pid+=($!)
65+
check_and_clear_background
66+
}
67+
68+
function consume_pipe_file() {
69+
while read FILE DESTINATION < ${UPLOAD_PIPE_FILE};
70+
do
71+
if [ "${FILE}" = "exit" ];
72+
then
73+
FORCE_EXIT=true
74+
exit
75+
elif [ "$FILE" != "" ] && [ "$DESTINATION" != "" ];
76+
then
77+
rclone_upload "${FILE}" "${DESTINATION}"
78+
elif [ -f ${FORCE_EXIT_FILE} ];
79+
then
80+
echo "$(date +%FT%T%Z) [${process_name}] - Force exit signal detected"
81+
exit
82+
fi
83+
done
8384
}
8485

8586
function graceful_exit() {

Video/video.sh

Lines changed: 37 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ VIDEO_FOLDER=${VIDEO_FOLDER}
1111
VIDEO_UPLOAD_ENABLED=${VIDEO_UPLOAD_ENABLED:-$SE_VIDEO_UPLOAD_ENABLED}
1212
VIDEO_CONFIG_DIRECTORY=${VIDEO_CONFIG_DIRECTORY:-"/opt/bin"}
1313
UPLOAD_DESTINATION_PREFIX=${UPLOAD_DESTINATION_PREFIX:-$SE_UPLOAD_DESTINATION_PREFIX}
14-
UPLOAD_PIPE_FILE_NAME=${UPLOAD_PIPE_FILE_NAME:-"uploadpipe"}
14+
UPLOAD_PIPE_FILE_NAME=${SE_UPLOAD_PIPE_FILE_NAME:-"uploadpipe"}
1515
SE_VIDEO_INTERNAL_UPLOAD=${SE_VIDEO_INTERNAL_UPLOAD:-"false"}
1616
SE_SERVER_PROTOCOL=${SE_SERVER_PROTOCOL:-"http"}
1717
max_attempts=${SE_VIDEO_WAIT_ATTEMPTS:-50}
@@ -57,13 +57,34 @@ function create_pipe() {
5757
fi
5858
}
5959

60+
function wait_for_display() {
61+
export DISPLAY=${DISPLAY_CONTAINER_NAME}:${DISPLAY_NUM}.0
62+
attempts=0
63+
64+
echo "$(date +%FT%T%Z) [${process_name}] - Checking if the display is open"
65+
until xset b off || [[ $attempts = "$max_attempts" ]]
66+
do
67+
echo "$(date +%FT%T%Z) [${process_name}] - Waiting before next display check"
68+
sleep 0.5
69+
attempts=$((attempts+1))
70+
done
71+
if [[ $attempts = "$max_attempts" ]];
72+
then
73+
echo "$(date +%FT%T%Z) [${process_name}] - Can not open display, exiting."
74+
exit
75+
fi
76+
77+
VIDEO_SIZE=$(xdpyinfo | grep 'dimensions:' | awk '{print $2}')
78+
}
79+
6080
function wait_util_uploader_shutdown() {
6181
max_wait=5
6282
wait=0
6383
if [[ "${VIDEO_UPLOAD_ENABLED}" != "false" ]] && [[ -n "${UPLOAD_DESTINATION_PREFIX}" ]];
6484
then
6585
while [[ -f ${FORCE_EXIT_FILE} ]] && [[ ${wait} -lt ${max_wait} ]];
6686
do
87+
echo "exit" >> ${UPLOAD_PIPE_FILE} &
6788
echo "$(date +%FT%T%Z) [${process_name}] - Waiting for force exit file to be consumed by external upload container"
6889
sleep 1
6990
wait=$((wait+1))
@@ -73,6 +94,7 @@ function wait_util_uploader_shutdown() {
7394
then
7495
while [[ $(pgrep rclone | wc -l) -gt 0 ]]
7596
do
97+
echo "exit" >> ${UPLOAD_PIPE_FILE} &
7698
echo "$(date +%FT%T%Z) [${process_name}] - Recorder is waiting for RCLONE to finish"
7799
sleep 1
78100
done
@@ -134,6 +156,12 @@ function check_if_recording_inprogress() {
134156
fi
135157
}
136158

159+
function log_node_response() {
160+
if [[ -f "/tmp/graphQL_$session_id.json" ]]; then
161+
jq '.' "/tmp/graphQL_$session_id.json";
162+
fi
163+
}
164+
137165
function graceful_exit() {
138166
check_if_recording_inprogress
139167
send_exit_signal_to_uploader
@@ -160,27 +188,9 @@ if [[ "${VIDEO_UPLOAD_ENABLED}" != "true" ]] && [[ "${VIDEO_FILE_NAME}" != "auto
160188
-video_size ${VIDEO_SIZE} -r ${FRAME_RATE} -i ${DISPLAY_CONTAINER_NAME}:${DISPLAY_NUM}.0 -codec:v ${CODEC} ${PRESET} -pix_fmt yuv420p "$VIDEO_FOLDER/$VIDEO_FILE_NAME"
161189

162190
else
163-
create_pipe
164191
trap graceful_exit SIGTERM SIGINT EXIT
165-
export DISPLAY=${DISPLAY_CONTAINER_NAME}:${DISPLAY_NUM}.0
166-
167-
attempts=0
168-
169-
echo "$(date +%FT%T%Z) [${process_name}] - Checking if the display is open"
170-
until xset b off || [[ $attempts = "$max_attempts" ]]
171-
do
172-
echo "$(date +%FT%T%Z) [${process_name}] - Waiting before next display check"
173-
sleep 0.5
174-
attempts=$((attempts+1))
175-
done
176-
if [[ $attempts = "$max_attempts" ]];
177-
then
178-
echo "$(date +%FT%T%Z) [${process_name}] - Can not open display, exiting."
179-
exit
180-
fi
181-
182-
VIDEO_SIZE=$(xdpyinfo | grep 'dimensions:' | awk '{print $2}')
183-
192+
create_pipe
193+
wait_for_display
184194
recording_started="false"
185195
video_file_name=""
186196
video_file=""
@@ -201,7 +211,7 @@ else
201211
done
202212
if [[ $attempts = "$max_attempts" ]];
203213
then
204-
echo "$(date +%FT%T%Z) [${process_name}] - Can not reach node API, exiting."
214+
echo "$(date +%FT%T%Z) [${process_name}] - Can not reach node API, reach the max attempts $max_attempts, exiting."
205215
exit
206216
fi
207217
while curl --noproxy "*" -sk --request GET ${NODE_STATUS_ENDPOINT} > /tmp/status.json
@@ -214,15 +224,13 @@ else
214224
caps_se_video_record=${return_list[0]}
215225
video_file_name="${return_list[1]}.mp4"
216226
echo "$(date +%FT%T%Z) [${process_name}] - Start recording: $caps_se_video_record, video file name: $video_file_name"
217-
if [[ -f "/tmp/graphQL_$session_id.json" ]]; then
218-
jq '.' "/tmp/graphQL_$session_id.json";
219-
fi
227+
log_node_response
220228
fi
221229
if [[ "$session_id" != "null" && "$session_id" != "" && "$session_id" != "reserved" && "$recording_started" = "false" && "$caps_se_video_record" = "true" ]];
222230
then
223231
video_file="${VIDEO_FOLDER}/$video_file_name"
224232
echo "$(date +%FT%T%Z) [${process_name}] - Starting to record video"
225-
exec ffmpeg -hide_banner -loglevel warning -flags low_delay -threads 2 -fflags nobuffer+genpts -strict experimental -y -f x11grab \
233+
ffmpeg -hide_banner -loglevel warning -flags low_delay -threads 2 -fflags nobuffer+genpts -strict experimental -y -f x11grab \
226234
-video_size ${VIDEO_SIZE} -r ${FRAME_RATE} -i ${DISPLAY} -codec:v ${CODEC} ${PRESET} -pix_fmt yuv420p "$video_file" &
227235
recording_started="true"
228236
echo "$(date +%FT%T%Z) [${process_name}] - Video recording started"
@@ -244,6 +252,8 @@ else
244252
fi
245253
prev_session_id=$session_id
246254
done
247-
echo "$(date +%FT%T%Z) [${process_name}] - Node API is not responding, exiting."
255+
echo "$(date +%FT%T%Z) [${process_name}] - Last response from node API..."
256+
log_node_response
257+
echo "$(date +%FT%T%Z) [${process_name}] - Node API is not responding now, exiting..."
248258
exit
249259
fi

charts/selenium-grid/templates/_helpers.tpl

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -271,6 +271,7 @@ template:
271271
{{- toYaml . | nindent 6 }}
272272
{{- end }}
273273
spec:
274+
shareProcessNamespace: {{ $.Values.global.seleniumGrid.stdoutProbeLog | ternary "false" .node.shareProcessNamespace }}
274275
serviceAccountName: {{ template "seleniumGrid.serviceAccount.fullname" . }}
275276
serviceAccount: {{ template "seleniumGrid.serviceAccount.fullname" . }}
276277
restartPolicy: {{ template "seleniumGrid.node.restartPolicy" . }}
@@ -422,7 +423,7 @@ template:
422423
{{- toYaml .node.sidecars | nindent 6 }}
423424
{{- end }}
424425
{{- if $.Values.videoRecorder.enabled }}
425-
- name: video
426+
- name: {{ $.Values.videoRecorder.name }}
426427
{{- $imageTag := default $.Values.global.seleniumGrid.videoImageTag $.Values.videoRecorder.imageTag }}
427428
{{- $imageRegistry := default $.Values.global.seleniumGrid.imageRegistry $.Values.videoRecorder.imageRegistry }}
428429
image: {{ printf "%s/%s:%s" $imageRegistry $.Values.videoRecorder.imageName $imageTag }}
@@ -482,7 +483,7 @@ template:
482483
lifecycle: {{- toYaml . | nindent 10 }}
483484
{{- end }}
484485
{{- if and $.Values.videoRecorder.uploader.enabled (not (empty $.Values.videoRecorder.uploader.name)) }}
485-
- name: uploader
486+
- name: {{ default "uploader" $.Values.videoRecorder.uploader.name }}
486487
{{- $imageTag := .uploader.imageTag }}
487488
{{- $imageRegistry := .uploader.imageRegistry }}
488489
image: {{ printf "%s/%s:%s" $imageRegistry .uploader.imageName $imageTag }}
@@ -725,6 +726,8 @@ Define terminationGracePeriodSeconds of the node pod.
725726
{{- $period := $nodePeriod -}}
726727
{{- if and (eq .Values.autoscaling.scalingType "deployment") (eq (include "seleniumGrid.useKEDA" $) "true") -}}
727728
{{- $period = ternary $nodePeriod $autoscalingPeriod (gt $nodePeriod $autoscalingPeriod) -}}
729+
{{- else if and (eq .Values.autoscaling.scalingType "job") (eq (include "seleniumGrid.useKEDA" $) "true") }}
730+
{{- $period = 30 -}}
728731
{{- end -}}
729732
{{- $period -}}
730733
{{- end -}}

charts/selenium-grid/templates/node-configmap.yaml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,9 @@ data:
2626
SE_DRAIN_AFTER_SESSION_COUNT: '{{- and (eq (include "seleniumGrid.useKEDA" .) "true") (eq .Values.autoscaling.scalingType "job") | ternary "1" "0" -}}'
2727
SE_NODE_GRID_URL: '{{ include "seleniumGrid.url" $ }}'
2828
SE_NODE_GRID_GRAPHQL_URL: '{{ include "seleniumGrid.graphqlURL" $ }}'
29+
{{- if $.Values.videoRecorder.enabled }}
30+
SE_VIDEO_CONTAINER_NAME: {{ $.Values.videoRecorder.name | quote }}
31+
{{- end }}
2932
{{- if $.Values.nodeConfigMap.leftoversCleanup.enabled }}
3033
SE_ENABLE_BROWSER_LEFTOVERS_CLEANUP: 'true'
3134
{{- with $.Values.nodeConfigMap.leftoversCleanup.jobIntervalInSecs }}

0 commit comments

Comments
 (0)