Skip to content

Commit 78ac6b4

Browse files
committed
Retry function added to helper library
This function allows a command to be retried a limited number of times with an escalating delay between attempts. Related: conjurinc/ops#423
1 parent d195e08 commit 78ac6b4

File tree

5 files changed

+135
-5
lines changed

5 files changed

+135
-5
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ files within it's directory.
136136
<ol>
137137
<li><b>die</b>: print message and exit 1</li>
138138
<li><b>spushd/spopd</b>: Safe versions of pushd & popd that call die if the push/pop fails; they also drop stdout. </li>
139+
<li><b>retry</b>: Retry a command until it succeeds, up to a user-specified maximum number of attempts, with an escalating delay between attempts.</li>
139140
</ol>
140141
</td>
141142
</tr>

helpers/lib

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,57 @@ function spushd(){
1818
function spopd(){
1919
popd >/dev/null || die "popd failed :("
2020
}
21+
22+
# Retry a command until it succeeds, with an escalating delay between
# attempts.
#
# Usage: retry <retries> <command> [args...]
#
# Arguments:
#   retries - maximum number of attempts; one or more decimal digits.
#             The command is always run at least once, even for 0.
#   command - command (plus its arguments) to run; executed as "$@".
# Outputs:
#   Progress and error messages on stdout.
# Returns:
#   0 as soon as the command succeeds, otherwise the exit status of the
#   last failed attempt once all attempts are used up.
#   On a usage error or an invalid retry count this calls `exit 1`
#   (not `return`), so it terminates the calling shell.
#
# The delay after failed attempt n is min(2 * n, 30) seconds plus a random
# 0..(n - 1) seconds, so for large numbers of retries the random part
# (in seconds) dominates the delay.
#
# Based on:
# https://gist.github.com/sj26/88e1c6584397bb7c13bd11108a579746
# but now quite heavily modified.
function retry {
  # Maximum amount of fixed delay between attempts;
  # a random value will still be added.
  local -r MAX_BACKOFF=30

  if [[ ${#} -lt 2 ]]; then
    echo "retry usage: retry <retries> <command>"
    exit 1
  fi

  local retries=$1
  shift

  # Require one or more digits. An empty string or a decimal such as "1.5"
  # would otherwise break the integer comparison further down.
  if ! [[ ${retries} =~ ^[0-9]+$ ]]; then
    echo "Invalid number of retries: ${retries} for command '${*}'."
    exit 1
  fi

  # Keep all working variables local so nothing leaks into the caller.
  local count=0
  local rc backoff delay
  until "$@"; do
    # Command failed, otherwise until would have skipped the loop.

    # Store return code so it can be reported to the user. This must be
    # the first command in the loop body, before $? is overwritten.
    rc=$?
    count=$((count + 1))
    # `[` compares as decimal, so a count with leading zeros (e.g. "08")
    # is not misread as an octal literal.
    if [ "${count}" -lt "${retries}" ]; then
      # There are still retries left, calculate delay and notify user.
      backoff=$((2 * count))
      if [[ "${backoff}" -gt "${MAX_BACKOFF}" ]]; then
        backoff=${MAX_BACKOFF}
      fi

      # Add a random amount to the delay to prevent competing processes
      # from re-colliding.
      delay=$((backoff + (RANDOM % count)))
      echo "'${*}' Retry $count/$retries exited $rc, retrying in $delay seconds..."
      sleep "${delay}"
    else
      # Out of retries :(
      echo "Retry $count/$retries exited $rc, no more retries left."
      return "${rc}"
    fi
  done
  return 0
}

tests-for-this-repo/helpers.bats

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,17 @@
33

44
. "${BASH_LIB_DIR}/init"
55

6+
# Run before every test: create a fresh scratch directory.
# Sets the globals used by the tests:
#   temp_dir - scratch directory under BATS_TMPDIR
#   afile    - path of the file the retry tests append to
setup(){
  # Abort rather than operate on "/testtemp" if BATS_TMPDIR is unset.
  temp_dir="${BATS_TMPDIR:?BATS_TMPDIR must be set}/testtemp"
  # Remove leftovers from an aborted run so mkdir cannot fail and a stale
  # appendfile cannot skew the line-count assertions.
  rm -rf -- "${temp_dir}"
  mkdir -p -- "${temp_dir}"
  afile="${temp_dir}/appendfile"
}
12+
13+
# Run after every test: remove the scratch directory created by setup.
teardown(){
  # Guard the destructive path: abort instead of running rm -rf on
  # "/testtemp" when BATS_TMPDIR is unset or empty.
  temp_dir="${BATS_TMPDIR:?BATS_TMPDIR must be set}/testtemp"
  rm -rf -- "${temp_dir}"
}
617

718
@test "die exits and prints message" {
819
run bash -c ". ${BASH_LIB_DIR}/init; die msg"
@@ -34,3 +45,70 @@
3445
assert_output --partial "stack empty"
3546
assert_failure
3647
}
48+
49+
@test "retry runs command only once if it succeeds the first time" {
50+
retryme(){
51+
date >> ${afile}
52+
}
53+
run retry 3 retryme
54+
assert_success
55+
assert_equal $(wc -l <${afile}) 1
56+
}
57+
58+
@test "retry doesn't introduce delay when the command succeeds first time" {
59+
retryme(){
60+
date >> ${afile}
61+
}
62+
start=$(date +%s)
63+
run retry 3 retryme
64+
end=$(date +%s)
65+
assert [ "$(( start + 1 ))" -ge "${end}" ]
66+
assert_success
67+
}
68+
69+
@test "retry runs n times on consecutive failure and waits between attempts" {
70+
retryme(){
71+
date >> ${afile}
72+
false
73+
}
74+
start=$(date +%s)
75+
run retry 2 retryme
76+
end=$(date +%s)
77+
# introduces at least a two second delay between attempts
78+
assert [ "$(( start + 2 ))" -le "${end}" ]
79+
assert_failure
80+
assert_equal $(wc -l <${afile}) 2
81+
}
82+
83+
@test "retry returns after first success" {
84+
retryme(){
85+
date >> "${afile}"
86+
case $(wc -l < ${afile}) in
87+
*1)
88+
return 1
89+
;;
90+
*)
91+
return 0
92+
;;
93+
esac
94+
}
95+
run retry 3 retryme
96+
assert_success
97+
assert_equal $(wc -l <${afile}) 2
98+
}
99+
100+
@test "retry fails with less than two arguments" {
101+
run retry 3
102+
assert_failure
103+
assert_output --partial usage
104+
assert [ ! -e "${temp_dir}/appendfile" ]
105+
}
106+
107+
@test "retry fails with non-integer retry count" {
108+
run retry "this" date
109+
assert_failure
110+
assert_output --partial number
111+
assert [ ! -e "${temp_dir}/appendfile" ]
112+
}
113+
114+

tests-for-this-repo/k8s.bats

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
. "${BASH_LIB_DIR}/test-utils/bats-support/load.bash"
22
. "${BASH_LIB_DIR}/test-utils/bats-assert-1/load.bash"
33

4-
setup(){
5-
. "${BASH_LIB_DIR}/init"
6-
}
4+
. "${BASH_LIB_DIR}/init"
75

86
@test "gke-utils image builds" {
97
run build_gke_image

tests-for-this-repo/run-python-lint

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
# shellcheck disable=SC2086,SC2046
77
. $(dirname ${BASH_SOURCE[0]})/../init
8-
. "${BASH_LIB_DIR}/helpers/lib"
98

109
rc=0
1110

@@ -17,4 +16,4 @@ spushd ${BASH_LIB_DIR}/tests-for-this-repo/python-lint
1716
mv "${BASH_LIB_DIR}/junit.xml" "${BASH_LIB_DIR}/python-lint-junit.xml"
1817
spopd
1918

20-
exit ${rc}
19+
exit ${rc}

0 commit comments

Comments
 (0)