Skip to content

Commit

Permalink
Retry fetching user data from secretmanager if first request fails
Browse files Browse the repository at this point in the history
Signed-off-by: Alexandr Demicev <[email protected]>
  • Loading branch information
alexander-demicev authored and fad3t committed Oct 8, 2024
1 parent d4130df commit 3b494b2
Showing 1 changed file with 51 additions and 14 deletions.
65 changes: 51 additions & 14 deletions pkg/cloud/services/secretsmanager/secret_fetch_script.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ SECRET_PREFIX="{{.SecretPrefix}}"
CHUNKS="{{.Chunks}}"
FILE="/etc/secret-userdata.txt"
FINAL_INDEX=$((CHUNKS - 1))
MAX_RETRIES=10
RETRY_DELAY=10 # in seconds
# Log an error and exit.
# Args:
Expand Down Expand Up @@ -115,6 +117,7 @@ check_aws_command() {
;;
esac
}
delete_secret_value() {
local id="${SECRET_PREFIX}-${1}"
local out
Expand All @@ -126,19 +129,27 @@ delete_secret_value() {
aws secretsmanager ${ENDPOINT} --region ${REGION} delete-secret --force-delete-without-recovery --secret-id "${id}" 2>&1
)
local delete_return=$?
set -o errexit
set -o nounset
set -o pipefail
check_aws_command "SecretsManager::DeleteSecret" "${delete_return}" "${out}"
if [ ${delete_return} -ne 0 ]; then
log::error_exit "Could not delete secret value" 2
log::error "Could not delete secret value"
return 1
fi
}
delete_secrets() {
for i in $(seq 0 ${FINAL_INDEX}); do
delete_secret_value "$i"
# Delete one secret chunk, retrying on failure.
# Makes at most MAX_RETRIES attempts and waits RETRY_DELAY seconds between
# consecutive attempts.
# Args:
#   $1: chunk index, passed through unchanged to delete_secret_value
# Returns:
#   0 as soon as one attempt succeeds, 1 when every attempt failed
retry_delete_secret_value() {
  local attempts=0
  while [ "${attempts}" -lt "${MAX_RETRIES}" ]; do
    # delete_secret_value switches errexit back on before it returns its
    # failure status. Calling it as an if-condition keeps that non-zero
    # status from terminating the whole script, so a retry can happen.
    if delete_secret_value "$1"; then
      return 0
    fi
    # Plain arithmetic expansion: a post-increment inside (( )) evaluates to
    # 0 on the first pass, which yields exit status 1 and is fatal under
    # errexit.
    attempts=$((attempts + 1))
    # Only wait when another attempt will follow.
    if [ "${attempts}" -lt "${MAX_RETRIES}" ]; then
      log::info "Retrying in ${RETRY_DELAY} seconds..."
      sleep "${RETRY_DELAY}"
    fi
  done
  return 1
}
get_secret_value() {
Expand All @@ -159,18 +170,33 @@ get_secret_value() {
)
local get_return=$?
check_aws_command "SecretsManager::GetSecretValue" "${get_return}" "${data}"
if [ ${get_return} -ne 0 ]; then
log::error "could not get secret value"
return 1
fi
set -o errexit
set -o nounset
set -o pipefail
if [ ${get_return} -ne 0 ]; then
log::error "could not get secret value, deleting secret"
delete_secrets
log::error_exit "could not get secret value, but secret was deleted" 1
fi
log::info "appending data to temporary file ${FILE}.gz"
echo "${data}" | base64 -d >>${FILE}.gz
}
# Fetch one secret chunk, retrying on failure.
# Makes at most MAX_RETRIES attempts and waits RETRY_DELAY seconds between
# consecutive attempts.
# Args:
#   $1: chunk index, passed through unchanged to get_secret_value
# Returns:
#   0 as soon as one attempt succeeds, 1 when every attempt failed
retry_get_secret_value() {
  local attempts=0
  local status
  while [ "${attempts}" -lt "${MAX_RETRIES}" ]; do
    # Deliberately called as a plain command rather than as an if-condition:
    # a condition context would suppress errexit inside get_secret_value, and
    # a failure while decoding the fetched data should still abort instead of
    # being retried.
    get_secret_value "$1"
    status=$?
    if [ "${status}" -eq 0 ]; then
      return 0
    fi
    # Plain arithmetic expansion: a post-increment inside (( )) evaluates to
    # 0 on the first pass, which yields exit status 1 and is fatal under
    # errexit.
    attempts=$((attempts + 1))
    # Only wait when another attempt will follow.
    if [ "${attempts}" -lt "${MAX_RETRIES}" ]; then
      log::info "Retrying in ${RETRY_DELAY} seconds..."
      sleep "${RETRY_DELAY}"
    fi
  done
  return 1
}
log::info "aws.cluster.x-k8s.io encrypted cloud-init script $0 started"
log::info "secret prefix: ${SECRET_PREFIX}"
log::info "secret count: ${CHUNKS}"
Expand All @@ -181,10 +207,21 @@ if test -f "${FILE}"; then
fi
for i in $(seq 0 "${FINAL_INDEX}"); do
get_secret_value "$i"
retry_get_secret_value "$i"
return_code=$?
if [ ${return_code} -ne 0 ]; then
log::error "Failed to get secret value after ${MAX_RETRIES} attempts"
fi
done
delete_secrets
for i in $(seq 0 ${FINAL_INDEX}); do
retry_delete_secret_value "$i"
return_code=$?
if [ ${return_code} -ne 0 ]; then
log::error "Failed to delete secret value after ${MAX_RETRIES} attempts"
log::error_exit "couldn't delete the secret value, exiting" 1
fi
done
log::info "decompressing userdata to ${FILE}"
gunzip "${FILE}.gz"
Expand Down

0 comments on commit 3b494b2

Please sign in to comment.