check_container_upgrade/check_container_upgrade
Benjamin Renard 73ebd4cddb
All checks were successful
Run tests / tests (push) Successful in 1m50s
Improve deploy cron to log duration (and log file in case of error)
2024-07-16 09:39:10 +02:00

581 lines
20 KiB
Bash
Executable file

#!/bin/bash
# Monitoring plugin to check if running containers are upgradable
#
# Author: Benjamin Renard <brenard@zionetrix.net>
# Date: Sun, 03 Mar 2024 16:40:19 +0100
# Source: https://gitea.zionetrix.net/bn8/check_container_upgrade
#
# Container engine: "auto" (detect it), a known engine name or the path of an executable
ENGINE="auto"
POSSIBLE_ENGINES=( "auto" "docker" "podman" )
# Path of the docker compose file (empty: work on plain running containers)
DOCKERCOMPOSE_FILE=""
DEBUG=0
CONSOLE=0
# Log file path. It must default to an empty string: debug() and log() test it with
# [[ -n "$LOG_FILE" ]], so any non-empty default (like "0") would be used as a file name.
LOG_FILE=""
# Maximum number of container checks run in parallel (0 = no limit)
MAX_PARALLEL_CHECKS=4
# Containers named on the command line (empty: all running containers)
ONLY_CONTAINERS=()
EXCLUDED_CONTAINERS=( buildx_buildkit_default )
REBUILD=0
REBUILD_DATA_DIR="/var/log/$(basename "$0")"
REBUILD_CRON=0
DEPLOY_CRON=0
CHECK_CRON=0
# Check plugins looked up inside containers: plugin path => command line to run
declare -rA CHECK_PLUGINS=(
    ["/usr/lib/nagios/plugins/check_apt"]="/usr/lib/nagios/plugins/check_apt -u -U -t 60 -l"
    ["/usr/lib/nagios/plugins/check_apk"]="/usr/lib/nagios/plugins/check_apk"
)
# Print a date formatted as "YYYY-MM-DD HH:MM:SS".
# Arguments: $1 - optional Unix timestamp; the current date is printed when empty or missing
function now() {
    local fmt="+%F %H:%M:%S" ts=$1
    if [[ -n "$ts" ]]; then
        date -d "@$ts" "$fmt"
    else
        date "$fmt"
    fi
}
# Print the current time as a Unix timestamp (seconds).
function current_time() {
    date "+%s"
}
# Emit a debug message (only when DEBUG is enabled).
# Without log file the message goes to stderr; with a log file it is appended to it, and also
# printed on stdout when CONSOLE is enabled.
# Arguments: the message words
function debug() {
    if [[ $DEBUG -eq 0 ]]; then
        return 0
    fi
    if [[ -z "$LOG_FILE" ]]; then
        echo -e "[DEBUG] $*" >&2
        return
    fi
    local line
    line="$(now) - [DEBUG] $*"
    if [[ $CONSOLE -eq 1 ]]; then
        echo -e "$line" | tee -a "$LOG_FILE" 2>&1
    else
        echo -e "$line" >> "$LOG_FILE"
    fi
}
# Log a message: append it (dated, with its level) to the log file when one is set, then print
# it on stderr for the ERROR level or on stdout for any other level.
# Arguments: $1 - level, $2... - the message words
function log() {
    local level=$1 text
    shift
    text="$*"
    if [[ -n "$LOG_FILE" ]]; then
        echo -e "$(now) - [$level] $text" >> "$LOG_FILE"
    fi
    case "$level" in
        ERROR) echo -e "ERROR - $text" >&2 ;;
        *) echo -e "$text" ;;
    esac
}
# Emit an informational message through log() with the INFO level.
function message() {
    log "INFO" "$@"
}
# Emit an error message through log() with the ERROR level.
function error() {
    log "ERROR" "$@"
}
# Tell whether no argument was given (meant to be called with an expanded array).
# Returns: 0 when called without any argument, 1 otherwise
function is_empty() {
    if (( $# == 0 )); then
        return 0
    fi
    return 1
}
# Tell whether a value is one of the following arguments (exact string comparison).
# Arguments: $1 - searched value, $2... - candidate values
# Returns: 0 when found, 1 otherwise
function in_array() {
    local needle=$1 candidate
    shift
    for candidate; do
        if [[ "$needle" == "$candidate" ]]; then
            return 0
        fi
    done
    return 1
}
# Join values with a separator and print the result (without trailing newline).
# Arguments: $1 - separator, $2... - values to join
function implode() {
    local sep=${1-} head=${2-}
    if ! shift 2; then
        # No value at all: print nothing
        return 0
    fi
    # Prefix every remaining value with the separator, then print all pieces at once
    local -a tail=( "${@/#/$sep}" )
    printf %s "$head" "${tail[@]}"
}
# Print a duration given in seconds as "HH:MM:SS", prefixed by "N days and " when it lasts
# one day or more.
# Arguments: $1 - duration in seconds
function format_duration {
    local total=$1
    local days=$(( total / 60 / 60 / 24 ))
    local hours=$(( total / 60 / 60 % 24 ))
    local minutes=$(( total / 60 % 60 ))
    local seconds=$(( total % 60 ))
    if (( days > 0 )); then
        printf '%d days and ' "$days"
    fi
    printf '%02d:%02d:%02d' "$hours" "$minutes" "$seconds"
}
# Path of the rebuild status file (computed by rebuild_status_file when still empty)
REBUILD_STATUS_FILE=""
# Make sure the rebuild data directory and the JSON status file exist.
# Globals: REBUILD_DATA_DIR (read), REBUILD_STATUS_FILE (set when empty)
function rebuild_status_file() {
    if [[ ! -e "$REBUILD_DATA_DIR" ]]; then
        mkdir -p "$REBUILD_DATA_DIR"
    fi
    if [[ -z "$REBUILD_STATUS_FILE" ]]; then
        REBUILD_STATUS_FILE="$REBUILD_DATA_DIR/status.json"
    fi
    if [[ ! -e "$REBUILD_STATUS_FILE" ]]; then
        # Start from an empty JSON object
        echo '{}' > "$REBUILD_STATUS_FILE"
    fi
}
# Load the rebuild status into a global variable.
# Arguments:
#   $1 - name of the global variable to set
#   $2 - optional container name: when given, only the entry of this container is extracted
#        with jq, otherwise the whole content of the status file is loaded
function rebuild_status() {
    local output_var=$1 _status_json
    if [[ -z "$REBUILD_STATUS_FILE" ]]; then
        rebuild_status_file
    fi
    if [[ -n "$2" ]]; then
        _status_json=$(
            jq -r --arg container "$2" '.[$container]' < "$REBUILD_STATUS_FILE"
        )
    else
        _status_json=$( cat "$REBUILD_STATUS_FILE" )
    fi
    declare -g "$output_var=$_status_json"
}
# Update the entry of a container in the rebuild status file.
# Usage:
#   update_rebuild_status -d CONTAINER             remove the entry of the container
#   update_rebuild_status CONTAINER NAME=VALUE...  set field(s) of the entry of the container
# Globals: DATA (set by rebuild_status), REBUILD_STATUS_FILE (written)
# Returns: 0 on success, 1 when jq failed (the status file is then left untouched)
function update_rebuild_status() {
    local data rc=0
    if [[ "$1" == "-d" ]]; then
        rebuild_status DATA
        # shellcheck disable=SC2153
        data=$( jq --arg container "$2" 'del(.[$container])' <<< "$DATA" ) || rc=$?
    else
        local args=( --arg container "$1" ) arg name value exp=( )
        for arg in "${@:2}"; do
            # Split on the first "=" with parameter expansions: the value is kept verbatim,
            # even when it is multi-line or contains "NAME=" again.
            name=${arg%%=*}
            value=${arg#*=}
            args+=( --arg "$name" "$value" )
            exp+=( ".[\$container].${name}=\$${name}" )
        done
        rebuild_status DATA
        data=$( jq "${args[@]}" "$( implode ' | ' "${exp[@]}" )" <<< "$DATA" ) || rc=$?
    fi
    if [[ $rc -ne 0 ]]; then
        # Do not overwrite the status file with the (empty) output of a failed jq
        error "Fail to update rebuild status file $REBUILD_STATUS_FILE (jq exit code: $rc)"
        return 1
    fi
    printf '%s\n' "$data" > "$REBUILD_STATUS_FILE"
}
# Remove the rebuild status file and the container log files it references.
# Globals: REBUILD_STATUS_FILE (read, computed by rebuild_status_file when empty)
function remove_rebuild_status() {
    local log_path
    [[ -n "$REBUILD_STATUS_FILE" ]] || rebuild_status_file
    [[ -e "$REBUILD_STATUS_FILE" ]] || return 0
    debug "Remove previous rebuild status file ($REBUILD_STATUS_FILE) and log files it contains"
    # Read one path per line (no word splitting) and skip entries without log ("// empty"
    # avoids getting the literal string "null" for them)
    while IFS= read -r log_path; do
        [[ -n "$log_path" ]] || continue
        debug " remove old container log file $log_path"
        rm -f -- "$log_path"
    done < <( jq -r '.[] | .log // empty' "$REBUILD_STATUS_FILE" )
    debug " remove status file"
    rm -f -- "$REBUILD_STATUS_FILE"
}
# Display the usage message and exit.
# Arguments: $1 - optional error message, displayed before the usage
# Exits with 1 when an error message is given, with 0 otherwise.
function usage() {
    local reason="$1" status=0
    if [[ -n "$reason" ]]; then
        message "$reason"
        status=1
    fi
    cat << EOF
Usage : $(basename "$0") [-d] [-E /path/to/engine] [container1,...]
-E [path] Force a specific engine (possible values: ${POSSIBLE_ENGINES[@]},
default: $ENGINE)
-x [container] Exclude specified container (could be repeat)
-M [integer] Max number of container checks to run in parallel
(default: $MAX_PARALLEL_CHECKS, 0=no limit)
-f [docker-compose.yml] To check upgrade on docker compose project, specified the path of the
docker-compose.yml file
-b|--build|--rebuild Trigger container build if upgrade detected (only possible if a docker
compose file if provided)
--rebuild-path Specify rebuild data directory path (default: ${REBUILD_DATA_DIR})
--rebuild-cron Start in rebuild cron mode: rebuild containers detected and mark to be
rebuilt on status file.
--deploy-cron Start in deploy cron mode: deploy containers known as rebuilt in status
file.
--check-cron Start in check cron node: check if containers need to be updated and
trigger their rebuild.
-d Debug mode
-l Log file
-C Console logging (even if log file is specify)
-X Enable bash tracing (=set -x)
-h Show this message
EOF
    exit "$status"
}
# Parse the command line: known options are handled by name (an option taking a value consumes
# the next argument), any other argument is taken as the name of a container to check.
for (( idx = 1; idx <= $#; idx++ )); do
    OPT=${!idx}
    case "$OPT" in
        # Flags
        -d) DEBUG=1 ;;
        -C) CONSOLE=1 ;;
        -X) set -x ;;
        -h) usage ;;
        -b|--build|--rebuild) REBUILD=1 ;;
        --rebuild-cron) REBUILD_CRON=1 ;;
        --deploy-cron) DEPLOY_CRON=1 ;;
        --check-cron) CHECK_CRON=1 ;;
        # Options with a value
        -l)
            (( idx += 1 ))
            LOG_FILE=${!idx}
            ;;
        -E)
            (( idx += 1 ))
            ENGINE=${!idx}
            # Accept the path of an executable or one of the known engine names
            [[ -x "$ENGINE" ]] \
                || in_array "$ENGINE" "${POSSIBLE_ENGINES[@]}" \
                || usage "Invalid engine $ENGINE"
            ;;
        -f)
            (( idx += 1 ))
            DOCKERCOMPOSE_FILE=${!idx}
            ;;
        --rebuild-path)
            (( idx += 1 ))
            REBUILD_DATA_DIR="${!idx}"
            ;;
        -x)
            (( idx += 1 ))
            EXCLUDED_CONTAINERS+=( "${!idx}" )
            ;;
        -M)
            (( idx += 1 ))
            MAX_PARALLEL_CHECKS=${!idx}
            ;;
        # Anything else is a container name
        *) ONLY_CONTAINERS+=( "$OPT" ) ;;
    esac
done
debug "Start with parameters: $*"
is_empty "${ONLY_CONTAINERS[@]}" || debug "Only containers: ${ONLY_CONTAINERS[*]}"
# Auto-detect the container engine: keep the first known engine found in PATH
if [[ "$ENGINE" == "auto" ]]; then
    debug "Auto-detect engine..."
    # Clear ENGINE before looking for an engine: if it kept its "auto" value, the failure check
    # below could never be true when no engine is found.
    ENGINE=""
    for engine in "${POSSIBLE_ENGINES[@]}"; do
        [[ "$engine" == "auto" ]] && continue
        if command -v "$engine" > /dev/null 2>&1; then
            ENGINE="$engine"
            break
        fi
        debug "$engine not found"
    done
    if [[ -z "$ENGINE" ]]; then
        message "UNKNOWN - Fail to auto-detect engine"
        exit 3
    fi
    debug "Auto-detected engine: $ENGINE"
fi
# When a docker compose file is given, check it exists and select the compose command:
# "<engine>-compose" when this command is found, "<engine> compose" otherwise.
if [[ -n "$DOCKERCOMPOSE_FILE" ]]; then
    [[ -e "$DOCKERCOMPOSE_FILE" ]] || {
        message "UNKNOWN - Docker compose file not found ($DOCKERCOMPOSE_FILE)"
        exit 3
    }
    if which "${ENGINE}-compose" > /dev/null 2>&1; then
        COMPOSE_BIN="${ENGINE}-compose"
    else
        COMPOSE_BIN="$ENGINE compose"
    fi
    debug "Docker compose bin: $COMPOSE_BIN"
fi
# Rebuild cron mode: build the image of each container marked to be rebuilt in the status file,
# record the result in the status file, then exit.
if [[ $REBUILD_CRON -eq 1 ]]; then
    rebuild_status DATA
    # Select containers whose entry has neither a start date nor an error
    mapfile -t to_rebuild < <(
        jq -r -c 'to_entries[] | select(
            (.value.start_date|not) and (.value.error|not)
        ) | .key' <<< "$DATA"
    )
    if [[ ${#to_rebuild[@]} -eq 0 ]]; then
        debug "No container need to be rebuild"
        exit 0
    fi
    message "${#to_rebuild[@]} container(s) to rebuild: ${to_rebuild[*]}"
    error=0
    for container in "${to_rebuild[@]}"; do
        log="$REBUILD_DATA_DIR/$container.log"
        message " $container: start building image (log=$log)"
        start_time=$(current_time)
        update_rebuild_status "$container" "start_date=$(now "$start_time")" "log=$log"
        # COMPOSE_BIN may hold several words (e.g. "docker compose"): it must be left unquoted
        # to be split into the command and its first argument, as done everywhere else.
        # shellcheck disable=SC2086
        $COMPOSE_BIN -f "$DOCKERCOMPOSE_FILE" build --no-cache "$container" >> "$log" 2>&1
        result=$?
        end_time=$(current_time)
        (( duration=end_time-start_time ))
        duration=$(format_duration "$duration")
        container_info=( "end_date=$(now "$end_time")" "duration=$duration" )
        if [[ $result -eq 0 ]]; then
            message " $container: rebuilt in $duration"
        else
            error " $container: fail to rebuild image"
            container_info+=( "error=fail to rebuild image" )
        fi
        update_rebuild_status "$container" "${container_info[@]}"
    done
    message "No more container to rebuild, stop"
    exit 0
fi
# Deploy cron mode: recreate each container known as rebuilt (with an end date and no error in
# the status file), then exit with 1 if at least one deployment failed.
if [[ $DEPLOY_CRON -eq 1 ]]; then
    rebuild_status DATA
    mapfile -t to_deploy < <(
        jq -r -c \
            'to_entries[] | select((.value.end_date) and (.value.error|not)) | .key' <<< "$DATA"
    )
    if (( ${#to_deploy[@]} == 0 )); then
        debug "No container need to be deploy"
        exit 0
    fi
    message "${#to_deploy[@]} container(s) to deploy: ${to_deploy[*]}"
    error=0
    for container in "${to_deploy[@]}"; do
        message " $container: deploying..."
        log="$REBUILD_DATA_DIR/$container.log"
        start_time=$(current_time)
        # shellcheck disable=SC2086
        $COMPOSE_BIN -f "$DOCKERCOMPOSE_FILE" up -d --no-deps "$container" >> "$log" 2>&1
        deploy_rc=$?
        took=$(format_duration "$(( $(current_time)-start_time ))")
        if [[ $deploy_rc -ne 0 ]]; then
            # Keep the log file and flag the container as failed in the status file
            error " $container: fail to deploy new container image (in $took, see log: $log)"
            update_rebuild_status "$container" "error=fail to deploy new container image"
            error=1
            continue
        fi
        # Deployed: forget the container in the status file and drop its log file
        message " $container: done (in $took)"
        update_rebuild_status -d "$container"
        rm -f "$log"
    done
    message "done"
    exit $error
fi
# State of the check
EXIT_CODE=0
# Indexed by container name: temporary output file and PID of the background check, then
# status text sorted by outcome
declare -A CONTAINER_STATUS_FILE CONTAINER_PID UP_TO_DATE ERRORS UPGRADABLE_CONTAINERS
declare -a UNKNOWNS
CHECKED_CONTAINERS=( )
debug "List running containers..."
# Build a space-separated list: service names of the compose project when a compose file is
# given, names of the containers known by the engine otherwise
if [[ -z "$DOCKERCOMPOSE_FILE" ]]; then
    RUNNING_CONTAINERS=$( $ENGINE ps --format '{{.Names}}' | tr '\n' ' ' )
else
    RUNNING_CONTAINERS=$(
        $COMPOSE_BIN -f "$DOCKERCOMPOSE_FILE" ps --format '{{.Service}}' | tr '\n' ' '
    )
fi
debug "Running containers: $RUNNING_CONTAINERS"
# Run a command inside a container, through the compose command when a docker compose file is
# set, directly through the engine otherwise.
# Arguments: $1 - container (or compose service) name, $2... - command to run and its arguments
# Returns: the exit code of the exec command
function exec_in_container() {
    local target=$1
    shift
    if [[ -z "$DOCKERCOMPOSE_FILE" ]]; then
        $ENGINE exec "$target" "$@"
    else
        $COMPOSE_BIN -f "$DOCKERCOMPOSE_FILE" exec "$target" "$@"
    fi
}
# Check one container (implemented as a function to allow running it in parallel): run the
# first check plugin of CHECK_PLUGINS found inside the container and write its output in the
# output file.
# Parameters : [container] [output file]
# Returns: the exit code of the plugin, or 3 when no plugin gave any output
function check_container() {
    local container="$1" output_file="$2"
    local plugin result="" rc
    local -a plugin_cmd=()
    for plugin in "${!CHECK_PLUGINS[@]}"; do
        if exec_in_container "$container" test -e "$plugin" > /dev/null 2>&1; then
            debug "$container - Plugin $plugin found, use it"
            # Split the configured command line into words
            read -ra plugin_cmd <<< "${CHECK_PLUGINS[${plugin}]}"
            result="$( exec_in_container "$container" "${plugin_cmd[@]}" 2>&1 )"
            rc=$?
            debug "$container - Plugin output: $result"
            debug "$container - Plugin exit code: $rc"
            break
        fi
        debug "$container - Plugin $plugin not found"
    done
    if [[ -z "$result" ]]; then
        debug "$container - No check plugin found"
        result="UNKNOWN - No check plugin available"
        rc=3
    fi
    echo -e "$result" > "$output_file"
    return $rc
}
# Start the check of each selected running container as a background job.
debug "Trigger check of all selected containers..."
# RUNNING_CONTAINERS is a space-separated string: it is left unquoted to be split in names
for container in $RUNNING_CONTAINERS; do
# When containers were named on the command line, ignore all the others
if ! is_empty "${ONLY_CONTAINERS[@]}" && ! in_array "$container" "${ONLY_CONTAINERS[@]}"; then
debug "$container - Ignored"
continue
fi
if in_array "$container" "${EXCLUDED_CONTAINERS[@]}"; then
debug "$container - Excluded"
continue
fi
# Throttle: when the number of jobs reaches the limit (0 = no limit), wait for one of them
# to end before starting a new one
if [[ "$MAX_PARALLEL_CHECKS" -gt 0 ]] && \
[[ "$(jobs | wc -l)" -ge "$MAX_PARALLEL_CHECKS" ]]; then
debug "Max parallel checks count reached. Waiting some check ending"
wait -n
debug "Some check ended, continue"
fi
CHECKED_CONTAINERS+=( "$container" )
# The check writes its output in a temporary file, read back once the job is over
CONTAINER_STATUS_FILE+=( ["$container"]=$( mktemp ) )
# Run the check in background and remember its PID ($! must be read right after the &)
check_container "$container" "${CONTAINER_STATUS_FILE[$container]}" & CONTAINER_PID+=( ["$container"]=$! )
done
# Collect the result of each background check: its exit code and the content of its output file
debug "Wait for each individual container check and handle their result..."
for container in "${!CONTAINER_PID[@]}"; do
    pid=${CONTAINER_PID[$container]}
    debug "$container - Waiting for PID ${pid}..."
    wait "$pid"
    ex=$?
    debug "$container - Check return ${ex}"
    STATUS=$( cat "${CONTAINER_STATUS_FILE[$container]}" )
    rm -f "${CONTAINER_STATUS_FILE[$container]}"
    # Sort the container by outcome: 0 is up-to-date, 3 and more is unknown, anything else
    # means upgradable (every non-zero outcome is also recorded as an error)
    if (( ex == 0 )); then
        UP_TO_DATE+=( ["$container"]=$STATUS )
    elif (( ex >= 3 )); then
        ERRORS+=( ["$container"]=$STATUS )
        UNKNOWNS+=( "$container" )
    else
        ERRORS+=( ["$container"]=$STATUS )
        UPGRADABLE_CONTAINERS+=( ["$container"]="$STATUS" )
    fi
    # The final exit code is the highest one met, capped to 3
    if (( ex > EXIT_CODE )); then
        (( ex > 3 )) && ex=3
        EXIT_CODE=$ex
    fi
done
# Containers named on the command line but not checked are reported as not found (as errors,
# with the exit code 3)
NOTFOUNDS=()
for container in "${ONLY_CONTAINERS[@]}"; do
    in_array "$container" "${CHECKED_CONTAINERS[@]}" && continue
    debug "$container - Container not found"
    ERRORS+=( ["$container"]="Container not found" )
    NOTFOUNDS+=( "$container" )
    EXIT_CODE=3
done
# Debug summary of the check
debug "Final exit code: $EXIT_CODE"
debug "Check containers (${#CHECKED_CONTAINERS[@]}):" \
    "$( implode ", " "${CHECKED_CONTAINERS[@]}" )"
debug "Up-to-date containers (${#UP_TO_DATE[@]}):" \
    "$( implode ", " "${!UP_TO_DATE[@]}" )"
debug "Upgradable containers (${#UPGRADABLE_CONTAINERS[@]}):" \
    "$( implode ", " "${!UPGRADABLE_CONTAINERS[@]}" )"
debug "Containers with errors (${#ERRORS[@]}):" \
    "$( implode ", " "${!ERRORS[@]}" )"
debug "Not found containers (${#NOTFOUNDS[@]}):" \
    "$( implode ", " "${NOTFOUNDS[@]}" )"
# Outside of the check cron mode, display the check result line with its performance data
if [[ $CHECK_CRON -eq 0 ]]; then
    # Performance data: one counter per outcome, all bounded by the number of containers
    CONTAINER_COUNTS=$(( ${#CHECKED_CONTAINERS[@]} + ${#NOTFOUNDS[@]} ))
    PERF_DATA=(
        "uptodate_containers=${#UP_TO_DATE[@]};;;0;$CONTAINER_COUNTS"
        "upgradable_containers=${#UPGRADABLE_CONTAINERS[@]};;;0;$CONTAINER_COUNTS"
        "containers_with_errors=${#ERRORS[@]};1;;0;$CONTAINER_COUNTS"
        "unknown_state_containers=${#UNKNOWNS[@]};;;0;$CONTAINER_COUNTS"
    )
    PERF_DATA_TXT="$( implode " " "${PERF_DATA[@]}" )"
    # Pick the summary matching the final exit code
    case "$EXIT_CODE" in
        0) summary="OK - All ${#UP_TO_DATE[@]} container(s) are up-to-date" ;;
        1) summary="WARNING - ${#ERRORS[@]} container(s) need to be updated" ;;
        2) summary="CRITICAL - ${#ERRORS[@]} container(s) need to be updated" ;;
        *) summary="UNKNOWN - fail to retrieve status of ${#UNKNOWNS[@]} container(s)" ;;
    esac
    message "$summary |$PERF_DATA_TXT"
fi
# Trigger container build (if need, enabled and docker compose file is provided): each
# upgradable container is compared with its entry in the rebuild status file and marked to be
# rebuilt when it has no entry or when its upgrade status changed since its last rebuild.
if [[ $REBUILD -eq 1 ]]; then
    [[ -n "$REBUILD_STATUS_FILE" ]] || rebuild_status_file
    debug "Check if we have to trigger some rebuild (status file: $REBUILD_STATUS_FILE)"
    if [[ ${#UPGRADABLE_CONTAINERS[@]} -eq 0 ]]; then
        debug "No upgradable container to rebuild"
        remove_rebuild_status
    elif [[ -z "$DOCKERCOMPOSE_FILE" ]]; then
        message
        message "WARNING: No docker compose file provided, can't trigger rebuild of following" \
            "container(s):"
        # UPGRADABLE_CONTAINERS maps container names (keys) to their status text (values):
        # the container names to list are its keys.
        message "- $( implode "\n- " "${!UPGRADABLE_CONTAINERS[@]}" )"
    else
        message "Rebuilding containers:"
        REBUILT_CONTAINERS=()
        for container in "${!UPGRADABLE_CONTAINERS[@]}"; do
            need_rebuild=0
            rebuild_status CONTAINER_DATA "$container"
            # shellcheck disable=SC2153
            if [[ "$CONTAINER_DATA" != "null" ]]; then
                debug "$container: data='$CONTAINER_DATA'"
                trigger_date=$( jq -r .trigger_date <<< "$CONTAINER_DATA" )
                start_date=$( jq -r .start_date <<< "$CONTAINER_DATA" )
                end_date=$( jq -r .end_date <<< "$CONTAINER_DATA" )
                log=$( jq -r .log <<< "$CONTAINER_DATA" )
                if [[ "$start_date" == "null" ]]; then
                    debug "$container: build triggered but not yet started"
                    message "- $container: rebuild triggered on $trigger_date and not yet started"
                elif [[ "$end_date" == "null" ]]; then
                    debug "$container: rebuild triggered on $trigger_date and started on" \
                        "$start_date, but not yet finish"
                    message "- $container: rebuild triggered on $trigger_date and started on" \
                        "$start_date, but not yet finish (log: $log)"
                else
                    duration=$( jq -r .duration <<< "$CONTAINER_DATA" )
                    debug "$container: rebuilt in $duration on $start_date (finish on $end_date)"
                    prev_status=$( jq -r .status <<< "$CONTAINER_DATA" )
                    if [[ "$prev_status" == "${UPGRADABLE_CONTAINERS[$container]}" ]]; then
                        # Same upgrade status as when the rebuild was triggered: report the
                        # result of this rebuild
                        error=$( jq -r .error <<< "$CONTAINER_DATA" )
                        if [[ "$error" != "null" ]]; then
                            message "- $container: $error (log: $log)"
                        else
                            message "- $container: already rebuilt in $duration (rebuild" \
                                "triggered on $trigger_date, started on $start_date and finish" \
                                "at $end_date, log: $log)"
                            REBUILT_CONTAINERS+=( "$container" )
                        fi
                    else
                        # The upgrade status changed: drop the previous entry, rebuild again
                        update_rebuild_status -d "$container"
                        message "- $container: upgrade status change since last rebuild, rebuild" \
                            "it again"
                        need_rebuild=1
                    fi
                fi
            else
                debug "$container: not found in status file"
                need_rebuild=1
            fi
            if [[ $need_rebuild -eq 1 ]]; then
                update_rebuild_status "$container" \
                    "trigger_date=$(now)" "status=${UPGRADABLE_CONTAINERS[$container]}"
                message "- $container: rebuild triggered"
            else
                debug "$container: rebuild not need"
            fi
        done
        # Handle rebuilt containers
        if [[ ${#REBUILT_CONTAINERS[@]} -gt 0 ]] && [[ $CHECK_CRON -eq 0 ]]; then
            message
            message "Some containers are ready to be recreated and restarted."
            message "Run the following command to do it:"
            message
            message " $COMPOSE_BIN -f $DOCKERCOMPOSE_FILE up -d --no-deps ${REBUILT_CONTAINERS[*]}"
        fi
        message
    fi
fi
# Display details, starting by errors: one "<container> - <status>" line per container.
# Up-to-date containers are only listed outside of the check cron mode.
for container in "${!ERRORS[@]}"; do
    message "${container} - ${ERRORS[${container}]}"
done
[[ $CHECK_CRON -eq 0 ]] || exit $EXIT_CODE
for container in "${!UP_TO_DATE[@]}"; do
    message "${container} - ${UP_TO_DATE[${container}]}"
done
exit $EXIT_CODE
# vim: shiftwidth=4 tabstop=4 expandtab