Code cleaning

This commit is contained in:
Benjamin Renard 2024-07-16 13:43:26 +02:00
parent 269b92415f
commit bc078f83e8
Signed by: bn8
GPG key ID: 3E2E1CE1907115BC

View file

@ -48,29 +48,37 @@ EXPECTED_MODE=auto
DEBUG=0
function usage () {
ERROR="$1"
ERROR="$*"
[[ -n "$ERROR" ]] && echo -e "$ERROR\n"
cat << EOF
Usage: $0 [-d] [-h] [options]
-u pg_user Specify local Postgres user (Default: try to auto-detect or use $DEFAULT_PG_USER)
-u pg_user Specify local Postgres user (Default: try to auto-detect or
use $DEFAULT_PG_USER)
-b psql_bin Specify psql binary path (Default: $PSQL_BIN)
-B pg_lsclusters_bin Specify pg_lsclusters binary path (Default: $PG_LSCLUSTER_BIN)
-V pg_version Specify Postgres version (Default: try to auto-detect or use $DEFAULT_PG_VERSION)
-m pg_main Specify Postgres main directory path (Default: try to auto-detect or use
$DEFAULT_PG_MAIN)
-V pg_version Specify Postgres version (Default: try to auto-detect or
use $DEFAULT_PG_VERSION)
-m pg_main Specify Postgres main directory path (Default: try to auto-detect or
use $DEFAULT_PG_MAIN)
-r recovery_conf Specify Postgres recovery configuration file path
(Default: [PG_MAIN]/recovery.conf on PG <= 11, [PG_MAIN]/postgresql.auto.conf on PG >= 12)
-U pg_master_user Specify Postgres user to use on master (Default: user from recovery.conf file)
-p pg_port Specify default Postgres master TCP port (Default: same as local PostgreSQL
port if detected or use $DEFAULT_PG_PORT)
-D dbname Specify DB name on Postgres master/slave to connect on (Default: PG_USER, must
match with .pgpass one is used)
-C 1/0 Enable or disable check if the current LSN of the master host is the same
of the last received LSN (Default: $CHECK_CUR_MASTER_LSN)
-w replay_warn_delay Specify the replay warning delay in second (Default: $REPLAY_WARNING_DELAY)
-c replay_crit_delay Specify the replay critical delay in second (Default: $REPLAY_CRITICAL_DELAY)
-e expected_sync_state The expected replication state ('sync' or 'async', default: $EXPECTED_SYNC_STATE)
-E expected_mode The expected mode ('master', 'hot-standby' or 'auto', default: '$EXPECTED_MODE')
(Default: [PG_MAIN]/recovery.conf on PG <= 11,
[PG_MAIN]/postgresql.auto.conf on PG >= 12)
-U pg_master_user Specify Postgres user to use on master (Default: user from recovery.conf
file)
-p pg_port Specify default Postgres master TCP port (Default: same as local
PostgreSQL port if detected or use $DEFAULT_PG_PORT)
-D dbname Specify DB name on Postgres master/slave to connect on (Default:
PG_USER, must match with .pgpass one is used)
-C 1/0 Enable or disable check if the current LSN of the master host is the
same of the last received LSN (Default: $CHECK_CUR_MASTER_LSN)
-w replay_warn_delay Specify the replay warning delay in second
(Default: $REPLAY_WARNING_DELAY)
-c replay_crit_delay Specify the replay critical delay in second
(Default: $REPLAY_CRITICAL_DELAY)
-e expected_sync_state The expected replication state ('sync' or 'async',
default: $EXPECTED_SYNC_STATE)
-E expected_mode The expected mode ('master', 'hot-standby' or 'auto',
default: '$EXPECTED_MODE')
-d Debug mode
-h Show this message
EOF
@ -117,12 +125,15 @@ while getopts "hu:b:B:V:m:r:U:p:D:C:w:c:e:E:d" OPTION; do
;;
e)
[[ "$OPTARG" != "sync" ]] && [[ "$OPTARG" != "async" ]] && \
usage "Invalid expected replication state '$OPTARG'. Possible values: sync or async."
usage "Invalid expected replication state '$OPTARG'." \
"Possible values: sync or async."
EXPECTED_SYNC_STATE=$OPTARG
;;
E)
[[ "$OPTARG" != "master" ]] && [[ "$OPTARG" != "hot-standby" ]] && [[ "$OPTARG" != "auto" ]] && \
usage "Invalid expected mode '$OPTARG'. Possible values: master, hot-standby or auto."
[[ "$OPTARG" != "master" ]] && [[ "$OPTARG" != "hot-standby" ]] && \
[[ "$OPTARG" != "auto" ]] && \
usage "Invalid expected mode '$OPTARG'. Possible values: master, hot-standby" \
"or auto."
EXPECTED_MODE=$OPTARG
;;
d)
@ -139,7 +150,7 @@ done
# Print a debug message on stderr, prefixed with "[DEBUG] ", only when DEBUG equals 1.
# The text is written with `echo -e`, so backslash escapes in the message (callers
# pass sequences such as \n and \t) are interpreted.
# NOTE(review): this is a diff view without +/- markers. The hunk header reports one
# replaced line, so the "$1" echo looks like the removed line and the "$*" echo (all
# arguments joined by the first character of IFS, a space by default) like its
# replacement. Confirm against the committed file.
function debug() {
if [[ $DEBUG -eq 1 ]]; then
>&2 echo -e "[DEBUG] $1"
>&2 echo -e "[DEBUG] $*"
fi
}
@ -162,15 +173,19 @@ EXPECTED_MODE = $EXPECTED_MODE
# Auto-detect PostgreSQL information using pg_lsclusters
if [[ -x "$PG_LSCLUSTER_BIN" ]]; then
PG_CLUSTER=$( $PG_LSCLUSTER_BIN -h 2>/dev/null|head -n1 )
PG_CLUSTER=$( $PG_LSCLUSTER_BIN -h 2>/dev/null | head -n1 )
if [[ -n "$PG_CLUSTER" ]]; then
debug "pg_lsclusters output:\n\t$PG_CLUSTER"
# Output example:
# 9.6 main 5432 online,recovery postgres /var/lib/postgresql/9.6/main /var/log/postgresql/postgresql-9.6-main.log
[[ -z "$PG_VERSION" ]] && PG_VERSION=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $1}' )
[[ -z "$PG_DEFAULT_PORT" ]] && PG_DEFAULT_PORT=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $3}' )
[[ -z "$PG_USER" ]] && PG_USER=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $5}' )
[[ -z "$PG_MAIN" ]] && PG_MAIN=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $6}' )
# 9.6 main 5432 online,recovery postgres /var/lib/postgresql/9.6/main \
# /var/log/postgresql/postgresql-9.6-main.log
# 13 main 5432 online,recovery,pacemaker postgres /var/lib/postgresql/13/main \
# /var/log/postgresql/postgresql-13-main.log
[[ -z "$PG_VERSION" ]] && PG_VERSION=$( awk -F ' +' '{print $1}' <<< "$PG_CLUSTER" )
[[ -z "$PG_DEFAULT_PORT" ]] && \
PG_DEFAULT_PORT=$( awk -F ' +' '{print $3}' <<< "$PG_CLUSTER" )
[[ -z "$PG_USER" ]] && PG_USER=$( awk -F ' +' '{print $5}' <<< "$PG_CLUSTER" )
[[ -z "$PG_MAIN" ]] && PG_MAIN=$( awk -F ' +' '{print $6}' <<< "$PG_CLUSTER" )
fi
else
debug "pg_lsclusters not found ($PG_LSCLUSTER_BIN): parameters auto-detection disabled"
@ -194,7 +209,11 @@ id "$PG_USER" > /dev/null 2>&1 || { echo "UNKNOWN: Invalid Postgres user ($PG_US
# Check RECOVERY_CONF
if [[ -z "$RECOVERY_CONF" ]]; then
[[ $PG_VERSION -le 11 ]] && RECOVERY_CONF_FILENAME="recovery.conf" || RECOVERY_CONF_FILENAME="postgresql.auto.conf"
# Choose the file that holds the replication settings from the PostgreSQL major
# version: recovery.conf up to PG 11, postgresql.auto.conf from PG 12 on.
# Only the part before the first dot is compared. pg_lsclusters reports pre-10
# releases as e.g. "9.6", which is not a valid integer operand for -le: bash prints
# an arithmetic error, the test returns false and postgresql.auto.conf would be
# selected by mistake. Integer-only versions ("13") are compared exactly as before.
if [[ "${PG_VERSION%%.*}" -le 11 ]]; then
RECOVERY_CONF_FILENAME="recovery.conf"
else
RECOVERY_CONF_FILENAME="postgresql.auto.conf"
fi
RECOVERY_CONF="$PG_MAIN/$RECOVERY_CONF_FILENAME"
else
RECOVERY_CONF_FILENAME=$( basename "$RECOVERY_CONF" )
@ -208,15 +227,17 @@ fi
[[ -z "$PG_DB" ]] && PG_DB="$PG_USER"
# Run an SQL statement against the local PostgreSQL instance and print psql's output
# on stdout. psql ($PSQL_BIN) is started through sudo as system user $PG_USER on
# database $PG_DB, and the statement is fed on stdin with a here-string.
# Per the psql documentation, -w never prompts for a password, -t prints rows only
# and -P format=unaligned selects unaligned output (callers split it on '|').
# NOTE(review): this is a diff view without +/- markers. `sql="$1"` and the
# `echo ... |` debug line look like the removed lines; `local sql="$*"` (all
# arguments joined by the first character of IFS) and the `<<<` debug line look like
# their replacements. Confirm against the committed file.
function psql_get () {
sql="$1"
debug "Exec 'echo \"$sql\"|sudo -u $PG_USER $PSQL_BIN -d \"$PG_DB\" -w -t -P format=unaligned"
local sql="$*"
debug "Exec 'sudo -u $PG_USER $PSQL_BIN -d \"$PG_DB\" -w -t -P format=unaligned <<< \"$sql\""
sudo -u "$PG_USER" "$PSQL_BIN" -d "$PG_DB" -w -t -P format=unaligned <<< "$sql"
}
# Run an SQL statement on the master server and print psql's output on stdout.
# psql ($PSQL_BIN) is started through sudo as the local system user $PG_USER and is
# given $M_USER, $M_HOST, $M_PORT and $PG_DB as user, host, port and database; the
# statement is fed on stdin with a here-string. -w means psql never prompts for a
# password, so credentials have to come from elsewhere (the usage text mentions
# .pgpass).
# NOTE(review): this is a diff view without +/- markers. `sql="$1"`, the first debug
# call and the single-line sudo command look like the removed lines; `local sql="$*"`,
# the two-line debug call and the continued sudo command look like their
# replacements. Read literally, the query would run twice. Confirm against the
# committed file.
function psql_master_get () {
sql="$1"
debug "Exec 'echo \"$sql\"|sudo -u $PG_USER $PSQL_BIN -U $M_USER -h $M_HOST -w -p $M_PORT -d $PG_DB -t -P format=unaligned"
sudo -u "$PG_USER" "$PSQL_BIN" -U "$M_USER" -h "$M_HOST" -w -p "$M_PORT" -d "$PG_DB" -t -P format=unaligned <<< "$sql"
local sql="$*"
debug "Exec 'sudo -u $PG_USER $PSQL_BIN -U $M_USER -h $M_HOST -w -p $M_PORT -d $PG_DB -t" \
"-P format=unaligned <<< \"$sql\""
sudo -u "$PG_USER" "$PSQL_BIN" \
-U "$M_USER" -h "$M_HOST" -w -p "$M_PORT" -d "$PG_DB" -t -P format=unaligned <<< "$sql"
}
debug "Running options:
@ -273,7 +294,8 @@ if [[ "$EXPECTED_MODE" == "auto" ]]; then
if [[ $RECOVERY_MODE -eq 1 ]]; then
debug "Postgres is in recovery mode. Hot-standby mode."
EXPECTED_MODE="hot-standby"
elif [[ -f $RECOVERY_CONF ]] && [[ $( grep -cE '^\s*primary_conninfo' "$RECOVERY_CONF" ) -gt 0 ]]; then
elif [[ -f $RECOVERY_CONF ]] && \
[[ $( grep -cE '^\s*primary_conninfo' "$RECOVERY_CONF" ) -gt 0 ]]; then
debug "File $RECOVERY_CONF_FILENAME found and contain primary_conninfo. Hot-standby mode."
EXPECTED_MODE="hot-standby"
else
@ -301,19 +323,24 @@ if [[ "$EXPECTED_MODE" == "hot-standby" ]]; then
# Get master connection information from primary_conninfo configuration parameter
MASTER_CONN_INFOS=$( psql_get "SHOW primary_conninfo" )
if [[ -z "$MASTER_CONN_INFOS" ]]; then
echo "UNKNOWN: Can't retrieve master connection information from primary_conninfo configuration parameter"
echo "UNKNOWN: Can't retrieve master connection information from primary_conninfo" \
"configuration parameter"
exit 3
fi
debug "Master connection information: $MASTER_CONN_INFOS"
M_HOST=$( grep 'host=' <<< "$MASTER_CONN_INFOS" | sed 's/^.*host= *\([0-9a-zA-Z.-]\+\) *.*$/\1/' )
M_HOST=$(
grep 'host=' <<< "$MASTER_CONN_INFOS" | sed 's/^.*host= *\([0-9a-zA-Z.-]\+\) *.*$/\1/'
)
if [[ -z "$M_HOST" ]]; then
echo "UNKNOWN: Can't retrieve master host from primary_conninfo configuration parameter"
exit 3
fi
debug "Master host: $M_HOST"
M_PORT=$( grep 'port=' <<< "$MASTER_CONN_INFOS" | sed 's/^.*port= *\([0-9a-zA-Z.-]\+\) *.*$/\1/' )
M_PORT=$(
grep 'port=' <<< "$MASTER_CONN_INFOS" | sed 's/^.*port= *\([0-9a-zA-Z.-]\+\) *.*$/\1/'
)
if [[ -z "$M_PORT" ]]; then
debug "Master port not specified, use default: $PG_DEFAULT_PORT"
M_PORT=$PG_DEFAULT_PORT
@ -325,7 +352,9 @@ if [[ "$EXPECTED_MODE" == "hot-standby" ]]; then
debug "Master user provided by command-line, use it: $PG_MASTER_USER"
M_USER="$PG_MASTER_USER"
else
M_USER=$( grep 'user=' <<< "$MASTER_CONN_INFOS" | sed 's/^.*user= *\([0-9a-zA-Z.-]\+\) *.*$/\1/' )
M_USER=$(
grep 'user=' <<< "$MASTER_CONN_INFOS" | sed 's/^.*user= *\([0-9a-zA-Z.-]\+\) *.*$/\1/'
)
if [[ -z "$M_USER" ]]; then
debug "Master user not specified, use default: $PG_USER"
M_USER=$PG_USER
@ -334,7 +363,10 @@ if [[ "$EXPECTED_MODE" == "hot-standby" ]]; then
fi
fi
M_APP_NAME=$( grep 'application_name=' <<< "$MASTER_CONN_INFOS" | sed "s/^.*application_name=[ \'\"]*\([^ \'\"]\+\)[ \'\"]*.*$/\1/" )
M_APP_NAME=$(
grep 'application_name=' <<< "$MASTER_CONN_INFOS" |
sed "s/^.*application_name=[ \'\"]*\([^ \'\"]\+\)[ \'\"]*.*$/\1/"
)
if [[ -z "$M_APP_NAME" ]]; then
if [[ $PG_VERSION -ge 12 ]]; then
debug "Master application name not specified, use cluster_name if defined"
@ -355,24 +387,31 @@ if [[ "$EXPECTED_MODE" == "hot-standby" ]]; then
fi
# Get current replication state information from master
M_CUR_REPL_STATE_INFO="$( psql_master_get "SELECT state, sync_state, $sent_lsn AS sent_lsn, $write_lsn AS write_lsn FROM pg_stat_replication WHERE application_name='$M_APP_NAME';" )"
M_CUR_REPL_STATE_INFO="$(
psql_master_get \
"SELECT state, sync_state, $sent_lsn AS sent_lsn, $write_lsn AS write_lsn" \
"FROM pg_stat_replication WHERE application_name='$M_APP_NAME';"
)"
if [[ -z "$M_CUR_REPL_STATE_INFO" ]]; then
echo "UNKNOWN: Can't retrieve current replication state information from master server"
exit 3
fi
debug "Master current replication state:\n\tstate|sync_state|sent_lsn|write_lsn\n\t$M_CUR_REPL_STATE_INFO"
debug "Master current replication state:\n" \
"\tstate|sync_state|sent_lsn|write_lsn\n\t$M_CUR_REPL_STATE_INFO"
M_CUR_STATE=$( cut -d'|' -f1 <<< "$M_CUR_REPL_STATE_INFO" )
debug "Master current state: $M_CUR_STATE"
if [[ "$M_CUR_STATE" != "streaming" ]]; then
echo "CRITICAL: this host is not in streaming state according to master host (current state = '$M_CUR_STATE')"
echo "CRITICAL: this host is not in streaming state according to master host" \
"(current state = '$M_CUR_STATE')"
exit 2
fi
M_CUR_SYNC_STATE=$( cut -d'|' -f2 <<< "$M_CUR_REPL_STATE_INFO" )
debug "Master current sync state: $M_CUR_SYNC_STATE"
if [[ "$M_CUR_SYNC_STATE" != "$EXPECTED_SYNC_STATE" ]]; then
echo "CRITICAL: unexpected replication state '$M_CUR_SYNC_STATE' (expected state = '$EXPECTED_SYNC_STATE')"
echo "CRITICAL: unexpected replication state '$M_CUR_SYNC_STATE'" \
"(expected state = '$EXPECTED_SYNC_STATE')"
exit 2
fi
@ -400,15 +439,22 @@ if [[ "$EXPECTED_MODE" == "hot-standby" ]]; then
# The last received LSN is the last replayed ?
if [[ "$LAST_RECEIVED_LSN" != "$LAST_REPLAYED_LSN" ]]; then
debug "/!\ The last received LSN is NOT the last replayed LSN ('$M_CUR_LSN' / '$LAST_REPLAYED_LSN')"
REPLAY_DELAY="$( psql_get 'SELECT EXTRACT(EPOCH FROM now() - pg_last_xact_replay_timestamp());' )"
debug "/!\ The last received LSN is NOT the last replayed LSN" \
"('$M_CUR_LSN' / '$LAST_REPLAYED_LSN')"
REPLAY_DELAY="$(
psql_get 'SELECT EXTRACT(EPOCH FROM now() - pg_last_xact_replay_timestamp());'
)"
debug "Replay delay is $REPLAY_DELAY second(s)"
if [[ $( bc -l <<< "$REPLAY_DELAY >= $REPLAY_CRITICAL_DELAY" ) -gt 0 ]]; then
echo "CRITICAL: last received LSN is not the last replayed ('$LAST_RECEIVED_LSN' / '$LAST_REPLAYED_LSN') and replay delay is $REPLAY_DELAY second(s)"
echo "CRITICAL: last received LSN is not the last replayed" \
"('$LAST_RECEIVED_LSN' / '$LAST_REPLAYED_LSN') and" \
"replay delay is $REPLAY_DELAY second(s)"
exit 2
fi
if [[ $( bc -l <<< "$REPLAY_DELAY >= $REPLAY_WARNING_DELAY" ) -gt 0 ]]; then
echo "WARNING: last received LSN is not the last replay file ('$LAST_RECEIVED_LSN' / '$LAST_REPLAYED_LSN') and replay delay is $REPLAY_DELAY second(s)"
echo "WARNING: last received LSN is not the last replay file" \
"('$LAST_RECEIVED_LSN' / '$LAST_REPLAYED_LSN') and" \
"replay delay is $REPLAY_DELAY second(s)"
exit 1
fi
debug "Replay delay is not worrying"
@ -417,7 +463,8 @@ if [[ "$EXPECTED_MODE" == "hot-standby" ]]; then
# The master last sent LSN is the last received (and synced) ?
if [[ "$M_CUR_SENT_LSN" != "$LAST_RECEIVED_LSN" ]]; then
echo "WARNING: master last sent LSN is not already received (and synced to disk) by slave. May be we have some network delay or load on slave"
echo "WARNING: master last sent LSN is not already received (and synced to disk) by slave."
echo "May be we have some network delay or load on slave"
echo "Master last sent LSN: $M_CUR_SENT_LSN"
echo "Slave last received (and synced to disk) LSN: $LAST_RECEIVED_LSN"
exit 1
@ -445,27 +492,32 @@ elif [[ "$EXPECTED_MODE" == "master" ]]; then
debug "Current LSN: $CURRENT_LSN"
# Check standby client
STANDBY_CLIENTS=$( psql_get "SELECT application_name, client_addr, sent_lsn, write_lsn, state, sync_state, current_lag
FROM (
SELECT application_name, client_addr, sent_lsn, write_lsn, state, sync_state, current_lag
FROM (
SELECT application_name, client_addr, $sent_lsn AS sent_lsn, $write_lsn AS write_lsn, state, sync_state,
$pg_wal_lsn_diff($pg_current_wal_lsn, $write_lsn) AS current_lag
FROM pg_stat_replication
) AS s2
) AS s1" )
STANDBY_CLIENTS=$(
psql_get \
"SELECT
application_name, client_addr, sent_lsn, write_lsn, state, sync_state, current_lag
FROM (
SELECT
application_name, client_addr, sent_lsn, write_lsn, state, sync_state,
current_lag
FROM (
SELECT
application_name, client_addr, $sent_lsn AS sent_lsn,
$write_lsn AS write_lsn, state, sync_state,
$pg_wal_lsn_diff($pg_current_wal_lsn, $write_lsn) AS current_lag
FROM pg_stat_replication
) AS s2
) AS s1"
)
if [[ -z "$STANDBY_CLIENTS" ]]; then
echo "WARNING: no stand-by client connected"
exit 1
fi
debug "Stand-by client(s):\n\t${STANDBY_CLIENTS//$'\n'/\\n\\t}"
STANDBY_CLIENTS_TXT=""
STANDBY_CLIENTS_COUNT=0
STANDBY_CLIENTS_ROWS=()
CURRENT_LSN_IS_LAST_SENT=1
for line in $STANDBY_CLIENTS; do
(( STANDBY_CLIENTS_COUNT+=1 ))
NAME=$( cut -d '|' -f 1 <<< "$line" )
IP=$( cut -d '|' -f 2 <<< "$line" )
SENT_LSN=$( cut -d '|' -f 3 <<< "$line" )
@ -473,20 +525,24 @@ elif [[ "$EXPECTED_MODE" == "master" ]]; then
STATE=$( cut -d '|' -f 5 <<< "$line" )
SYNC_STATE=$( cut -d '|' -f 6 <<< "$line" )
LAG=$( cut -d '|' -f 7 <<< "$line" )
STANDBY_CLIENTS_TXT="$STANDBY_CLIENTS_TXT\n$NAME ($IP): $STATE/$SYNC_STATE (LSN: sent='$SENT_LSN' / writed='$WRITED_LSN', Lag: ${LAG}b)"
# Build the human-readable summary of one stand-by client and append it to the list
# printed after the status line. The text is assembled in two steps only to keep the
# source lines short; the second part closes the "(LSN: ...)" parenthesis it opens.
STANDBY_CLIENTS_ROW="$NAME ($IP): $STATE/$SYNC_STATE"
STANDBY_CLIENTS_ROW+=" (LSN: sent='$SENT_LSN' / writed='$WRITED_LSN', Lag: ${LAG}b)"
STANDBY_CLIENTS_ROWS+=( "$STANDBY_CLIENTS_ROW" )
[[ "$SENT_LSN" != "$CURRENT_LSN" ]] && CURRENT_LSN_IS_LAST_SENT=0
done
if [[ $CURRENT_LSN_IS_LAST_SENT -eq 1 ]]; then
echo "OK: $STANDBY_CLIENTS_COUNT stand-by client(s) connected"
echo "OK: ${#STANDBY_CLIENTS_ROWS[@]} stand-by client(s) connected"
EXIT_CODE=0
else
echo "WARNING: current master LSN is not the last sent to stand-by client(s) connected. May be we have some load ?"
echo "WARNING: current master LSN is not the last sent to stand-by client(s) connected." \
"May be we have some load ?"
EXIT_CODE=1
fi
echo "Current master LSN: $CURRENT_LSN"
echo -e "$STANDBY_CLIENTS_TXT"
# Print one stand-by client summary per line. printf reuses its format for every
# array element, so the rows come out newline-separated without reassigning the
# global IFS just to join them.
printf '%s\n' "${STANDBY_CLIENTS_ROWS[@]}"
exit $EXIT_CODE
else
echo "UNKNOWN - Invalid mode '$EXPECTED_MODE'"