Compare commits

...

4 commits

Author SHA1 Message Date
Benjamin Renard
97f4875aee
Configure CI to run tests
All checks were successful
Run tests / tests (push) Successful in 1m33s
2024-07-15 15:25:40 +02:00
Benjamin Renard
255430803f
Code cleaning using shellcheck and add pre-commit hook
2024-07-15 15:25:23 +02:00
Benjamin Renard
3aaa4a6b18
Fix wording
2024-07-15 12:50:27 +02:00
Benjamin Renard
a099e2f833
Retrieve master connection info using "SHOW primary_conninfo" SQL query instead of parsing recovery.conf
2024-07-15 12:49:55 +02:00
3 changed files with 110 additions and 123 deletions

View file

@ -0,0 +1,14 @@
---
name: Run tests
on: [push]
jobs:
tests:
runs-on: docker
container:
image: docker.io/brenard/python-pre-commit:latest
options: "--workdir /src"
steps:
- name: Check out repository code
uses: actions/checkout@v4
- name: Run pre-commit
run: pre-commit run --all-files

View file

@ -13,13 +13,17 @@ repos:
- --ignore-regex=.*codespell-ignore$ - --ignore-regex=.*codespell-ignore$
# - --write-changes # Uncomment to write changes # - --write-changes # Uncomment to write changes
exclude_types: [csv, json] exclude_types: [csv, json]
- repo: https://github.com/adrienverge/yamllint
rev: v1.32.0
hooks:
- id: yamllint
ignore: .github/
- repo: https://github.com/pre-commit/mirrors-prettier - repo: https://github.com/pre-commit/mirrors-prettier
rev: v2.7.1 rev: v2.7.1
hooks: hooks:
- id: prettier - id: prettier
args: ["--print-width", "100"] args: ["--print-width", "100"]
- repo: https://github.com/adrienverge/yamllint
rev: v1.32.0
hooks:
- id: yamllint
ignore: .github/
- repo: https://github.com/shellcheck-py/shellcheck-py
rev: v0.10.0.1
hooks:
- id: shellcheck

View file

@ -49,7 +49,7 @@ DEBUG=0
function usage () { function usage () {
ERROR="$1" ERROR="$1"
[ -n "$ERROR" ] && echo -e "$ERROR\n" [[ -n "$ERROR" ]] && echo -e "$ERROR\n"
cat << EOF cat << EOF
Usage: $0 [-d] [-h] [options] Usage: $0 [-d] [-h] [options]
-u pg_user Specify local Postgres user (Default: try to auto-detect or use $DEFAULT_PG_USER) -u pg_user Specify local Postgres user (Default: try to auto-detect or use $DEFAULT_PG_USER)
@ -74,11 +74,10 @@ Usage: $0 [-d] [-h] [options]
-d Debug mode -d Debug mode
-h Show this message -h Show this message
EOF EOF
[ -n "$ERROR" ] && exit 1 || exit 0 [[ -n "$ERROR" ]] && exit 1 || exit 0
} }
while getopts "hu:b:B:V:m:r:U:p:D:C:w:c:e:E:d" OPTION while getopts "hu:b:B:V:m:r:U:p:D:C:w:c:e:E:d" OPTION; do
do
case $OPTION in case $OPTION in
u) u)
PG_USER=$OPTARG PG_USER=$OPTARG
@ -117,12 +116,12 @@ do
REPLAY_CRITICAL_DELAY=$OPTARG REPLAY_CRITICAL_DELAY=$OPTARG
;; ;;
e) e)
[ "$OPTARG" != "sync" -a "$OPTARG" != "async" ] && \ [[ "$OPTARG" != "sync" ]] && [[ "$OPTARG" != "async" ]] && \
usage "Invalid expected replication state '$OPTARG'. Possible values: sync or async." usage "Invalid expected replication state '$OPTARG'. Possible values: sync or async."
EXPECTED_SYNC_STATE=$OPTARG EXPECTED_SYNC_STATE=$OPTARG
;; ;;
E) E)
[ "$OPTARG" != "master" -a "$OPTARG" != "hot-standby" -a "$OPTARG" != "auto" ] && \ [[ "$OPTARG" != "master" ]] && [[ "$OPTARG" != "hot-standby" ]] && [[ "$OPTARG" != "auto" ]] && \
usage "Invalid expected mode '$OPTARG'. Possible values: master, hot-standby or auto." usage "Invalid expected mode '$OPTARG'. Possible values: master, hot-standby or auto."
EXPECTED_MODE=$OPTARG EXPECTED_MODE=$OPTARG
;; ;;
@ -139,8 +138,7 @@ do
done done
function debug() { function debug() {
if [ $DEBUG -eq 1 ] if [[ $DEBUG -eq 1 ]]; then
then
>&2 echo -e "[DEBUG] $1" >&2 echo -e "[DEBUG] $1"
fi fi
} }
@ -163,64 +161,62 @@ EXPECTED_MODE = $EXPECTED_MODE
" "
# Auto-detect PostgreSQL information using pg_lsclusters # Auto-detect PostgreSQL information using pg_lsclusters
if [ -x "$PG_LSCLUSTER_BIN" ] if [[ -x "$PG_LSCLUSTER_BIN" ]]; then
then
PG_CLUSTER=$( $PG_LSCLUSTER_BIN -h 2>/dev/null|head -n1 ) PG_CLUSTER=$( $PG_LSCLUSTER_BIN -h 2>/dev/null|head -n1 )
if [ -n "$PG_CLUSTER" ] if [[ -n "$PG_CLUSTER" ]]; then
then
debug "pg_lsclusters output:\n\t$PG_CLUSTER" debug "pg_lsclusters output:\n\t$PG_CLUSTER"
# Output example: # Output example:
# 9.6 main 5432 online,recovery postgres /var/lib/postgresql/9.6/main /var/log/postgresql/postgresql-9.6-main.log # 9.6 main 5432 online,recovery postgres /var/lib/postgresql/9.6/main /var/log/postgresql/postgresql-9.6-main.log
[ -z "$PG_VERSION" ] && PG_VERSION=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $1}' ) [[ -z "$PG_VERSION" ]] && PG_VERSION=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $1}' )
[ -z "$PG_DEFAULT_PORT" ] && PG_DEFAULT_PORT=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $3}' ) [[ -z "$PG_DEFAULT_PORT" ]] && PG_DEFAULT_PORT=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $3}' )
[ -z "$PG_USER" ] && PG_USER=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $5}' ) [[ -z "$PG_USER" ]] && PG_USER=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $5}' )
[ -z "$PG_MAIN" ] && PG_MAIN=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $6}' ) [[ -z "$PG_MAIN" ]] && PG_MAIN=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $6}' )
fi fi
else else
debug "pg_lsclusters not found ($PG_LSCLUSTER_BIN): parameters auto-detection disabled" debug "pg_lsclusters not found ($PG_LSCLUSTER_BIN): parameters auto-detection disabled"
fi fi
# If auto-detection failed, use default values # If auto-detection failed, use default values
[ -z "$PG_USER" ] && PG_USER="$DEFAULT_PG_USER" [[ -z "$PG_USER" ]] && PG_USER="$DEFAULT_PG_USER"
[ -z "$PG_VERSION" ] && PG_VERSION="$DEFAULT_PG_VERSION" [[ -z "$PG_VERSION" ]] && PG_VERSION="$DEFAULT_PG_VERSION"
[ -z "$PG_MAIN" ] && PG_MAIN="$DEFAULT_PG_MAIN" [[ -z "$PG_MAIN" ]] && PG_MAIN="$DEFAULT_PG_MAIN"
[ -z "$PG_DEFAULT_PORT" ] && PG_DEFAULT_PORT="$DEFAULT_PG_PORT" [[ -z "$PG_DEFAULT_PORT" ]] && PG_DEFAULT_PORT="$DEFAULT_PG_PORT"
# Check PG_USER # Check PG_USER
[ -z "$PG_USER" ] && echo "UNKNOWN: Postgres user not specified" && exit 3 [[ -z "$PG_USER" ]] && echo "UNKNOWN: Postgres user not specified" && exit 3
id "$PG_USER" > /dev/null 2>&1 id "$PG_USER" > /dev/null 2>&1 || { echo "UNKNOWN: Invalid Postgres user ($PG_USER)"; exit 3; }
[ $? -ne 0 ] && echo "UNKNOWN: Invalid Postgres user ($PG_USER)" && exit 3
# Check PSQL_BIN # Check PSQL_BIN
[ ! -x "$PSQL_BIN" ] && echo "UNKNOWN: Invalid psql bin path ($PSQL_BIN)" && exit 3 [[ ! -x "$PSQL_BIN" ]] && echo "UNKNOWN: Invalid psql bin path ($PSQL_BIN)" && exit 3
# Check PG_MAIN # Check PG_MAIN
[ ! -d "$PG_MAIN/" ] && echo "UNKNOWN: Invalid Postgres main directory path ($PG_MAIN)" && exit 3 [[ ! -d "$PG_MAIN/" ]] && echo "UNKNOWN: Invalid Postgres main directory path ($PG_MAIN)" && exit 3
# Check RECOVERY_CONF # Check RECOVERY_CONF
if [ -z "$RECOVERY_CONF" ]; then if [[ -z "$RECOVERY_CONF" ]]; then
[ $PG_VERSION -le 11 ] && RECOVERY_CONF_FILENAME="recovery.conf" || RECOVERY_CONF_FILENAME="postgresql.auto.conf" [[ $PG_VERSION -le 11 ]] && RECOVERY_CONF_FILENAME="recovery.conf" || RECOVERY_CONF_FILENAME="postgresql.auto.conf"
RECOVERY_CONF="$PG_MAIN/$RECOVERY_CONF_FILENAME" RECOVERY_CONF="$PG_MAIN/$RECOVERY_CONF_FILENAME"
else else
RECOVERY_CONF_FILENAME=$( basename "$RECOVERY_CONF" ) RECOVERY_CONF_FILENAME=$( basename "$RECOVERY_CONF" )
fi fi
# Check PG_DEFAULT_PORT # Check PG_DEFAULT_PORT
[ $( echo "$PG_DEFAULT_PORT"|grep -c -E '^[0-9]*$' ) -ne 1 ] && "UNKNOWN: Postgres default master TCP port must be an integer." && exit 3 [[ $( grep -c -E '^[0-9]*$' <<< "$PG_DEFAULT_PORT" ) -ne 1 ]] && \
echo "UNKNOWN: Postgres default master TCP port must be an integer." && exit 3
# If PG_DB is not provided with -D parameter, use PG_USER as default value # If PG_DB is not provided with -D parameter, use PG_USER as default value
[ -z "$PG_DB" ] && PG_DB="$PG_USER" [[ -z "$PG_DB" ]] && PG_DB="$PG_USER"
function psql_get () { function psql_get () {
sql="$1" sql="$1"
debug "Exec 'echo \"$sql\"|sudo -u $PG_USER $PSQL_BIN -d \"$PG_DB\" -w -t -P format=unaligned" debug "Exec 'echo \"$sql\"|sudo -u $PG_USER $PSQL_BIN -d \"$PG_DB\" -w -t -P format=unaligned"
echo "$sql"|sudo -u $PG_USER $PSQL_BIN -d "$PG_DB" -w -t -P format=unaligned sudo -u "$PG_USER" "$PSQL_BIN" -d "$PG_DB" -w -t -P format=unaligned <<< "$sql"
} }
function psql_master_get () { function psql_master_get () {
sql="$1" sql="$1"
debug "Exec 'echo \"$sql\"|sudo -u $PG_USER $PSQL_BIN -U $M_USER -h $M_HOST -w -p $M_PORT -d $PG_DB -t -P format=unaligned" debug "Exec 'echo \"$sql\"|sudo -u $PG_USER $PSQL_BIN -U $M_USER -h $M_HOST -w -p $M_PORT -d $PG_DB -t -P format=unaligned"
echo "$sql"|sudo -u $PG_USER $PSQL_BIN -U $M_USER -h $M_HOST -w -p $M_PORT -d $PG_DB -t -P format=unaligned sudo -u "$PG_USER" "$PSQL_BIN" -U "$M_USER" -h "$M_HOST" -w -p "$M_PORT" -d "$PG_DB" -t -P format=unaligned <<< "$sql"
} }
debug "Running options: debug "Running options:
@ -239,8 +235,7 @@ REPLAY_CRITICAL_DELAY = $REPLAY_CRITICAL_DELAY
" "
# Set some stuff to PostgreSQL version # Set some stuff to PostgreSQL version
if [ $( echo "$PG_VERSION < 10" |bc -l ) -eq 1 ] if [[ $( bc -l <<< "$PG_VERSION < 10" ) -eq 1 ]]; then
then
pg_last_wal_receive_lsn='pg_last_xlog_receive_location()' pg_last_wal_receive_lsn='pg_last_xlog_receive_location()'
pg_last_wal_replay_lsn='pg_last_xlog_replay_location()' pg_last_wal_replay_lsn='pg_last_xlog_replay_location()'
pg_current_wal_lsn='pg_current_xlog_location()' pg_current_wal_lsn='pg_current_xlog_location()'
@ -257,28 +252,28 @@ else
fi fi
# Postgres is running ? # Postgres is running ?
if [ $DEBUG -eq 0 ] if [[ $DEBUG -eq 0 ]]; then
then
psql_get '\q' 2> /dev/null psql_get '\q' 2> /dev/null
is_running=$?
else else
psql_get '\q' psql_get '\q'
is_running=$?
fi fi
if [ $? -ne 0 ] if [[ $is_running -ne 0 ]]; then
then
echo "CRITICAL: Postgres is not running !" echo "CRITICAL: Postgres is not running !"
exit 2 exit 2
fi fi
debug "Postgres is running" debug "Postgres is running"
RECOVERY_MODE=0 RECOVERY_MODE=0
[ "$( psql_get 'SELECT pg_is_in_recovery();' )" == "t" ] && RECOVERY_MODE=1 [[ "$( psql_get 'SELECT pg_is_in_recovery();' )" == "t" ]] && RECOVERY_MODE=1
if [ "$EXPECTED_MODE" == "auto" ]; then if [[ "$EXPECTED_MODE" == "auto" ]]; then
debug "Auto-detect mode" debug "Auto-detect mode"
if [[ $RECOVERY_MODE -eq 1 ]]; then if [[ $RECOVERY_MODE -eq 1 ]]; then
debug "Postgres is in recovery mode. Hot-standby mode." debug "Postgres is in recovery mode. Hot-standby mode."
EXPECTED_MODE="hot-standby" EXPECTED_MODE="hot-standby"
elif [ -f $RECOVERY_CONF -a $( grep -cE '^\s*primary_conninfo' $RECOVERY_CONF ) -gt 0 ]; then elif [[ -f $RECOVERY_CONF ]] && [[ $( grep -cE '^\s*primary_conninfo' "$RECOVERY_CONF" ) -gt 0 ]]; then
debug "File $RECOVERY_CONF_FILENAME found and contain primary_conninfo. Hot-standby mode." debug "File $RECOVERY_CONF_FILENAME found and contain primary_conninfo. Hot-standby mode."
EXPECTED_MODE="hot-standby" EXPECTED_MODE="hot-standby"
else else
@ -288,12 +283,10 @@ if [ "$EXPECTED_MODE" == "auto" ]; then
fi fi
fi fi
if [ "$EXPECTED_MODE" == "hot-standby" ] if [[ "$EXPECTED_MODE" == "hot-standby" ]]; then
then
# Check recovery mode # Check recovery mode
if [ $RECOVERY_MODE -ne 1 ] if [[ $RECOVERY_MODE -ne 1 ]]; then
then echo "CRITICAL: Not in recovery mode while $RECOVERY_CONF_FILENAME file found !"
echo "CRITICAL: Not in recovery mode while recovery.conf file found !"
exit 2 exit 2
fi fi
debug "Postgres is in recovery mode" debug "Postgres is in recovery mode"
@ -305,40 +298,35 @@ then
debug "Last replayed LSN: $LAST_REPLAYED_LSN" debug "Last replayed LSN: $LAST_REPLAYED_LSN"
# Get master connection information from recovery.conf file # Get master connection information from primary_conninfo configuration parameter
MASTER_CONN_INFOS=$( egrep '^ *primary_conninfo' $RECOVERY_CONF|sed "s/^ *primary_conninfo *= *\(.\+\) *$/\1/" ) MASTER_CONN_INFOS=$( psql_get "SHOW primary_conninfo" )
if [ ! -n "$MASTER_CONN_INFOS" ] if [[ -z "$MASTER_CONN_INFOS" ]]; then
then echo "UNKNOWN: Can't retrieve master connection information from primary_conninfo configuration parameter"
echo "UNKNOWN: Can't retrieve master connection information form recovery.conf file"
exit 3 exit 3
fi fi
debug "Master connection information: $MASTER_CONN_INFOS" debug "Master connection information: $MASTER_CONN_INFOS"
M_HOST=$( echo "$MASTER_CONN_INFOS"| grep 'host=' | sed 's/^.*host= *\([0-9a-zA-Z.-]\+\) *.*$/\1/' ) M_HOST=$( grep 'host=' <<< "$MASTER_CONN_INFOS" | sed 's/^.*host= *\([0-9a-zA-Z.-]\+\) *.*$/\1/' )
if [ ! -n "$M_HOST" ] if [[ -z "$M_HOST" ]]; then
then echo "UNKNOWN: Can't retrieve master host from primary_conninfo configuration parameter"
echo "UNKNOWN: Can't retrieve master host from recovery.conf file"
exit 3 exit 3
fi fi
debug "Master host: $M_HOST" debug "Master host: $M_HOST"
M_PORT=$( echo "$MASTER_CONN_INFOS"| grep 'port=' | sed 's/^.*port= *\([0-9a-zA-Z.-]\+\) *.*$/\1/' ) M_PORT=$( grep 'port=' <<< "$MASTER_CONN_INFOS" | sed 's/^.*port= *\([0-9a-zA-Z.-]\+\) *.*$/\1/' )
if [ ! -n "$M_PORT" ] if [[ -z "$M_PORT" ]]; then
then
debug "Master port not specified, use default: $PG_DEFAULT_PORT" debug "Master port not specified, use default: $PG_DEFAULT_PORT"
M_PORT=$PG_DEFAULT_PORT M_PORT=$PG_DEFAULT_PORT
else else
debug "Master port: $M_PORT" debug "Master port: $M_PORT"
fi fi
if [ -n "$PG_MASTER_USER" ] if [[ -n "$PG_MASTER_USER" ]]; then
then
debug "Master user provided by command-line, use it: $PG_MASTER_USER" debug "Master user provided by command-line, use it: $PG_MASTER_USER"
M_USER="$PG_MASTER_USER" M_USER="$PG_MASTER_USER"
else else
M_USER=$( echo "$MASTER_CONN_INFOS"| grep 'user=' | sed 's/^.*user= *\([0-9a-zA-Z.-]\+\) *.*$/\1/' ) M_USER=$( grep 'user=' <<< "$MASTER_CONN_INFOS" | sed 's/^.*user= *\([0-9a-zA-Z.-]\+\) *.*$/\1/' )
if [ ! -n "$M_USER" ] if [[ -z "$M_USER" ]]; then
then
debug "Master user not specified, use default: $PG_USER" debug "Master user not specified, use default: $PG_USER"
M_USER=$PG_USER M_USER=$PG_USER
else else
@ -346,16 +334,13 @@ then
fi fi
fi fi
M_APP_NAME=$( echo "$MASTER_CONN_INFOS"| grep 'application_name=' | sed "s/^.*application_name=[ \'\"]*\([^ \'\"]\+\)[ \'\"]*.*$/\1/" ) M_APP_NAME=$( grep 'application_name=' <<< "$MASTER_CONN_INFOS" | sed "s/^.*application_name=[ \'\"]*\([^ \'\"]\+\)[ \'\"]*.*$/\1/" )
if [ ! -n "$M_APP_NAME" ] if [[ -z "$M_APP_NAME" ]]; then
then if [[ $PG_VERSION -ge 12 ]]; then
if [ $PG_VERSION -ge 12 ]
then
debug "Master application name not specified, use cluster_name if defined" debug "Master application name not specified, use cluster_name if defined"
CLUSTER_NAME=$( psql_get "SELECT current_setting('cluster_name')" ) CLUSTER_NAME=$( psql_get "SELECT current_setting('cluster_name')" )
debug "Cluster name: $CLUSTER_NAME" debug "Cluster name: $CLUSTER_NAME"
if [ -n "$CLUSTER_NAME" ] if [[ -n "$CLUSTER_NAME" ]]; then
then
M_APP_NAME=$CLUSTER_NAME M_APP_NAME=$CLUSTER_NAME
else else
debug "Cluster name not defined, use default: $PG_DEFAULT_APP_NAME" debug "Cluster name not defined, use default: $PG_DEFAULT_APP_NAME"
@ -371,48 +356,42 @@ then
# Get current replication state information from master # Get current replication state information from master
M_CUR_REPL_STATE_INFO="$( psql_master_get "SELECT state, sync_state, $sent_lsn AS sent_lsn, $write_lsn AS write_lsn FROM pg_stat_replication WHERE application_name='$M_APP_NAME';" )" M_CUR_REPL_STATE_INFO="$( psql_master_get "SELECT state, sync_state, $sent_lsn AS sent_lsn, $write_lsn AS write_lsn FROM pg_stat_replication WHERE application_name='$M_APP_NAME';" )"
if [ ! -n "$M_CUR_REPL_STATE_INFO" ] if [[ -z "$M_CUR_REPL_STATE_INFO" ]]; then
then
echo "UNKNOWN: Can't retrieve current replication state information from master server" echo "UNKNOWN: Can't retrieve current replication state information from master server"
exit 3 exit 3
fi fi
debug "Master current replication state:\n\tstate|sync_state|sent_lsn|write_lsn\n\t$M_CUR_REPL_STATE_INFO" debug "Master current replication state:\n\tstate|sync_state|sent_lsn|write_lsn\n\t$M_CUR_REPL_STATE_INFO"
M_CUR_STATE=$( echo "$M_CUR_REPL_STATE_INFO"|cut -d'|' -f1 ) M_CUR_STATE=$( cut -d'|' -f1 <<< "$M_CUR_REPL_STATE_INFO" )
debug "Master current state: $M_CUR_STATE" debug "Master current state: $M_CUR_STATE"
if [ "$M_CUR_STATE" != "streaming" ] if [[ "$M_CUR_STATE" != "streaming" ]]; then
then
echo "CRITICAL: this host is not in streaming state according to master host (current state = '$M_CUR_STATE')" echo "CRITICAL: this host is not in streaming state according to master host (current state = '$M_CUR_STATE')"
exit 2 exit 2
fi fi
M_CUR_SYNC_STATE=$( echo "$M_CUR_REPL_STATE_INFO"|cut -d'|' -f2 ) M_CUR_SYNC_STATE=$( cut -d'|' -f2 <<< "$M_CUR_REPL_STATE_INFO" )
debug "Master current sync state: $M_CUR_SYNC_STATE" debug "Master current sync state: $M_CUR_SYNC_STATE"
if [ "$M_CUR_SYNC_STATE" != "$EXPECTED_SYNC_STATE" ] if [[ "$M_CUR_SYNC_STATE" != "$EXPECTED_SYNC_STATE" ]]; then
then
echo "CRITICAL: unexpected replication state '$M_CUR_SYNC_STATE' (expected state = '$EXPECTED_SYNC_STATE')" echo "CRITICAL: unexpected replication state '$M_CUR_SYNC_STATE' (expected state = '$EXPECTED_SYNC_STATE')"
exit 2 exit 2
fi fi
M_CUR_SENT_LSN=$( echo "$M_CUR_REPL_STATE_INFO"|cut -d'|' -f3 ) M_CUR_SENT_LSN=$( cut -d'|' -f3 <<< "$M_CUR_REPL_STATE_INFO" )
M_CUR_WRITED_LSN=$( echo "$M_CUR_REPL_STATE_INFO"|cut -d'|' -f4 ) M_CUR_WRITED_LSN=$( cut -d'|' -f4 <<< "$M_CUR_REPL_STATE_INFO" )
debug "Master current last sent/writed LSN: '$M_CUR_SENT_LSN' / '$M_CUR_WRITED_LSN'" debug "Master current last sent/writed LSN: '$M_CUR_SENT_LSN' / '$M_CUR_WRITED_LSN'"
# Check current master LSN vs last received LSN # Check current master LSN vs last received LSN
if [ "$CHECK_CUR_MASTER_LSN" == "1" ] if [[ "$CHECK_CUR_MASTER_LSN" == "1" ]]; then
then
# Get current LSN from master # Get current LSN from master
M_CUR_LSN="$( psql_master_get "SELECT $pg_current_wal_lsn" )" M_CUR_LSN="$( psql_master_get "SELECT $pg_current_wal_lsn" )"
if [ ! -n "$M_CUR_LSN" ] if [[ -z "$M_CUR_LSN" ]]; then
then
echo "UNKNOWN: Can't retrieve current LSN from master server" echo "UNKNOWN: Can't retrieve current LSN from master server"
exit 3 exit 3
fi fi
debug "Master current LSN: $M_CUR_LSN" debug "Master current LSN: $M_CUR_LSN"
# Master current LSN is the last received LSN ? # Master current LSN is the last received LSN ?
if [ "$M_CUR_LSN" != "$LAST_RECEIVED_LSN" ] if [[ "$M_CUR_LSN" != "$LAST_RECEIVED_LSN" ]]; then
then
echo "CRITICAL: Master current LSN is not the last received LSN" echo "CRITICAL: Master current LSN is not the last received LSN"
exit 2 exit 2
fi fi
@ -420,18 +399,15 @@ then
fi fi
# The last received LSN is the last replayed ? # The last received LSN is the last replayed ?
if [ "$LAST_RECEIVED_LSN" != "$LAST_REPLAYED_LSN" ] if [[ "$LAST_RECEIVED_LSN" != "$LAST_REPLAYED_LSN" ]]; then
then
debug "/!\ The last received LSN is NOT the last replayed LSN ('$M_CUR_LSN' / '$LAST_REPLAYED_LSN')" debug "/!\ The last received LSN is NOT the last replayed LSN ('$M_CUR_LSN' / '$LAST_REPLAYED_LSN')"
REPLAY_DELAY="$( psql_get 'SELECT EXTRACT(EPOCH FROM now() - pg_last_xact_replay_timestamp());' )" REPLAY_DELAY="$( psql_get 'SELECT EXTRACT(EPOCH FROM now() - pg_last_xact_replay_timestamp());' )"
debug "Replay delay is $REPLAY_DELAY second(s)" debug "Replay delay is $REPLAY_DELAY second(s)"
if [ $( echo "$REPLAY_DELAY >= $REPLAY_CRITICAL_DELAY"|bc -l ) -gt 0 ] if [[ $( bc -l <<< "$REPLAY_DELAY >= $REPLAY_CRITICAL_DELAY" ) -gt 0 ]]; then
then
echo "CRITICAL: last received LSN is not the last replayed ('$LAST_RECEIVED_LSN' / '$LAST_REPLAYED_LSN') and replay delay is $REPLAY_DELAY second(s)" echo "CRITICAL: last received LSN is not the last replayed ('$LAST_RECEIVED_LSN' / '$LAST_REPLAYED_LSN') and replay delay is $REPLAY_DELAY second(s)"
exit 2 exit 2
fi fi
if [ $( echo "$REPLAY_DELAY >= $REPLAY_WARNING_DELAY"|bc -l ) -gt 0 ] if [[ $( bc -l <<< "$REPLAY_DELAY >= $REPLAY_WARNING_DELAY" ) -gt 0 ]]; then
then
echo "WARNING: last received LSN is not the last replay file ('$LAST_RECEIVED_LSN' / '$LAST_REPLAYED_LSN') and replay delay is $REPLAY_DELAY second(s)" echo "WARNING: last received LSN is not the last replay file ('$LAST_RECEIVED_LSN' / '$LAST_REPLAYED_LSN') and replay delay is $REPLAY_DELAY second(s)"
exit 1 exit 1
fi fi
@ -440,8 +416,7 @@ then
debug "Last received LSN is the last replayed file" debug "Last received LSN is the last replayed file"
# The master last sent LSN is the last received (and synced) ? # The master last sent LSN is the last received (and synced) ?
if [ "$M_CUR_SENT_LSN" != "$LAST_RECEIVED_LSN" ] if [[ "$M_CUR_SENT_LSN" != "$LAST_RECEIVED_LSN" ]]; then
then
echo "WARNING: master last sent LSN is not already received (and synced to disk) by slave. May be we have some network delay or load on slave" echo "WARNING: master last sent LSN is not already received (and synced to disk) by slave. May be we have some network delay or load on slave"
echo "Master last sent LSN: $M_CUR_SENT_LSN" echo "Master last sent LSN: $M_CUR_SENT_LSN"
echo "Slave last received (and synced to disk) LSN: $LAST_RECEIVED_LSN" echo "Slave last received (and synced to disk) LSN: $LAST_RECEIVED_LSN"
@ -451,22 +426,19 @@ then
echo "OK: Hot-standby server is up-to-date" echo "OK: Hot-standby server is up-to-date"
echo "Replication state: $M_CUR_SYNC_STATE" echo "Replication state: $M_CUR_SYNC_STATE"
echo "Last sent/writed LSN: '$M_CUR_SENT_LSN' / '$M_CUR_WRITED_LSN'" echo "Last sent/writed LSN: '$M_CUR_SENT_LSN' / '$M_CUR_WRITED_LSN'"
[ "$LAST_RECEIVED_LSN" != "$LAST_REPLAYED_LSN" ] && echo "Replay delay: ${REPLAY_DELAY}s" [[ "$LAST_RECEIVED_LSN" != "$LAST_REPLAYED_LSN" ]] && echo "Replay delay: ${REPLAY_DELAY}s"
exit 0 exit 0
elif [ "$EXPECTED_MODE" == "master" ] elif [[ "$EXPECTED_MODE" == "master" ]]; then
then
# Check recovery mode # Check recovery mode
if [ $RECOVERY_MODE -eq 1 ] if [[ $RECOVERY_MODE -eq 1 ]]; then
then echo "CRITICAL: In recovery mode while expected mode is master!"
echo "CRITICAL: In recovery mode while recovery.conf file not found !"
exit 2 exit 2
fi fi
debug "Postgres is not in recovery mode" debug "Postgres is not in recovery mode"
# Retrieve current lsn # Retrieve current lsn
CURRENT_LSN=$( psql_get "SELECT $pg_current_wal_lsn" ) CURRENT_LSN=$( psql_get "SELECT $pg_current_wal_lsn" )
if [ -z "$CURRENT_LSN" ] if [[ -z "$CURRENT_LSN" ]]; then
then
echo "UNKNOWN: Fail to retrieve current LSN (Log Sequence Number)" echo "UNKNOWN: Fail to retrieve current LSN (Log Sequence Number)"
exit 3 exit 3
fi fi
@ -482,33 +454,30 @@ then
FROM pg_stat_replication FROM pg_stat_replication
) AS s2 ) AS s2
) AS s1" ) ) AS s1" )
if [ ! -n "$STANDBY_CLIENTS" ] if [[ -z "$STANDBY_CLIENTS" ]]; then
then
echo "WARNING: no stand-by client connected" echo "WARNING: no stand-by client connected"
exit 1 exit 1
fi fi
debug "Stand-by client(s):\n\t$( echo -e "$STANDBY_CLIENTS"|sed 's/\n/\n\t/' )" debug "Stand-by client(s):\n\t${STANDBY_CLIENTS//$'\n'/\\n\\t}"
STANDBY_CLIENTS_TXT="" STANDBY_CLIENTS_TXT=""
STANDBY_CLIENTS_COUNT=0 STANDBY_CLIENTS_COUNT=0
CURRENT_LSN_IS_LAST_SENT=1 CURRENT_LSN_IS_LAST_SENT=1
for line in $STANDBY_CLIENTS for line in $STANDBY_CLIENTS; do
do (( STANDBY_CLIENTS_COUNT+=1 ))
let STANDBY_CLIENTS_COUNT=STANDBY_CLIENTS_COUNT+1
NAME=$( echo $line|cut -d '|' -f 1 ) NAME=$( cut -d '|' -f 1 <<< "$line" )
IP=$( echo $line|cut -d '|' -f 2 ) IP=$( cut -d '|' -f 2 <<< "$line" )
SENT_LSN=$( echo $line|cut -d '|' -f 3 ) SENT_LSN=$( cut -d '|' -f 3 <<< "$line" )
WRITED_LSN=$( echo $line|cut -d '|' -f 4 ) WRITED_LSN=$( cut -d '|' -f 4 <<< "$line" )
STATE=$( echo $line|cut -d '|' -f 5 ) STATE=$( cut -d '|' -f 5 <<< "$line" )
SYNC_STATE=$( echo $line|cut -d '|' -f 6 ) SYNC_STATE=$( cut -d '|' -f 6 <<< "$line" )
LAG=$( echo $line|cut -d '|' -f 7 ) LAG=$( cut -d '|' -f 7 <<< "$line" )
STANDBY_CLIENTS_TXT="$STANDBY_CLIENTS_TXT\n$NAME ($IP): $STATE/$SYNC_STATE (LSN: sent='$SENT_LSN' / writed='$WRITED_LSN', Lag: ${LAG}b)" STANDBY_CLIENTS_TXT="$STANDBY_CLIENTS_TXT\n$NAME ($IP): $STATE/$SYNC_STATE (LSN: sent='$SENT_LSN' / writed='$WRITED_LSN', Lag: ${LAG}b)"
[ "$SENT_LSN" != "$CURRENT_LSN" ] && CURRENT_LSN_IS_LAST_SENT=0 [[ "$SENT_LSN" != "$CURRENT_LSN" ]] && CURRENT_LSN_IS_LAST_SENT=0
done done
if [ $CURRENT_LSN_IS_LAST_SENT -eq 1 ] if [[ $CURRENT_LSN_IS_LAST_SENT -eq 1 ]]; then
then
echo "OK: $STANDBY_CLIENTS_COUNT stand-by client(s) connected" echo "OK: $STANDBY_CLIENTS_COUNT stand-by client(s) connected"
EXIT_CODE=0 EXIT_CODE=0
else else