check_pg_streaming_replication/check_pg_streaming_replication

551 lines
20 KiB
Text
Raw Normal View History

2012-03-14 16:04:14 +01:00
#!/bin/bash
# vim: tabstop=4 shiftwidth=4 softtabstop=4 expandtab
2012-03-14 16:04:14 +01:00
#
# Nagios plugin to check PostgreSQL streaming replication state
2020-11-04 15:37:18 +01:00
#
2012-03-14 16:04:14 +01:00
# Can be used on master or on standby node
#
2020-11-04 15:37:18 +01:00
# Requirements:
#
2020-11-04 19:19:22 +01:00
# Some CLI tools: sudo, awk, sed, bc, psql and pg_lsclusters
2012-03-14 16:04:14 +01:00
#
# On master node: Slave nodes must be able to connect with user from recovery.conf /
# `postgresql.auto.conf` (or user specified using -U) to database with the same
# name (or another specified with -D) as trust (or using password specified in
# ~/.pgpass). This user must have SUPERUSER privilege (needed to get replication
# details).
2012-03-14 16:04:14 +01:00
#
# On standby node: PG_USER must be able to connect locally on the database with the same name
# (or another specified with -D) as trust (or using password specified in
# ~/.pgpass).
2012-03-14 16:04:14 +01:00
#
2020-11-04 15:37:18 +01:00
# Author: Benjamin Renard <brenard@easter-eggs.com>
# Date: Mon, 03 Jun 2024 15:31:29 +0200
# Source: https://gitea.zionetrix.net/bn8/check_pg_streaming_replication
2020-11-04 15:37:18 +01:00
# SPDX-License-Identifier: GPL-3.0-or-later
2012-03-14 16:04:14 +01:00
#
# Default values, used when a parameter is neither provided on the command line nor auto-detected
DEFAULT_PG_USER=postgres
DEFAULT_PG_VERSION=9.1
# Built from DEFAULT_PG_VERSION: PG_VERSION is not set yet at this point (unless inherited from
# the environment), so using it here would give "/var/lib/postgresql//main"
DEFAULT_PG_MAIN=/var/lib/postgresql/$DEFAULT_PG_VERSION/main
DEFAULT_PG_PORT=5432

# Parameters left empty here are filled later (command-line options, auto-detection or defaults)
PG_USER=""
PG_VERSION=""
PG_MAIN=""
PG_MASTER_USER=""
PSQL_BIN=/usr/bin/psql
PG_LSCLUSTER_BIN=/usr/bin/pg_lsclusters
RECOVERY_CONF=""
PG_DEFAULT_PORT=""
# Application name looked up on the master when primary_conninfo does not provide one
PG_DEFAULT_APP_NAME=$( hostname )
PG_DB=""

# Check tuning (see -C, -w, -c, -e and -E options)
CHECK_CUR_MASTER_LSN=1
REPLAY_WARNING_DELAY=3
REPLAY_CRITICAL_DELAY=5
EXPECTED_SYNC_STATE=sync
EXPECTED_MODE=auto
DEBUG=0
2015-04-27 16:24:21 +02:00
# Display the help message and exit
#
# Arguments: an optional error message (all arguments are joined with a space)
# Outputs:   the error message (if any) followed by the help message, on stdout
# Exits:     with status 1 if an error message was provided, 0 otherwise
function usage () {
    local error="$*"
    [[ -n "$error" ]] && echo -e "$error\n"
    cat << EOF
Usage: $0 [-d] [-h] [options]
    -u pg_user              Specify local Postgres user (Default: try to auto-detect or
                            use $DEFAULT_PG_USER)
    -b psql_bin             Specify psql binary path (Default: $PSQL_BIN)
    -B pg_lsclusters_bin    Specify pg_lsclusters binary path (Default: $PG_LSCLUSTER_BIN)
    -V pg_version           Specify Postgres version (Default: try to auto-detect or
                            use $DEFAULT_PG_VERSION)
    -m pg_main              Specify Postgres main directory path (Default: try to auto-detect or
                            use $DEFAULT_PG_MAIN)
    -r recovery_conf        Specify Postgres recovery configuration file path
                            (Default: [PG_MAIN]/recovery.conf on PG <= 11,
                            [PG_MAIN]/postgresql.auto.conf on PG >= 12)
    -U pg_master_user       Specify Postgres user to use on master (Default: user from recovery.conf
                            file)
    -p pg_port              Specify default Postgres master TCP port (Default: same as local
                            PostgreSQL port if detected or use $DEFAULT_PG_PORT)
    -D dbname               Specify DB name on Postgres master/slave to connect on (Default:
                            PG_USER, must match with .pgpass one is used)
    -C 1/0                  Enable or disable check if the current LSN of the master host is the
                            same of the last received LSN (Default: $CHECK_CUR_MASTER_LSN)
    -w replay_warn_delay    Specify the replay warning delay in second
                            (Default: $REPLAY_WARNING_DELAY)
    -c replay_crit_delay    Specify the replay critical delay in second
                            (Default: $REPLAY_CRITICAL_DELAY)
    -e expected_sync_state  The expected replication state ('sync' or 'async',
                            default: $EXPECTED_SYNC_STATE)
    -E expected_mode        The expected mode ('master', 'hot-standby' or 'auto',
                            default: '$EXPECTED_MODE')
    -d                      Debug mode
    -h                      Show this message
EOF
    # A provided error message means the script was called incorrectly
    [[ -n "$error" ]] && exit 1
    exit 0
}
# Parse command-line options (see usage() for their meaning)
while getopts "hu:b:B:V:m:r:U:p:D:C:w:c:e:E:d" OPTION; do
    case "$OPTION" in
        u)
            PG_USER=$OPTARG
            ;;
        b)
            PSQL_BIN=$OPTARG
            ;;
        B)
            PG_LSCLUSTER_BIN=$OPTARG
            ;;
        V)
            PG_VERSION=$OPTARG
            ;;
        m)
            PG_MAIN=$OPTARG
            ;;
        r)
            RECOVERY_CONF=$OPTARG
            ;;
        U)
            PG_MASTER_USER=$OPTARG
            ;;
        p)
            PG_DEFAULT_PORT=$OPTARG
            ;;
        D)
            PG_DB=$OPTARG
            ;;
        C)
            CHECK_CUR_MASTER_LSN=$OPTARG
            ;;
        w)
            REPLAY_WARNING_DELAY=$OPTARG
            ;;
        c)
            REPLAY_CRITICAL_DELAY=$OPTARG
            ;;
        e)
            # usage() exits, so the assignment below is only reached with a valid value
            if [[ "$OPTARG" != "sync" && "$OPTARG" != "async" ]]; then
                usage "Invalid expected replication state '$OPTARG'." \
                    "Possible values: sync or async."
            fi
            EXPECTED_SYNC_STATE=$OPTARG
            ;;
        E)
            if [[ "$OPTARG" != "master" && "$OPTARG" != "hot-standby" && "$OPTARG" != "auto" ]]; then
                usage "Invalid expected mode '$OPTARG'. Possible values: master, hot-standby" \
                    "or auto."
            fi
            EXPECTED_MODE=$OPTARG
            ;;
        d)
            DEBUG=1
            ;;
        h)
            usage
            ;;
        \?)
            # Unknown option or missing option argument (getopts already reported the detail on
            # stderr): give usage() an error message so the script exits with a non-zero status
            usage "Unknown option"
            ;;
    esac
done
# Print a debug message on stderr, only when debug mode is enabled (DEBUG=1, see -d option)
#
# Arguments: the message (all arguments are joined with a space). Escape sequences such as \n
#            and \t are interpreted (echo -e).
function debug() {
    [[ $DEBUG -eq 1 ]] || return 0
    >&2 echo -e "[DEBUG] $*"
}
2020-11-04 15:41:04 +01:00
# Dump the parameters as provided on the command line, before auto-detection and default values
debug "Starting options (before handling auto-detection/default values):
PG_VERSION = $PG_VERSION
PG_DB = $PG_DB
PG_USER = $PG_USER
PSQL_BIN = $PSQL_BIN
PG_LSCLUSTER_BIN = $PG_LSCLUSTER_BIN
PG_MAIN = $PG_MAIN
RECOVERY_CONF = $RECOVERY_CONF
PG_DEFAULT_PORT = $PG_DEFAULT_PORT
PG_DEFAULT_APP_NAME = $PG_DEFAULT_APP_NAME
CHECK_CUR_MASTER_LSN = $CHECK_CUR_MASTER_LSN
REPLAY_WARNING_DELAY = $REPLAY_WARNING_DELAY
REPLAY_CRITICAL_DELAY = $REPLAY_CRITICAL_DELAY
EXPECTED_SYNC_STATE = $EXPECTED_SYNC_STATE
EXPECTED_MODE = $EXPECTED_MODE
"
# Fill the still empty parameters from one line of pg_lsclusters output
#
# Arguments: $1 - a pg_lsclusters line (whitespace-separated fields: version, cluster name, port,
#                 status, owner, data directory, log file)
# Globals:   PG_VERSION, PG_DEFAULT_PORT, PG_USER and PG_MAIN are set only if currently empty
#
# Output example:
#   9.6 main 5432 online,recovery postgres /var/lib/postgresql/9.6/main \
#     /var/log/postgresql/postgresql-9.6-main.log
#   13 main 5432 online,recovery,pacemaker postgres /var/lib/postgresql/13/main \
#     /var/log/postgresql/postgresql-13-main.log
function set_params_from_pg_cluster() {
    local version port owner data_dir
    # Unused fields (cluster name, status, log file) are read in the throwaway variable "_"
    read -r version _ port _ owner data_dir _ <<< "$1"
    [[ -z "$PG_VERSION" ]] && PG_VERSION=$version
    [[ -z "$PG_DEFAULT_PORT" ]] && PG_DEFAULT_PORT=$port
    [[ -z "$PG_USER" ]] && PG_USER=$owner
    [[ -z "$PG_MAIN" ]] && PG_MAIN=$data_dir
    return 0
}

# Auto-detect PostgreSQL information using pg_lsclusters
if [[ -x "$PG_LSCLUSTER_BIN" ]]; then
    # Only the first listed cluster is considered
    PG_CLUSTER=$( "$PG_LSCLUSTER_BIN" -h 2>/dev/null | head -n1 )
    if [[ -n "$PG_CLUSTER" ]]; then
        debug "pg_lsclusters output:\n\t$PG_CLUSTER"
        set_params_from_pg_cluster "$PG_CLUSTER"
    fi
else
    debug "pg_lsclusters not found ($PG_LSCLUSTER_BIN): parameters auto-detection disabled"
fi
# If auto-detection failed, use default values
[[ -z "$PG_USER" ]] && PG_USER="$DEFAULT_PG_USER"
[[ -z "$PG_VERSION" ]] && PG_VERSION="$DEFAULT_PG_VERSION"
# The default main directory is derived from the effective version rather than from
# DEFAULT_PG_MAIN, which is computed before the version is resolved
[[ -z "$PG_MAIN" ]] && PG_MAIN="/var/lib/postgresql/$PG_VERSION/main"
[[ -z "$PG_DEFAULT_PORT" ]] && PG_DEFAULT_PORT="$DEFAULT_PG_PORT"

# Check PG_USER
if [[ -z "$PG_USER" ]]; then
    echo "UNKNOWN: Postgres user not specified"
    exit 3
fi
if ! id "$PG_USER" > /dev/null 2>&1; then
    echo "UNKNOWN: Invalid Postgres user ($PG_USER)"
    exit 3
fi

# Check PSQL_BIN
if [[ ! -x "$PSQL_BIN" ]]; then
    echo "UNKNOWN: Invalid psql bin path ($PSQL_BIN)"
    exit 3
fi

# Check PG_MAIN
if [[ ! -d "$PG_MAIN/" ]]; then
    echo "UNKNOWN: Invalid Postgres main directory path ($PG_MAIN)"
    exit 3
fi
2015-04-27 16:24:21 +02:00
# Print the name of the default recovery configuration file for a PostgreSQL version
#
# Arguments: $1 - PostgreSQL version (for instance 9.6, 11 or 13.4)
# Outputs:   recovery.conf for PG <= 11, postgresql.auto.conf otherwise
function default_recovery_conf_filename() {
    # Only the major part is compared: [[ -le ]] is an integer comparison and fails on
    # dotted versions such as "9.6"
    local major="${1%%.*}"
    if [[ $major -le 11 ]]; then
        echo "recovery.conf"
    else
        echo "postgresql.auto.conf"
    fi
}

# Check RECOVERY_CONF
if [[ -z "$RECOVERY_CONF" ]]; then
    RECOVERY_CONF_FILENAME=$( default_recovery_conf_filename "$PG_VERSION" )
    RECOVERY_CONF="$PG_MAIN/$RECOVERY_CONF_FILENAME"
else
    RECOVERY_CONF_FILENAME=$( basename "$RECOVERY_CONF" )
fi
2015-04-27 16:24:21 +02:00
# Check PG_DEFAULT_PORT: must only contain digits (and at least one)
if [[ ! "$PG_DEFAULT_PORT" =~ ^[0-9]+$ ]]; then
    echo "UNKNOWN: Postgres default master TCP port must be an integer."
    exit 3
fi

# If PG_DB is not provided with -D parameter, use PG_USER as default value
[[ -z "$PG_DB" ]] && PG_DB="$PG_USER"
2012-03-14 16:04:14 +01:00
# Run SQL on the local Postgres server, as PG_USER (using sudo) on the PG_DB database
#
# Arguments: the SQL to run (all arguments are joined with a space)
# Outputs:   psql output in unaligned, tuples-only format (-t -P format=unaligned)
# Returns:   the exit status of the sudo/psql command
function psql_get () {
    local sql="$*"
    debug "Exec 'sudo -u $PG_USER $PSQL_BIN -d \"$PG_DB\" -w -t -P format=unaligned <<< \"$sql\"'"
    # -w: never prompt for a password
    sudo -u "$PG_USER" "$PSQL_BIN" -d "$PG_DB" -w -t -P format=unaligned <<< "$sql"
}
# Run SQL on the master Postgres server, connecting from the local host as M_USER on
# M_HOST:M_PORT (psql itself is run as PG_USER using sudo) on the PG_DB database
#
# Arguments: the SQL to run (all arguments are joined with a space)
# Outputs:   psql output in unaligned, tuples-only format (-t -P format=unaligned)
# Returns:   the exit status of the sudo/psql command
function psql_master_get () {
    local sql="$*"
    debug "Exec 'sudo -u $PG_USER $PSQL_BIN -U $M_USER -h $M_HOST -w -p $M_PORT -d $PG_DB -t" \
        "-P format=unaligned <<< \"$sql\"'"
    # -w: never prompt for a password
    sudo -u "$PG_USER" "$PSQL_BIN" \
        -U "$M_USER" -h "$M_HOST" -w -p "$M_PORT" -d "$PG_DB" -t -P format=unaligned <<< "$sql"
}
2020-11-04 15:41:04 +01:00
# Dump the effective parameters (after auto-detection and default values)
debug "Running options:
PG_VERSION = $PG_VERSION
PG_DB = $PG_DB
PG_USER = $PG_USER
PSQL_BIN = $PSQL_BIN
PG_LSCLUSTER_BIN = $PG_LSCLUSTER_BIN
PG_MAIN = $PG_MAIN
RECOVERY_CONF = $RECOVERY_CONF
PG_DEFAULT_PORT = $PG_DEFAULT_PORT
PG_DEFAULT_APP_NAME = $PG_DEFAULT_APP_NAME
CHECK_CUR_MASTER_LSN = $CHECK_CUR_MASTER_LSN
REPLAY_WARNING_DELAY = $REPLAY_WARNING_DELAY
REPLAY_CRITICAL_DELAY = $REPLAY_CRITICAL_DELAY
EXPECTED_SYNC_STATE = $EXPECTED_SYNC_STATE
EXPECTED_MODE = $EXPECTED_MODE
"
2015-04-27 16:24:21 +02:00
2020-11-04 15:04:26 +01:00
# Set some stuff to PostgreSQL version: names of the WAL related functions and of the
# pg_stat_replication columns to use in SQL queries (xlog/location names before version 10,
# wal/lsn names otherwise)
if [[ $( bc -l <<< "$PG_VERSION < 10" ) -eq 1 ]]; then
    pg_last_wal_receive_lsn='pg_last_xlog_receive_location()'
    pg_last_wal_replay_lsn='pg_last_xlog_replay_location()'
    pg_current_wal_lsn='pg_current_xlog_location()'
    pg_wal_lsn_diff='pg_xlog_location_diff'
    sent_lsn='sent_location'
    write_lsn='write_location'
else
    pg_last_wal_receive_lsn='pg_last_wal_receive_lsn()'
    pg_last_wal_replay_lsn='pg_last_wal_replay_lsn()'
    pg_current_wal_lsn='pg_current_wal_lsn()'
    pg_wal_lsn_diff='pg_wal_lsn_diff'
    sent_lsn='sent_lsn'
    write_lsn='write_lsn'
fi
2012-03-14 16:04:14 +01:00
# Postgres is running ?
# Checked by running a psql command that only quits (\q). psql errors are hidden unless debug
# mode is enabled.
if [[ $DEBUG -eq 0 ]]; then
    psql_get '\q' 2> /dev/null
    is_running=$?
else
    psql_get '\q'
    is_running=$?
fi
if [[ $is_running -ne 0 ]]; then
    echo "CRITICAL: Postgres is not running !"
    exit 2
fi
debug "Postgres is running"
# Detect if the local Postgres server is in recovery mode (pg_is_in_recovery() returns "t")
RECOVERY_MODE=0
[[ "$( psql_get 'SELECT pg_is_in_recovery();' )" == "t" ]] && RECOVERY_MODE=1

# In auto mode, deduce the expected mode from the recovery state or, failing that, from the
# presence of a primary_conninfo parameter in the recovery configuration file
if [[ "$EXPECTED_MODE" == "auto" ]]; then
    debug "Auto-detect mode"
    if [[ $RECOVERY_MODE -eq 1 ]]; then
        debug "Postgres is in recovery mode. Hot-standby mode."
        EXPECTED_MODE="hot-standby"
    elif [[ -f "$RECOVERY_CONF" ]] && grep -qE '^\s*primary_conninfo' "$RECOVERY_CONF"; then
        debug "File $RECOVERY_CONF_FILENAME found and contain primary_conninfo. Hot-standby mode."
        EXPECTED_MODE="hot-standby"
    else
        debug "Postgres not in recovery mode and file $RECOVERY_CONF_FILENAME not found " \
            "(or does not contain primary_conninfo). Master mode."
        EXPECTED_MODE="master"
    fi
fi
# Run the check matching the expected mode.
# Each outcome prints a status line and exits with the matching code, as done everywhere in this
# script: OK = 0, WARNING = 1, CRITICAL = 2, UNKNOWN = 3.
if [[ "$EXPECTED_MODE" == "hot-standby" ]]; then
    # Check recovery mode
    if [[ $RECOVERY_MODE -ne 1 ]]; then
        echo "CRITICAL: Not in recovery mode while $RECOVERY_CONF_FILENAME file found !"
        exit 2
    fi
    debug "Postgres is in recovery mode"

    # Get local current last received/replayed LSN
    LAST_RECEIVED_LSN=$( psql_get "SELECT $pg_last_wal_receive_lsn" )
    debug "Last received LSN: $LAST_RECEIVED_LSN"
    LAST_REPLAYED_LSN=$( psql_get "SELECT $pg_last_wal_replay_lsn" )
    debug "Last replayed LSN: $LAST_REPLAYED_LSN"

    # Get master connection information from primary_conninfo configuration parameter
    MASTER_CONN_INFOS=$( psql_get "SHOW primary_conninfo" )
    if [[ -z "$MASTER_CONN_INFOS" ]]; then
        echo "UNKNOWN: Can't retrieve master connection information from primary_conninfo" \
            "configuration parameter"
        exit 3
    fi
    debug "Master connection information: $MASTER_CONN_INFOS"

    # Master host (mandatory)
    M_HOST=$(
        grep 'host=' <<< "$MASTER_CONN_INFOS" | sed 's/^.*host= *\([0-9a-zA-Z.-]\+\) *.*$/\1/'
    )
    if [[ -z "$M_HOST" ]]; then
        echo "UNKNOWN: Can't retrieve master host from primary_conninfo configuration parameter"
        exit 3
    fi
    debug "Master host: $M_HOST"

    # Master port (default: PG_DEFAULT_PORT)
    M_PORT=$(
        grep 'port=' <<< "$MASTER_CONN_INFOS" | sed 's/^.*port= *\([0-9a-zA-Z.-]\+\) *.*$/\1/'
    )
    if [[ -z "$M_PORT" ]]; then
        debug "Master port not specified, use default: $PG_DEFAULT_PORT"
        M_PORT=$PG_DEFAULT_PORT
    else
        debug "Master port: $M_PORT"
    fi

    # Master user (-U option, else from primary_conninfo, else PG_USER)
    if [[ -n "$PG_MASTER_USER" ]]; then
        debug "Master user provided by command-line, use it: $PG_MASTER_USER"
        M_USER="$PG_MASTER_USER"
    else
        # Underscore is accepted in the user name: without it, a name such as "repl_user"
        # would be truncated to "repl"
        M_USER=$(
            grep 'user=' <<< "$MASTER_CONN_INFOS" |
                sed 's/^.*user= *\([0-9a-zA-Z_.-]\+\) *.*$/\1/'
        )
        if [[ -z "$M_USER" ]]; then
            debug "Master user not specified, use default: $PG_USER"
            M_USER=$PG_USER
        else
            debug "Master user: $M_USER"
        fi
    fi

    # Application name used to find this host in pg_stat_replication on the master
    M_APP_NAME=$(
        grep 'application_name=' <<< "$MASTER_CONN_INFOS" |
            sed "s/^.*application_name=[ \'\"]*\([^ \'\"]\+\)[ \'\"]*.*$/\1/"
    )
    if [[ -z "$M_APP_NAME" ]]; then
        # Only the major part of the version is compared: [[ -ge ]] is an integer comparison
        # and fails on dotted versions such as "9.6"
        if [[ ${PG_VERSION%%.*} -ge 12 ]]; then
            debug "Master application name not specified, use cluster_name if defined"
            CLUSTER_NAME=$( psql_get "SELECT current_setting('cluster_name')" )
            debug "Cluster name: $CLUSTER_NAME"
            if [[ -n "$CLUSTER_NAME" ]]; then
                M_APP_NAME=$CLUSTER_NAME
            else
                debug "Cluster name not defined, use default: $PG_DEFAULT_APP_NAME"
                M_APP_NAME=$PG_DEFAULT_APP_NAME
            fi
        else
            debug "Master application name not specified, use default: $PG_DEFAULT_APP_NAME"
            M_APP_NAME=$PG_DEFAULT_APP_NAME
        fi
    else
        debug "Master application name: $M_APP_NAME"
    fi

    # Get current replication state information from master
    M_CUR_REPL_STATE_INFO="$(
        psql_master_get \
            "SELECT state, sync_state, $sent_lsn AS sent_lsn, $write_lsn AS write_lsn" \
            "FROM pg_stat_replication WHERE application_name='$M_APP_NAME';"
    )"
    if [[ -z "$M_CUR_REPL_STATE_INFO" ]]; then
        echo "UNKNOWN: Can't retrieve current replication state information from master server"
        exit 3
    fi
    debug "Master current replication state:\n" \
        "\tstate|sync_state|sent_lsn|write_lsn\n\t$M_CUR_REPL_STATE_INFO"

    M_CUR_STATE=$( cut -d'|' -f1 <<< "$M_CUR_REPL_STATE_INFO" )
    debug "Master current state: $M_CUR_STATE"
    if [[ "$M_CUR_STATE" != "streaming" ]]; then
        echo "CRITICAL: this host is not in streaming state according to master host" \
            "(current state = '$M_CUR_STATE')"
        exit 2
    fi

    M_CUR_SYNC_STATE=$( cut -d'|' -f2 <<< "$M_CUR_REPL_STATE_INFO" )
    debug "Master current sync state: $M_CUR_SYNC_STATE"
    if [[ "$M_CUR_SYNC_STATE" != "$EXPECTED_SYNC_STATE" ]]; then
        echo "CRITICAL: unexpected replication state '$M_CUR_SYNC_STATE'" \
            "(expected state = '$EXPECTED_SYNC_STATE')"
        exit 2
    fi

    M_CUR_SENT_LSN=$( cut -d'|' -f3 <<< "$M_CUR_REPL_STATE_INFO" )
    M_CUR_WRITED_LSN=$( cut -d'|' -f4 <<< "$M_CUR_REPL_STATE_INFO" )
    debug "Master current last sent/writed LSN: '$M_CUR_SENT_LSN' / '$M_CUR_WRITED_LSN'"

    # Check current master LSN vs last received LSN
    if [[ "$CHECK_CUR_MASTER_LSN" == "1" ]]; then
        # Get current LSN from master
        M_CUR_LSN="$( psql_master_get "SELECT $pg_current_wal_lsn" )"
        if [[ -z "$M_CUR_LSN" ]]; then
            echo "UNKNOWN: Can't retrieve current LSN from master server"
            exit 3
        fi
        debug "Master current LSN: $M_CUR_LSN"

        # Master current LSN is the last received LSN ?
        if [[ "$M_CUR_LSN" != "$LAST_RECEIVED_LSN" ]]; then
            echo "CRITICAL: Master current LSN is not the last received LSN"
            exit 2
        fi
        debug "Master current LSN is the last received LSN"
    fi

    # The last received LSN is the last replayed ?
    if [[ "$LAST_RECEIVED_LSN" != "$LAST_REPLAYED_LSN" ]]; then
        debug "/!\ The last received LSN is NOT the last replayed LSN" \
            "('$LAST_RECEIVED_LSN' / '$LAST_REPLAYED_LSN')"
        # Replay delay: time elapsed since the last replayed transaction, in seconds
        REPLAY_DELAY="$(
            psql_get 'SELECT EXTRACT(EPOCH FROM now() - pg_last_xact_replay_timestamp());'
        )"
        debug "Replay delay is $REPLAY_DELAY second(s)"
        # bc is used for these comparisons as the delay is not an integer
        if [[ $( bc -l <<< "$REPLAY_DELAY >= $REPLAY_CRITICAL_DELAY" ) -gt 0 ]]; then
            echo "CRITICAL: last received LSN is not the last replayed" \
                "('$LAST_RECEIVED_LSN' / '$LAST_REPLAYED_LSN') and" \
                "replay delay is $REPLAY_DELAY second(s)"
            exit 2
        fi
        if [[ $( bc -l <<< "$REPLAY_DELAY >= $REPLAY_WARNING_DELAY" ) -gt 0 ]]; then
            echo "WARNING: last received LSN is not the last replay file" \
                "('$LAST_RECEIVED_LSN' / '$LAST_REPLAYED_LSN') and" \
                "replay delay is $REPLAY_DELAY second(s)"
            exit 1
        fi
        debug "Replay delay is not worrying"
    else
        debug "Last received LSN is the last replayed file"
    fi

    # The master last sent LSN is the last received (and synced) ?
    if [[ "$M_CUR_SENT_LSN" != "$LAST_RECEIVED_LSN" ]]; then
        echo "WARNING: master last sent LSN is not already received (and synced to disk) by slave."
        echo "May be we have some network delay or load on slave"
        echo "Master last sent LSN: $M_CUR_SENT_LSN"
        echo "Slave last received (and synced to disk) LSN: $LAST_RECEIVED_LSN"
        exit 1
    fi

    echo "OK: Hot-standby server is up-to-date"
    echo "Replication state: $M_CUR_SYNC_STATE"
    echo "Last sent/writed LSN: '$M_CUR_SENT_LSN' / '$M_CUR_WRITED_LSN'"
    [[ "$LAST_RECEIVED_LSN" != "$LAST_REPLAYED_LSN" ]] && echo "Replay delay: ${REPLAY_DELAY}s"
    exit 0
elif [[ "$EXPECTED_MODE" == "master" ]]; then
    # Check recovery mode
    if [[ $RECOVERY_MODE -eq 1 ]]; then
        echo "CRITICAL: In recovery mode while expected mode is master!"
        exit 2
    fi
    debug "Postgres is not in recovery mode"

    # Retrieve current lsn
    CURRENT_LSN=$( psql_get "SELECT $pg_current_wal_lsn" )
    if [[ -z "$CURRENT_LSN" ]]; then
        echo "UNKNOWN: Fail to retrieve current LSN (Log Sequence Number)"
        exit 3
    fi
    debug "Current LSN: $CURRENT_LSN"

    # Check standby client
    # One row per connected stand-by client, fields separated by "|":
    # application_name|client_addr|sent_lsn|write_lsn|state|sync_state|current_lag
    STANDBY_CLIENTS=$(
        psql_get \
            "SELECT
                application_name, client_addr, sent_lsn, write_lsn, state, sync_state, current_lag
            FROM (
                SELECT
                    application_name, client_addr, sent_lsn, write_lsn, state, sync_state,
                    current_lag
                FROM (
                    SELECT
                        application_name, client_addr, $sent_lsn AS sent_lsn,
                        $write_lsn AS write_lsn, state, sync_state,
                        $pg_wal_lsn_diff($pg_current_wal_lsn, $write_lsn) AS current_lag
                    FROM pg_stat_replication
                ) AS s2
            ) AS s1"
    )
    if [[ -z "$STANDBY_CLIENTS" ]]; then
        echo "WARNING: no stand-by client connected"
        exit 1
    fi
    debug "Stand-by client(s):\n\t${STANDBY_CLIENTS//$'\n'/\\n\\t}"

    STANDBY_CLIENTS_ROWS=()
    CURRENT_LSN_IS_LAST_SENT=1
    # Rows are read line by line (not word by word), so that a value containing a space or a
    # glob character does not break the parsing
    while IFS= read -r line; do
        [[ -z "$line" ]] && continue
        IFS='|' read -r NAME IP SENT_LSN WRITED_LSN STATE SYNC_STATE LAG <<< "$line"
        STANDBY_CLIENTS_ROW="$NAME ($IP): $STATE/$SYNC_STATE"
        STANDBY_CLIENTS_ROW+=" (LSN: sent='$SENT_LSN' / writed='$WRITED_LSN', Lag: ${LAG}b)"
        STANDBY_CLIENTS_ROWS+=( "$STANDBY_CLIENTS_ROW" )
        [[ "$SENT_LSN" != "$CURRENT_LSN" ]] && CURRENT_LSN_IS_LAST_SENT=0
    done <<< "$STANDBY_CLIENTS"

    if [[ $CURRENT_LSN_IS_LAST_SENT -eq 1 ]]; then
        echo "OK: ${#STANDBY_CLIENTS_ROWS[@]} stand-by client(s) connected"
        EXIT_CODE=0
    else
        echo "WARNING: current master LSN is not the last sent to stand-by client(s) connected." \
            "May be we have some load ?"
        EXIT_CODE=1
    fi
    echo "Current master LSN: $CURRENT_LSN"
    # One stand-by client per line
    printf '%s\n' "${STANDBY_CLIENTS_ROWS[@]}"
    exit "$EXIT_CODE"
else
    echo "UNKNOWN - Invalid mode '$EXPECTED_MODE'"
    exit 3
fi