From e5514e587f42f50f7f595b614f6abee9fac60374 Mon Sep 17 00:00:00 2001 From: Benjamin Renard Date: Wed, 17 Jul 2024 10:56:43 +0200 Subject: [PATCH] Fix taking care of synchronous_commit master configuration and removed useless -C parameter and its check --- README.md | 39 +++++++++------- check_pg_streaming_replication | 82 +++++++++++++++++++++------------- 2 files changed, 73 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index fd91398..7205cd0 100644 --- a/README.md +++ b/README.md @@ -60,27 +60,34 @@ ln -s /usr/local/src/check_pg_streaming_replication/check_pg_streaming_replicati ``` Usage: ./check_pg_streaming_replication [-d] [-h] [options] - -u pg_user Specify local Postgres user (Default: try to auto-detect or use postgres) + -u pg_user Specify local Postgres user (Default: try to auto-detect or + use postgres) -b psql_bin Specify psql binary path (Default: /usr/bin/psql) -B pg_lsclusters_bin Specify pg_lsclusters binary path (Default: /usr/bin/pg_lsclusters) - -V pg_version Specify Postgres version (Default: try to auto-detect or use 9.1) - -m pg_main Specify Postgres main directory path (Default: try to auto-detect or use - /var/lib/postgresql//main) + -V pg_version Specify Postgres version (Default: try to auto-detect or + use 9.1) + -m pg_main Specify Postgres main directory path (Default: try to auto-detect or + use /var/lib/postgresql//main) -r recovery_conf Specify Postgres recovery configuration file path - ( Default: [PG_MAIN]/recovery.conf on PG <= 11, [PG_MAIN]/postgresql.auto.conf on PG >= 12) - -U pg_master_user Specify Postgres user to use on master (Default: user from recovery.conf file) - -p pg_port Specify default Postgres master TCP port (Default: same as local PostgreSQL - port if detected or use 5432) - -D dbname Specify DB name on Postgres master/slave to connect on (Default: PG_USER, must - match with .pgpass one is used) - -C 1/0 Enable or disable check if the current LSN of the master host is the same - of the last 
received LSN (Default: 1) - -w replay_warn_delay Specify the replay warning delay in second (Default: 3) - -c replay_crit_delay Specify the replay critical delay in second (Default: 5) - -e expected_sync_state The expected replication state ('sync' or 'async', default: sync) - -E expected_mode The expected mode ('master', 'hot-standby' or 'auto', default: 'auto') + (Default: [PG_MAIN]/recovery.conf on PG <= 11, + [PG_MAIN]/postgresql.auto.conf on PG >= 12) + -U pg_master_user Specify Postgres user to use on master (Default: user from recovery.conf + file) + -p pg_port Specify default Postgres master TCP port (Default: same as local + PostgreSQL port if detected or use 5432) + -D dbname Specify DB name on Postgres master/slave to connect on (Default: + PG_USER, must match with .pgpass one is used) + -w replay_warn_delay Specify the replay warning delay in second + (Default: 3) + -c replay_crit_delay Specify the replay critical delay in second + (Default: 5) + -e expected_sync_state The expected replication state ('sync' or 'async', + default: sync) + -E expected_mode The expected mode ('master', 'hot-standby' or 'auto', + default: 'auto') -d Debug mode -h Show this message + ``` ## Copyright diff --git a/check_pg_streaming_replication b/check_pg_streaming_replication index f4c6b7e..3a52bca 100755 --- a/check_pg_streaming_replication +++ b/check_pg_streaming_replication @@ -20,7 +20,7 @@ # ~/.pgpass). 
# # Author: Benjamin Renard -# Date: Mon, 03 Jun 2024 15:31:29 +0200 +# Date: Wed Jul 17 10:56:43 2024 +0200 # Source: https://gitea.zionetrix.net/bn8/check_pg_streaming_replication # SPDX-License-Identifier: GPL-3.0-or-later # @@ -39,7 +39,6 @@ RECOVERY_CONF="" PG_DEFAULT_PORT="" PG_DEFAULT_APP_NAME=$( hostname ) PG_DB="" -CHECK_CUR_MASTER_LSN=1 REPLAY_WARNING_DELAY=3 REPLAY_CRITICAL_DELAY=5 EXPECTED_SYNC_STATE=sync @@ -69,8 +68,6 @@ Usage: $0 [-d] [-h] [options] PostgreSQL port if detected or use $DEFAULT_PG_PORT) -D dbname Specify DB name on Postgres master/slave to connect on (Default: PG_USER, must match with .pgpass one is used) - -C 1/0 Enable or disable check if the current LSN of the master host is the - same of the last received LSN (Default: $CHECK_CUR_MASTER_LSN) -w replay_warn_delay Specify the replay warning delay in second (Default: $REPLAY_WARNING_DELAY) -c replay_crit_delay Specify the replay critical delay in second @@ -85,7 +82,7 @@ EOF [[ -n "$ERROR" ]] && exit 1 || exit 0 } -while getopts "hu:b:B:V:m:r:U:p:D:C:w:c:e:E:d" OPTION; do +while getopts "hu:b:B:V:m:r:U:p:D:w:c:e:E:d" OPTION; do case $OPTION in u) PG_USER=$OPTARG @@ -114,9 +111,6 @@ while getopts "hu:b:B:V:m:r:U:p:D:C:w:c:e:E:d" OPTION; do D) PG_DB=$OPTARG ;; - C) - CHECK_CUR_MASTER_LSN=$OPTARG - ;; w) REPLAY_WARNING_DELAY=$OPTARG ;; @@ -164,7 +158,6 @@ PG_MAIN = $PG_MAIN RECOVERY_CONF = $RECOVERY_CONF PG_DEFAULT_PORT = $PG_DEFAULT_PORT PG_DEFAULT_APP_NAME = $PG_DEFAULT_APP_NAME -CHECK_CUR_MASTER_LSN = $CHECK_CUR_MASTER_LSN REPLAY_WARNING_DELAY = $REPLAY_WARNING_DELAY REPLAY_CRITICAL_DELAY = $REPLAY_CRITICAL_DELAY EXPECTED_SYNC_STATE = $EXPECTED_SYNC_STATE @@ -250,7 +243,6 @@ PG_MAIN = $PG_MAIN RECOVERY_CONF = $RECOVERY_CONF PG_DEFAULT_PORT = $PG_DEFAULT_PORT PG_DEFAULT_APP_NAME = $PG_DEFAULT_APP_NAME -CHECK_CUR_MASTER_LSN = $CHECK_CUR_MASTER_LSN REPLAY_WARNING_DELAY = $REPLAY_WARNING_DELAY REPLAY_CRITICAL_DELAY = $REPLAY_CRITICAL_DELAY " @@ -386,6 +378,19 @@ if [[ "$EXPECTED_MODE" == 
"hot-standby" ]]; then debug "Master application name: $M_APP_NAME" fi + # Check if master is configured for synchronous commit + SYNC_MODE="$( + psql_master_get "SELECT setting from pg_settings WHERE name = 'synchronous_commit';" + )" + debug "Master synchronous_commit=$SYNC_MODE" + if [[ "$SYNC_MODE" == "on" ]] || [[ "$SYNC_MODE" == "remote_apply" ]]; then + debug "Master is configured for synchronous commit" + SYNCHRONOUS_COMMIT=1 + else + debug "Master is not configured for synchronous commit" + SYNCHRONOUS_COMMIT=0 + fi + # Get current replication state information from master M_CUR_REPL_STATE_INFO="$( psql_master_get \ @@ -419,24 +424,6 @@ if [[ "$EXPECTED_MODE" == "hot-standby" ]]; then M_CUR_WRITED_LSN=$( cut -d'|' -f4 <<< "$M_CUR_REPL_STATE_INFO" ) debug "Master current last sent/writed LSN: '$M_CUR_SENT_LSN' / '$M_CUR_WRITED_LSN'" - # Check current master LSN vs last received LSN - if [[ "$CHECK_CUR_MASTER_LSN" == "1" ]]; then - # Get current LSN from master - M_CUR_LSN="$( psql_master_get "SELECT $pg_current_wal_lsn" )" - if [[ -z "$M_CUR_LSN" ]]; then - echo "UNKNOWN: Can't retrieve current LSN from master server" - exit 3 - fi - debug "Master current LSN: $M_CUR_LSN" - - # Master current LSN is the last received LSN ? - if [[ "$M_CUR_LSN" != "$LAST_RECEIVED_LSN" ]]; then - echo "CRITICAL: Master current LSN is not the last received LSN" - exit 2 - fi - debug "Master current LSN is the last received LSN" - fi - # The last received LSN is the last replayed ? if [[ "$LAST_RECEIVED_LSN" != "$LAST_REPLAYED_LSN" ]]; then debug "/!\ The last received LSN is NOT the last replayed LSN" \ @@ -462,11 +449,27 @@ if [[ "$EXPECTED_MODE" == "hot-standby" ]]; then debug "Last received LSN is the last replayed file" # The master last sent LSN is the last received (and synced) ? - if [[ "$M_CUR_SENT_LSN" != "$LAST_RECEIVED_LSN" ]]; then - echo "WARNING: master last sent LSN is not already received (and synced to disk) by slave." 
-        echo "May be we have some network delay or load on slave"
+    if [[ $SYNCHRONOUS_COMMIT -eq 1 ]] && [[ "$M_CUR_SENT_LSN" != "$LAST_RECEIVED_LSN" ]]; then
+        LSN_DIFF=$(
+            psql_master_get "SELECT $pg_wal_lsn_diff('$M_CUR_SENT_LSN', '$LAST_RECEIVED_LSN');"
+        )
+        debug "LSN diff ('$M_CUR_SENT_LSN' vs '$LAST_RECEIVED_LSN'): $LSN_DIFF bytes"
+        echo "WARNING: master last sent LSN is not already received (and synced to disk) by slave" \
+            "(diff: $LSN_DIFF bytes). May be we have some network delay or load on slave"
         echo "Master last sent LSN: $M_CUR_SENT_LSN"
         echo "Slave last received (and synced to disk) LSN: $LAST_RECEIVED_LSN"
+        echo "Diff: $LSN_DIFF bytes"
+        exit 1
+    elif [[ $SYNCHRONOUS_COMMIT -eq 0 ]] && [[ "$M_CUR_SENT_LSN" != "$M_CUR_WRITED_LSN" ]]; then
+        LSN_DIFF=$(
+            psql_master_get "SELECT $pg_wal_lsn_diff('$M_CUR_SENT_LSN', '$M_CUR_WRITED_LSN');"
+        )
+        debug "LSN diff ('$M_CUR_SENT_LSN' vs '$M_CUR_WRITED_LSN'): $LSN_DIFF bytes"
+        echo "WARNING: master last sent LSN is not already received by slave" \
+            "(diff: $LSN_DIFF bytes). May be we have some network delay or load on slave"
+        echo "Master last sent LSN: $M_CUR_SENT_LSN"
+        echo "Slave last received LSN: $M_CUR_WRITED_LSN"
+        echo "Diff: $LSN_DIFF bytes"
         exit 1
     fi
 
@@ -491,6 +494,17 @@ elif [[ "$EXPECTED_MODE" == "master" ]]; then
     fi
     debug "Current LSN: $CURRENT_LSN"
 
+    # Check if master is configured for synchronous commit (synchronous_commit is on or remote_apply)
+    SYNC_MODE="$( psql_get "SELECT setting from pg_settings WHERE name = 'synchronous_commit';" )"
+    debug "synchronous_commit=$SYNC_MODE"
+    if [[ "$SYNC_MODE" == "on" ]] || [[ "$SYNC_MODE" == "remote_apply" ]]; then
+        debug "Master is configured for synchronous commit"
+        SYNCHRONOUS_COMMIT=1
+    else
+        debug "Master is not configured for synchronous commit"
+        SYNCHRONOUS_COMMIT=0
+    fi
+
     # Check standby client
     STANDBY_CLIENTS=$(
         psql_get \
@@ -528,7 +542,11 @@ elif [[ "$EXPECTED_MODE" == "master" ]]; then
         STANDBY_CLIENTS_ROW="$NAME ($IP): $STATE/$SYNC_STATE"
         STANDBY_CLIENTS_ROW+=" (LSN: sent='$SENT_LSN' / writed='$WRITED_LSN', Lag: ${LAG}b"
         STANDBY_CLIENTS_ROWS+=( "$STANDBY_CLIENTS_ROW" )
-        [[ "$SENT_LSN" != "$CURRENT_LSN" ]] && CURRENT_LSN_IS_LAST_SENT=0
+        if [[ $SYNCHRONOUS_COMMIT -eq 1 ]] && [[ "$SENT_LSN" != "$CURRENT_LSN" ]]; then
+            CURRENT_LSN_IS_LAST_SENT=0
+        elif [[ $SYNCHRONOUS_COMMIT -eq 0 ]] && [[ "$SENT_LSN" != "$WRITED_LSN" ]]; then
+            CURRENT_LSN_IS_LAST_SENT=0
+        fi
     done
 
     if [[ $CURRENT_LSN_IS_LAST_SENT -eq 1 ]]; then