Compare commits
7 commits
2020.02.20
...
master
Author | SHA1 | Date | |
---|---|---|---|
d33476ce8d | |||
0443f56b1d | |||
d4cbdb3c79 | |||
c8e6c80dc0 | |||
ea72d09399 | |||
50c3363cbc | |||
dc07eccd09 |
62
README.md
62
README.md
|
@ -24,55 +24,53 @@ This script :
|
|||
- check if stand-by client(s) is connected (_WARNING_ raise if not)
|
||||
- Return _OK_ state with list and count of stand-by client(s)
|
||||
|
||||
**Note :** This script was originally write for PostgreSQL 9.1 and test on 9.1, 9.5 and 9.6 but it could be compatible with other versions of PostgreSQL. Do not hesitate to tell me how this script work with other versions and share some fix. All contributions are welcome !
|
||||
**Note :** This script was originally written for PostgreSQL 9.1 and tested on 9.1, 9.5 and 9.6, but it could be compatible with other versions of PostgreSQL. Some adjustments have been made for PostgreSQL >= 10 (without testing them). Do not hesitate to tell me how this script works with other versions and to share fixes. All contributions are welcome!
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
||||
* **On master node :** Slaves must be able to connect with user from `recovery.conf` to database with the same name (or another specify with `-D`) as trust (or via md5 using password specify in `~/.pgpass`).
|
||||
* Some CLI tools: `sudo`, `awk`, `sed`, `bc`, `psql` and `pg_lscluster`
|
||||
|
||||
* **On standby node :** `PG_USER` must be able to connect localy on the database with the same name (or another specify with `-D`) as trust (or via md5 using password specify in `~/.pgpass`).
|
||||
* **On master node:** Slaves must be able to connect with the user from `recovery.conf` (or the user specified using `-U`) to the database with the same name (or another specified with `-D`) as `trust` (or via `md5` using the password specified in `~/.pgpass`). This user must have the `SUPERUSER` privilege (needed to get replication details).
|
||||
|
||||
* `sudo` must be installed on each node.
|
||||
* **On standby node:** `PG_USER` must be able to connect locally to the database with the same name (or another specified with `-D`) as `trust` (or via `md5` using the password specified in `~/.pgpass`).
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
Usage : check_pg_streaming_replication [-d] [-h] [options]
|
||||
-u pg_user Specify Postgres user (Default : postgres)
|
||||
-b psql_bin Specify psql binary path (Default : /usr/bin/psql)
|
||||
-m pg_main Specify Postgres main directory path
|
||||
(By default, try to auto-detect it, on your system it :
|
||||
/var/lib/postgresql/9.6/main)
|
||||
-r recovery_conf Specify Postgres recovery configuration file path
|
||||
(Default : [PG_MAIN]/recovery.conf)
|
||||
-U pg_master_user Specify Postgres user to use on master (Default : user from recovery.conf file)
|
||||
-p pg_port Specify default Postgres master TCP port (Default : 5432)
|
||||
-D dbname Specify DB name on Postgres master/slave to connect on (Default : PG_USER)
|
||||
-C 1/0 Enable or disable check if the current XLOG file of the master host is the same
|
||||
of the last replay XLOG file (Default : 1)
|
||||
-w replay_warn_delay Specify the replay warning delay in second (Default : 3)
|
||||
-c replay_crit_delay Specify the replay critical delay in second (Default : 5)
|
||||
-d Debug mode
|
||||
-h Show this message
|
||||
```
|
||||
Usage: check_pg_streaming_replication [-d] [-h] [options]
|
||||
-u pg_user Specify local Postgres user (Default: try to auto-detect or use postgres)
|
||||
-b psql_bin Specify psql binary path (Default: /usr/bin/psql)
|
||||
-B pg_lsclusters_bin Specify pg_lsclusters binary path (Default: /usr/bin/pg_lsclusters)
|
||||
-V pg_version Specify Postgres version (Default: try to auto-detect or use 9.1)
|
||||
-m pg_main Specify Postgres main directory path (Default: try to auto-detect or use
|
||||
/var/lib/postgresql//main)
|
||||
-r recovery_conf Specify Postgres recovery configuration file path
|
||||
(Default: [PG_MAIN]/recovery.conf)
|
||||
-U pg_master_user Specify Postgres user to use on master (Default: user from recovery.conf file)
|
||||
-p pg_port Specify default Postgres master TCP port (Default: same as local PostgreSQL
|
||||
port if detected or use 5432)
|
||||
-D dbname Specify DB name on Postgres master/slave to connect on (Default: PG_USER, must
|
||||
match with .pgpass one is used)
|
||||
-C 1/0 Enable or disable check if the current LSN of the master host is the same
|
||||
of the last received LSN (Default: 1)
|
||||
-w replay_warn_delay Specify the replay warning delay in second (Default: 3)
|
||||
-c replay_crit_delay Specify the replay critical delay in second (Default: 5)
|
||||
-d Debug mode
|
||||
-h Show this message
|
||||
```
|
||||
|
||||
Copyright
|
||||
---------
|
||||
|
||||
Copyright (c) 2014-2019 Benjamin Renard
|
||||
Copyright (c) 2014-2020 Benjamin Renard
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License version 2
|
||||
as published by the Free Software Foundation.
|
||||
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
|
|
@ -1,43 +1,45 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# Nagios plugin to check PostgreSQL streaming replication state
|
||||
#
|
||||
#
|
||||
# Can be used on the master or on a standby node
|
||||
#
|
||||
# Requirement :
|
||||
# Requirements:
|
||||
#
|
||||
# On master node : Slaves must be able to connect with user from recovery.conf
|
||||
# to database with the same name (or another specified with -D)
|
||||
# as trust (or via md5 using password specified in ~/.pgpass).
|
||||
# Some CLI tools: sudo, awk, sed, bc, psql and pg_lscluster
|
||||
#
|
||||
# On standby node : PG_USER must be able to connect localy on the database
|
||||
# with the same name (or another specified with -D) as trust
|
||||
# (or via md5 using password specified in ~/.pgpass).
|
||||
# On master node: Slaves must be able to connect with user from recovery.conf
|
||||
# (or user specify using -U) to database with the same name
|
||||
# (or another specified with -D) as trust (or via md5 using
|
||||
# password specified in ~/.pgpass). This user must have
|
||||
# SUPERUSER privilege (need to get replication details).
|
||||
#
|
||||
# Author : Benjamin Renard <brenard@easter-eggs.com>
|
||||
# Date : Fri, 25 Aug 2017 15:57:57 +0200
|
||||
# Source : http://git.zionetrix.net/check_pg_streaming_replication
|
||||
# On standby node: PG_USER must be able to connect localy on the database
|
||||
# with the same name (or another specified with -D) as trust
|
||||
# (or via md5 using password specified in ~/.pgpass).
|
||||
#
|
||||
# Author: Benjamin Renard <brenard@easter-eggs.com>
|
||||
# Date: Wed, 04 Nov 2020 15:31:13 +0100
|
||||
# Source: https://gogs.zionetrix.net/bn8/check_pg_streaming_replication
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
|
||||
PG_USER=postgres
|
||||
DEFAULT_PG_USER=postgres
|
||||
DEFAULT_PG_VERSION=9.1
|
||||
# Fallback data directory, used when neither -m nor pg_lsclusters provides one.
# It is built from the default version: PG_VERSION is only initialised further
# down (and filled in by -V / auto-detection later), so expanding it here would
# produce the broken path /var/lib/postgresql//main.
DEFAULT_PG_MAIN="/var/lib/postgresql/${DEFAULT_PG_VERSION:-9.1}/main"
|
||||
DEFAULT_PG_PORT=5432
|
||||
PG_USER=""
|
||||
PG_VERSION=""
|
||||
PG_MAIN=""
|
||||
PG_MASTER_USER=""
|
||||
PSQL_BIN=/usr/bin/psql
|
||||
PG_MAIN=/var/lib/postgresql/9.1/main
|
||||
if [ -f /etc/debian_version ]
|
||||
then
|
||||
AUTO_PG_MAIN=$( ls -1d /var/lib/postgresql/9*/main 2> /dev/null|sort -n|tail -n 1 )
|
||||
[ -n "$AUTO_PG_MAIN" -a -d "$AUTO_PG_MAIN" ] && PG_MAIN=$AUTO_PG_MAIN
|
||||
elif [ -f /etc/redhat-release ]
|
||||
then
|
||||
AUTO_PG_MAIN=$( ls -1d /var/lib/pgsql/9*/data 2> /dev/null|sort -n|tail -n 1 )
|
||||
[ -n "$AUTO_PG_MAIN" -a -d "$AUTO_PG_MAIN" ] && PG_MAIN=$AUTO_PG_MAIN
|
||||
fi
|
||||
PG_LSCLUSTER_BIN=/usr/bin/pg_lsclusters
|
||||
RECOVERY_CONF_FILENAME=recovery.conf
|
||||
RECOVERY_CONF=""
|
||||
PG_DEFAULT_PORT=5432
|
||||
PG_DEFAULT_PORT=""
|
||||
PG_DEFAULT_APP_NAME=$( hostname )
|
||||
PG_DB=""
|
||||
CHECK_CUR_MASTER_XLOG=1
|
||||
CHECK_CUR_MASTER_LSN=1
|
||||
REPLAY_WARNING_DELAY=3
|
||||
REPLAY_CRITICAL_DELAY=5
|
||||
|
||||
|
@ -45,28 +47,31 @@ DEBUG=0
|
|||
|
||||
function usage () {
|
||||
cat << EOF
|
||||
Usage : $0 [-d] [-h] [options]
|
||||
-u pg_user Specify Postgres user (Default : $PG_USER)
|
||||
-b psql_bin Specify psql binary path (Default : $PSQL_BIN)
|
||||
-m pg_main Specify Postgres main directory path
|
||||
(By default, try to auto-detect it, on your system it :
|
||||
$PG_MAIN)
|
||||
Usage: $0 [-d] [-h] [options]
|
||||
-u pg_user Specify local Postgres user (Default: try to auto-detect or use $DEFAULT_PG_USER)
|
||||
-b psql_bin Specify psql binary path (Default: $PSQL_BIN)
|
||||
-B pg_lsclusters_bin Specify pg_lsclusters binary path (Default: $PG_LSCLUSTER_BIN)
|
||||
-V pg_version Specify Postgres version (Default: try to auto-detect or use $DEFAULT_PG_VERSION)
|
||||
-m pg_main Specify Postgres main directory path (Default: try to auto-detect or use
|
||||
$DEFAULT_PG_MAIN)
|
||||
-r recovery_conf Specify Postgres recovery configuration file path
|
||||
(Default : [PG_MAIN]/$RECOVERY_CONF_FILENAME)
|
||||
-U pg_master_user Specify Postgres user to use on master (Default : user from recovery.conf file)
|
||||
-p pg_port Specify default Postgres master TCP port (Default : $PG_DEFAULT_PORT)
|
||||
-D dbname Specify DB name on Postgres master/slave to connect on (Default : PG_USER)
|
||||
-C 1/0 Enable or disable check if the current XLOG file of the master host is the same
|
||||
of the last replay XLOG file (Default : $CHECK_CUR_MASTER_XLOG)
|
||||
-w replay_warn_delay Specify the replay warning delay in second (Default : $REPLAY_WARNING_DELAY)
|
||||
-c replay_crit_delay Specify the replay critical delay in second (Default : $REPLAY_CRITICAL_DELAY)
|
||||
(Default: [PG_MAIN]/$RECOVERY_CONF_FILENAME)
|
||||
-U pg_master_user Specify Postgres user to use on master (Default: user from recovery.conf file)
|
||||
-p pg_port Specify default Postgres master TCP port (Default: same as local PostgreSQL
|
||||
port if detected or use $DEFAULT_PG_PORT)
|
||||
-D dbname Specify DB name on Postgres master/slave to connect on (Default: PG_USER, must
|
||||
match with .pgpass one is used)
|
||||
-C 1/0 Enable or disable check if the current LSN of the master host is the same
|
||||
of the last received LSN (Default: $CHECK_CUR_MASTER_LSN)
|
||||
-w replay_warn_delay Specify the replay warning delay in second (Default: $REPLAY_WARNING_DELAY)
|
||||
-c replay_crit_delay Specify the replay critical delay in second (Default: $REPLAY_CRITICAL_DELAY)
|
||||
-d Debug mode
|
||||
-h Show this message
|
||||
EOF
|
||||
exit 0
|
||||
}
|
||||
|
||||
while getopts "hu:b:m:r:U:p:D:C:w:c:d" OPTION
|
||||
while getopts "hu:b:B:V:m:r:U:p:D:C:w:c:d" OPTION
|
||||
do
|
||||
case $OPTION in
|
||||
u)
|
||||
|
@ -75,6 +80,12 @@ do
|
|||
b)
|
||||
PSQL_BIN=$OPTARG
|
||||
;;
|
||||
B)
|
||||
PG_LSCLUSTER_BIN=$OPTARG
|
||||
;;
|
||||
V)
|
||||
PG_VERSION=$OPTARG
|
||||
;;
|
||||
m)
|
||||
PG_MAIN=$OPTARG
|
||||
;;
|
||||
|
@ -91,7 +102,7 @@ do
|
|||
PG_DB=$OPTARG
|
||||
;;
|
||||
C)
|
||||
CHECK_CUR_MASTER_XLOG=$OPTARG
|
||||
CHECK_CUR_MASTER_LSN=$OPTARG
|
||||
;;
|
||||
w)
|
||||
REPLAY_WARNING_DELAY=$OPTARG
|
||||
|
@ -111,22 +122,68 @@ do
|
|||
esac
|
||||
done
|
||||
|
||||
function debug() {
|
||||
if [ $DEBUG -eq 1 ]
|
||||
then
|
||||
>&2 echo -e "[DEBUG] $1"
|
||||
fi
|
||||
}
|
||||
|
||||
debug "Starting options (before handling auto-detection/default values):
|
||||
PG_VERSION = $PG_VERSION
|
||||
PG_DB = $PG_DB
|
||||
PG_USER = $PG_USER
|
||||
PSQL_BIN = $PSQL_BIN
|
||||
PG_LSCLUSTER_BIN = $PG_LSCLUSTER_BIN
|
||||
PG_MAIN = $PG_MAIN
|
||||
RECOVERY_CONF = $RECOVERY_CONF
|
||||
PG_DEFAULT_PORT = $PG_DEFAULT_PORT
|
||||
PG_DEFAULT_APP_NAME = $PG_DEFAULT_APP_NAME
|
||||
CHECK_CUR_MASTER_LSN = $CHECK_CUR_MASTER_LSN
|
||||
REPLAY_WARNING_DELAY = $REPLAY_WARNING_DELAY
|
||||
REPLAY_CRITICAL_DELAY = $REPLAY_CRITICAL_DELAY
|
||||
"
|
||||
|
||||
# Auto-detect PostgreSQL information using pg_lsclusters
|
||||
if [ -x "$PG_LSCLUSTER_BIN" ]
|
||||
then
|
||||
PG_CLUSTER=$( $PG_LSCLUSTER_BIN -h 2>/dev/null|head -n1 )
|
||||
if [ -n "$PG_CLUSTER" ]
|
||||
then
|
||||
debug "pg_lsclusters output:\n\t$PG_CLUSTER"
|
||||
# Output example:
|
||||
# 9.6 main 5432 online,recovery postgres /var/lib/postgresql/9.6/main /var/log/postgresql/postgresql-9.6-main.log
|
||||
[ -z "$PG_VERSION" ] && PG_VERSION=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $1}' )
|
||||
[ -z "$PG_DEFAULT_PORT" ] && PG_DEFAULT_PORT=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $3}' )
|
||||
[ -z "$PG_USER" ] && PG_USER=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $5}' )
|
||||
[ -z "$PG_MAIN" ] && PG_MAIN=$( echo "$PG_CLUSTER"|awk -F ' +' '{print $6}' )
|
||||
fi
|
||||
else
|
||||
debug "pg_lsclusters not found ($PG_LSCLUSTER_BIN): parameters auto-detection disabled"
|
||||
fi
|
||||
|
||||
# If auto-detection failed, use default values
|
||||
[ -z "$PG_USER" ] && PG_USER="$DEFAULT_PG_USER"
|
||||
[ -z "$PG_VERSION" ] && PG_VERSION="$DEFAULT_PG_VERSION"
|
||||
[ -z "$PG_MAIN" ] && PG_MAIN="$DEFAULT_PG_MAIN"
|
||||
[ -z "$PG_DEFAULT_PORT" ] && PG_DEFAULT_PORT="$DEFAULT_PG_PORT"
|
||||
|
||||
# Check PG_USER
|
||||
[ -z "$PG_USER" ] && echo "UNKNOWN : Postgres user not specified" && exit 3
|
||||
[ -z "$PG_USER" ] && echo "UNKNOWN: Postgres user not specified" && exit 3
|
||||
id "$PG_USER" > /dev/null 2>&1
|
||||
[ $? -ne 0 ] && echo "UNKNOWN : Invalid Postgres user ($PG_USER)" && exit 3
|
||||
[ $? -ne 0 ] && echo "UNKNOWN: Invalid Postgres user ($PG_USER)" && exit 3
|
||||
|
||||
# Check PSQL_BIN
|
||||
[ ! -x "$PSQL_BIN" ] && echo "UNKNOWN : Invalid psql bin path ($PSQL_BIN)" && exit 3
|
||||
[ ! -x "$PSQL_BIN" ] && echo "UNKNOWN: Invalid psql bin path ($PSQL_BIN)" && exit 3
|
||||
|
||||
# Check PG_MAIN
|
||||
[ ! -d "$PG_MAIN/" ] && echo "UNKNOWN : Invalid Postgres main directory path ($PG_MAIN)" && exit 3
|
||||
[ ! -d "$PG_MAIN/" ] && echo "UNKNOWN: Invalid Postgres main directory path ($PG_MAIN)" && exit 3
|
||||
|
||||
# Check RECOVERY_CONF
|
||||
[ -z "$RECOVERY_CONF" ] && RECOVERY_CONF="$PG_MAIN/$RECOVERY_CONF_FILENAME"
|
||||
|
||||
# Check PG_DEFAULT_PORT
|
||||
[ $( echo "$PG_DEFAULT_PORT"|grep -c -E '^[0-9]*$' ) -ne 1 ] && "UNKNOWN : Postgres default master TCP port must be an integer." && exit 3
|
||||
# Validate the default master TCP port.
#
# Argument: $1 - port value to validate
# Output:   prints the Nagios UNKNOWN message on stdout when the value is invalid
# Exit:     terminates the script with status 3 (UNKNOWN) when the value is invalid
#
# The message must be echoed: a bare string after '&&' is executed as a command
# name, which fails with "command not found", so the message was never printed
# and 'exit 3' was never reached (an invalid port was silently accepted).
# As before, only digits are accepted; an empty value still passes this check.
function check_pg_default_port() {
	if ! [[ "$1" =~ ^[0-9]*$ ]]
	then
		echo "UNKNOWN: Postgres default master TCP port must be an integer."
		exit 3
	fi
}
check_pg_default_port "$PG_DEFAULT_PORT"
|
||||
|
||||
# If PG_DB is not provided with -D parameter, use PG_USER as default value
|
||||
[ -z "$PG_DB" ] && PG_DB="$PG_USER"
|
||||
|
@ -143,26 +200,39 @@ function psql_master_get () {
|
|||
echo "$sql"|sudo -u $PG_USER $PSQL_BIN -U $M_USER -h $M_HOST -w -p $M_PORT -d $PG_DB -t -P format=unaligned
|
||||
}
|
||||
|
||||
function debug() {
|
||||
if [ $DEBUG -eq 1 ]
|
||||
then
|
||||
>&2 echo "[DEBUG] $1"
|
||||
fi
|
||||
}
|
||||
|
||||
debug "Running options :
|
||||
debug "Running options:
|
||||
PG_VERSION = $PG_VERSION
|
||||
PG_DB = $PG_DB
|
||||
PG_USER = $PG_USER
|
||||
PSQL_BIN = $PSQL_BIN
|
||||
PG_LSCLUSTER_BIN = $PG_LSCLUSTER_BIN
|
||||
PG_MAIN = $PG_MAIN
|
||||
RECOVERY_CONF = $RECOVERY_CONF
|
||||
PG_DEFAULT_PORT = $PG_DEFAULT_PORT
|
||||
PG_DEFAULT_APP_NAME = $PG_DEFAULT_APP_NAME
|
||||
CHECK_CUR_MASTER_XLOG = $CHECK_CUR_MASTER_XLOG
|
||||
CHECK_CUR_MASTER_LSN = $CHECK_CUR_MASTER_LSN
|
||||
REPLAY_WARNING_DELAY = $REPLAY_WARNING_DELAY
|
||||
REPLAY_CRITICAL_DELAY = $REPLAY_CRITICAL_DELAY
|
||||
"
|
||||
|
||||
# Set some stuff to PostgreSQL version
|
||||
if [ $( echo "$PG_VERSION < 10" |bc -l ) -eq 1 ]
|
||||
then
|
||||
pg_last_wal_receive_lsn='pg_last_xlog_receive_location()'
|
||||
pg_last_wal_replay_lsn='pg_last_xlog_replay_location()'
|
||||
pg_current_wal_lsn='pg_current_xlog_location()'
|
||||
pg_wal_lsn_diff='pg_xlog_location_diff'
|
||||
sent_lsn='sent_location'
|
||||
write_lsn='write_location'
|
||||
else
|
||||
pg_last_wal_receive_lsn='pg_last_wal_receive_lsn()'
|
||||
pg_last_wal_replay_lsn='pg_last_wal_replay_lsn()'
|
||||
pg_current_wal_lsn='pg_current_wal_lsn()'
|
||||
pg_wal_lsn_diff='pg_wal_lsn_diff'
|
||||
sent_lsn='sent_lsn'
|
||||
write_lsn='write_lsn'
|
||||
fi
|
||||
|
||||
# Postgres is running ?
|
||||
if [ $DEBUG -eq 0 ]
|
||||
then
|
||||
|
@ -172,7 +242,7 @@ else
|
|||
fi
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
echo "CRITICAL : Postgres is not running !"
|
||||
echo "CRITICAL: Postgres is not running !"
|
||||
exit 2
|
||||
fi
|
||||
debug "Postgres is running"
|
||||
|
@ -187,134 +257,148 @@ then
|
|||
# Check recovery mode
|
||||
if [ $RECOVERY_MODE -ne 1 ]
|
||||
then
|
||||
echo "CRITICAL : Not in recovery mode while recovery.conf file found !"
|
||||
echo "CRITICAL: Not in recovery mode while recovery.conf file found !"
|
||||
exit 2
|
||||
fi
|
||||
debug "Postgres is in recovery mode"
|
||||
|
||||
LAST_XLOG_RECEIVE=$( psql_get "SELECT pg_last_xlog_receive_location()" )
|
||||
debug "Last xlog file receive : $LAST_XLOG_RECEIVE"
|
||||
LAST_XLOG_REPLAY=$( psql_get "SELECT pg_last_xlog_replay_location()" )
|
||||
debug "Last xlog file replay : $LAST_XLOG_REPLAY"
|
||||
# Get local current last received/replayed LSN
|
||||
LAST_RECEIVED_LSN=$( psql_get "SELECT $pg_last_wal_receive_lsn" )
|
||||
debug "Last received LSN: $LAST_RECEIVED_LSN"
|
||||
LAST_REPLAYED_LSN=$( psql_get "SELECT $pg_last_wal_replay_lsn" )
|
||||
debug "Last replayed LSN: $LAST_REPLAYED_LSN"
|
||||
|
||||
|
||||
# Get master connection informations from recovery.conf file
|
||||
MASTER_CONN_INFOS=$( egrep '^ *primary_conninfo' $RECOVERY_CONF|sed "s/^ *primary_conninfo *= *\(.\+\) *$/\1/" )
|
||||
if [ ! -n "$MASTER_CONN_INFOS" ]
|
||||
then
|
||||
echo "UNKNOWN : Can't retreive master connection informations form recovery.conf file"
|
||||
echo "UNKNOWN: Can't retreive master connection informations form recovery.conf file"
|
||||
exit 3
|
||||
fi
|
||||
debug "Master connection informations : $MASTER_CONN_INFOS"
|
||||
debug "Master connection informations: $MASTER_CONN_INFOS"
|
||||
|
||||
M_HOST=$( echo "$MASTER_CONN_INFOS"| grep 'host=' | sed 's/^.*host= *\([0-9a-zA-Z.-]\+\) *.*$/\1/' )
|
||||
if [ ! -n "$M_HOST" ]
|
||||
then
|
||||
echo "UNKNOWN : Can't retreive master host from recovery.conf file"
|
||||
echo "UNKNOWN: Can't retreive master host from recovery.conf file"
|
||||
exit 3
|
||||
fi
|
||||
debug "Master host : $M_HOST"
|
||||
debug "Master host: $M_HOST"
|
||||
|
||||
M_PORT=$( echo "$MASTER_CONN_INFOS"| grep 'port=' | sed 's/^.*port= *\([0-9a-zA-Z.-]\+\) *.*$/\1/' )
|
||||
if [ ! -n "$M_PORT" ]
|
||||
then
|
||||
debug "Master port not specified, use default : $PG_DEFAULT_PORT"
|
||||
debug "Master port not specified, use default: $PG_DEFAULT_PORT"
|
||||
M_PORT=$PG_DEFAULT_PORT
|
||||
else
|
||||
debug "Master port : $M_PORT"
|
||||
debug "Master port: $M_PORT"
|
||||
fi
|
||||
|
||||
if [ -n "$PG_MASTER_USER" ]
|
||||
then
|
||||
debug "Master user provided by command-line, use it : $PG_MASTER_USER"
|
||||
debug "Master user provided by command-line, use it: $PG_MASTER_USER"
|
||||
M_USER="$PG_MASTER_USER"
|
||||
else
|
||||
M_USER=$( echo "$MASTER_CONN_INFOS"| grep 'user=' | sed 's/^.*user= *\([0-9a-zA-Z.-]\+\) *.*$/\1/' )
|
||||
if [ ! -n "$M_USER" ]
|
||||
then
|
||||
debug "Master user not specified, use default : $PG_USER"
|
||||
debug "Master user not specified, use default: $PG_USER"
|
||||
M_USER=$PG_USER
|
||||
else
|
||||
debug "Master user : $M_USER"
|
||||
debug "Master user: $M_USER"
|
||||
fi
|
||||
fi
|
||||
|
||||
M_APP_NAME=$( echo "$MASTER_CONN_INFOS"| grep 'application_name=' | sed "s/^.*application_name=[ \'\"]*\([^ \'\"]\+\)[ \'\"]*.*$/\1/" )
|
||||
if [ ! -n "$M_APP_NAME" ]
|
||||
then
|
||||
debug "Master application name not specified, use default : $PG_DEFAULT_APP_NAME"
|
||||
debug "Master application name not specified, use default: $PG_DEFAULT_APP_NAME"
|
||||
M_APP_NAME=$PG_DEFAULT_APP_NAME
|
||||
else
|
||||
debug "Master application name : $M_APP_NAME"
|
||||
debug "Master application name: $M_APP_NAME"
|
||||
fi
|
||||
|
||||
# Get current state/sync_state from master
|
||||
M_CUR_STATE_SYNC_STATE="$( psql_master_get "SELECT state,sync_state FROM pg_stat_replication WHERE application_name='$M_APP_NAME';" )"
|
||||
if [ ! -n "$M_CUR_STATE_SYNC_STATE" ]
|
||||
# Get current replication state information from master
|
||||
M_CUR_REPL_STATE_INFO="$( psql_master_get "SELECT state, sync_state, $sent_lsn AS sent_lsn, $write_lsn AS write_lsn FROM pg_stat_replication WHERE application_name='$M_APP_NAME';" )"
|
||||
if [ ! -n "$M_CUR_REPL_STATE_INFO" ]
|
||||
then
|
||||
echo "UNKNOWN : Can't retreive current state and sync state from master server"
|
||||
echo "UNKNOWN: Can't retreive current replication state information from master server"
|
||||
exit 3
|
||||
fi
|
||||
debug "Master current state / sync_state : $M_CUR_STATE_SYNC_STATE"
|
||||
debug "Master current replication state:\n\tstate|sync_state|sent_lsn|write_lsn\n\t$M_CUR_REPL_STATE_INFO"
|
||||
|
||||
M_CUR_STATE=$( echo "$M_CUR_STATE_SYNC_STATE"|cut -d'|' -f1 )
|
||||
debug "Master current state : $M_CUR_STATE"
|
||||
M_CUR_STATE=$( echo "$M_CUR_REPL_STATE_INFO"|cut -d'|' -f1 )
|
||||
debug "Master current state: $M_CUR_STATE"
|
||||
if [ "$M_CUR_STATE" != "streaming" ]
|
||||
then
|
||||
echo "CRITICAL : this host is not in streaming state according to master host (current state = '$M_CUR_STATE')"
|
||||
echo "CRITICAL: this host is not in streaming state according to master host (current state = '$M_CUR_STATE')"
|
||||
exit 2
|
||||
fi
|
||||
|
||||
M_CUR_SYNC_STATE=$( echo "$M_CUR_STATE_SYNC_STATE"|cut -d'|' -f2 )
|
||||
debug "Master current sync state : $M_CUR_SYNC_STATE"
|
||||
M_CUR_SYNC_STATE=$( echo "$M_CUR_REPL_STATE_INFO"|cut -d'|' -f2 )
|
||||
debug "Master current sync state: $M_CUR_SYNC_STATE"
|
||||
if [ "$M_CUR_SYNC_STATE" != "sync" ]
|
||||
then
|
||||
echo "CRITICAL : this host is not synchronized according to master host (current sync state = '$M_CUR_SYNC_STATE')"
|
||||
echo "CRITICAL: this host is not synchronized according to master host (current sync state = '$M_CUR_SYNC_STATE')"
|
||||
exit 2
|
||||
fi
|
||||
|
||||
# Check current master XLOG file vs last replay XLOG file
|
||||
if [ "$CHECK_CUR_MASTER_XLOG" == "1" ]
|
||||
M_CUR_SENT_LSN=$( echo "$M_CUR_REPL_STATE_INFO"|cut -d'|' -f3 )
|
||||
M_CUR_WRITED_LSN=$( echo "$M_CUR_REPL_STATE_INFO"|cut -d'|' -f4 )
|
||||
debug "Master current last sent/writed LSN: '$M_CUR_SENT_LSN' / '$M_CUR_WRITED_LSN'"
|
||||
|
||||
# Check current master LSN vs last received LSN
|
||||
if [ "$CHECK_CUR_MASTER_LSN" == "1" ]
|
||||
then
|
||||
# Get current xlog file from master
|
||||
M_CUR_XLOG="$( psql_master_get 'SELECT pg_current_xlog_location()' )"
|
||||
if [ ! -n "$M_CUR_XLOG" ]
|
||||
# Get current LSN from master
|
||||
M_CUR_LSN="$( psql_master_get "SELECT $pg_current_wal_lsn" )"
|
||||
if [ ! -n "$M_CUR_LSN" ]
|
||||
then
|
||||
echo "UNKNOWN : Can't retreive current xlog from master server"
|
||||
echo "UNKNOWN: Can't retreive current LSN from master server"
|
||||
exit 3
|
||||
fi
|
||||
debug "Master current xlog : $M_CUR_XLOG"
|
||||
debug "Master current LSN: $M_CUR_LSN"
|
||||
|
||||
# Master current xlog is the last receive xlog ?
|
||||
if [ "$M_CUR_XLOG" != "$LAST_XLOG_RECEIVE" ]
|
||||
# Master current LSN is the last received LSN ?
|
||||
if [ "$M_CUR_LSN" != "$LAST_RECEIVED_LSN" ]
|
||||
then
|
||||
echo "CRITICAL : Master current xlog is not the last receive xlog"
|
||||
echo "CRITICAL: Master current LSN is not the last received LSN"
|
||||
exit 2
|
||||
fi
|
||||
debug "Master current xlog is the last receive xlog"
|
||||
debug "Master current LSN is the last received LSN"
|
||||
fi
|
||||
|
||||
# The last receive xlog is the last replay file ?
|
||||
if [ "$LAST_XLOG_RECEIVE" != "$LAST_XLOG_REPLAY" ]
|
||||
# The last received LSN is the last replayed ?
|
||||
if [ "$LAST_RECEIVED_LSN" != "$LAST_REPLAYED_LSN" ]
|
||||
then
|
||||
debug "/!\ The last receive xlog is NOT the last replay file ('$M_CUR_XLOG' / '$LAST_XLOG_RECEIVE')"
|
||||
# Report the two values actually compared by the enclosing test (last received
# vs last replayed LSN). M_CUR_LSN is not part of that comparison and is not
# even set when the master LSN check is disabled with -C 0.
debug "/!\ The last received LSN is NOT the last replayed LSN ('$LAST_RECEIVED_LSN' / '$LAST_REPLAYED_LSN')"
|
||||
REPLAY_DELAY="$( psql_get 'SELECT EXTRACT(EPOCH FROM now() - pg_last_xact_replay_timestamp());' )"
|
||||
debug "Replay delay is $REPLAY_DELAY second(s)"
|
||||
if [ $( echo "$REPLAY_DELAY >= $REPLAY_CRITICAL_DELAY"|bc -l ) -gt 0 ]
|
||||
then
|
||||
echo "CRITICAL : last receive xlog file is not the last replay file ('$LAST_XLOG_RECEIVE' / '$LAST_XLOG_REPLAY') and replay delay is $REPLAY_DELAY second(s)"
|
||||
echo "CRITICAL: last received LSN is not the last replayed ('$LAST_RECEIVED_LSN' / '$LAST_REPLAYED_LSN') and replay delay is $REPLAY_DELAY second(s)"
|
||||
exit 2
|
||||
fi
|
||||
if [ $( echo "$REPLAY_DELAY >= $REPLAY_WARNING_DELAY"|bc -l ) -gt 0 ]
|
||||
then
|
||||
echo "WARNING : last receive xlog file is not the last replay file ('$LAST_XLOG_RECEIVE' / '$LAST_XLOG_REPLAY') and replay delay is $REPLAY_DELAY second(s)"
|
||||
echo "WARNING: last received LSN is not the last replay file ('$LAST_RECEIVED_LSN' / '$LAST_REPLAYED_LSN') and replay delay is $REPLAY_DELAY second(s)"
|
||||
exit 1
|
||||
fi
|
||||
debug "Replay delay is not worrying"
|
||||
fi
|
||||
debug "Last receive xlog file is the last replay file"
|
||||
debug "Last received LSN is the last replayed file"
|
||||
|
||||
echo "OK : Hot-standby server is uptodate"
|
||||
# The master last sent LSN is the last received (and synced) ?
|
||||
if [ "$M_CUR_SENT_LSN" != "$LAST_RECEIVED_LSN" ]
|
||||
then
|
||||
echo "WARNING: master last sent LSN is not already received (and synced to disk) by slave. May be we have some network delay or load on slave"
|
||||
echo "Master last sent LSN: $M_CUR_SENT_LSN"
|
||||
echo "Slave last received (and synced to disk) LSN: $LAST_RECEIVED_LSN"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "OK: Hot-standby server is uptodate"
|
||||
exit 0
|
||||
else
|
||||
debug "File recovery.conf not found. Master mode."
|
||||
|
@ -322,31 +406,65 @@ else
|
|||
# Check recovery mode
|
||||
if [ $RECOVERY_MODE -eq 1 ]
|
||||
then
|
||||
echo "CRITICAL : In recovery mode while recovery.conf file not found !"
|
||||
echo "CRITICAL: In recovery mode while recovery.conf file not found !"
|
||||
exit 2
|
||||
fi
|
||||
debug "Postgres is not in recovery mode"
|
||||
|
||||
# Retreive current lsn
|
||||
CURRENT_LSN=$( psql_get "SELECT $pg_current_wal_lsn" )
|
||||
if [ -z "$CURRENT_LSN" ]
|
||||
then
|
||||
echo "UNKNOWN: Fail to retreive current LSN (Log Sequence Number)"
|
||||
exit 3
|
||||
fi
|
||||
debug "Current LSN: $CURRENT_LSN"
|
||||
|
||||
# Check standby client
|
||||
STANDBY_CLIENTS=$( psql_get "SELECT client_addr, sync_state FROM pg_stat_replication;" )
|
||||
STANDBY_CLIENTS=$( psql_get "SELECT application_name, client_addr, sent_lsn, write_lsn, state, sync_state, current_lag
|
||||
FROM (
|
||||
SELECT application_name, client_addr, sent_lsn, write_lsn, state, sync_state, current_lag
|
||||
FROM (
|
||||
SELECT application_name, client_addr, $sent_lsn AS sent_lsn, $write_lsn AS write_lsn, state, sync_state,
|
||||
$pg_wal_lsn_diff($pg_current_wal_lsn, $write_lsn) AS current_lag
|
||||
FROM pg_stat_replication
|
||||
) AS s2
|
||||
) AS s1" )
|
||||
if [ ! -n "$STANDBY_CLIENTS" ]
|
||||
then
|
||||
echo "WARNING : no stand-by client connected"
|
||||
echo "WARNING: no stand-by client connected"
|
||||
exit 1
|
||||
fi
|
||||
debug "Stand-by client(s) : $( echo -n $STANDBY_CLIENTS|sed 's/\n/ , /g' )"
|
||||
debug "Stand-by client(s):\n\t$( echo -e "$STANDBY_CLIENTS"|sed 's/\n/\n\t/' )"
|
||||
|
||||
STANDBY_CLIENTS_TXT=""
|
||||
STANDBY_CLIENTS_COUNT=0
|
||||
CURRENT_LSN_IS_LAST_SENT=1
|
||||
for line in $STANDBY_CLIENTS
|
||||
do
|
||||
let STANDBY_CLIENTS_COUNT=STANDBY_CLIENTS_COUNT+1
|
||||
|
||||
IP=$( echo $line|cut -d '|' -f 1 )
|
||||
MODE=$( echo $line|cut -d '|' -f 2 )
|
||||
STANDBY_CLIENTS_TXT="$STANDBY_CLIENTS_TXT $IP (mode=$MODE)"
|
||||
NAME=$( echo $line|cut -d '|' -f 1 )
|
||||
IP=$( echo $line|cut -d '|' -f 2 )
|
||||
SENT_LSN=$( echo $line|cut -d '|' -f 3 )
|
||||
WRITED_LSN=$( echo $line|cut -d '|' -f 4 )
|
||||
STATE=$( echo $line|cut -d '|' -f 5 )
|
||||
SYNC_STATE=$( echo $line|cut -d '|' -f 6 )
|
||||
LAG=$( echo $line|cut -d '|' -f 7 )
|
||||
STANDBY_CLIENTS_TXT="$STANDBY_CLIENTS_TXT\n$NAME ($IP): $STATE/$SYNC_STATE (LSN: sent='$SENT_LSN' / writed='$WRITED_LSN', Lag: ${LAG}b)"
|
||||
[ "$SENT_LSN" != "$CURRENT_LSN" ] && CURRENT_LSN_IS_LAST_SENT=0
|
||||
done
|
||||
|
||||
echo "OK : $STANDBY_CLIENTS_COUNT stand-by client(s) connected - $STANDBY_CLIENTS_TXT"
|
||||
exit 0
|
||||
if [ $CURRENT_LSN_IS_LAST_SENT -eq 1 ]
|
||||
then
|
||||
echo "OK: $STANDBY_CLIENTS_COUNT stand-by client(s) connected"
|
||||
EXIT_CODE=0
|
||||
else
|
||||
echo "WARNING: current master LSN is not the last sent to stand-by client(s) connected. May be we have some load ?"
|
||||
EXIT_CODE=1
|
||||
fi
|
||||
|
||||
echo "Current master LSN: $CURRENT_LSN"
|
||||
echo -e "$STANDBY_CLIENTS_TXT"
|
||||
exit $EXIT_CODE
|
||||
fi
|
||||
|
|
Loading…
Reference in a new issue