#!/bin/bash
#
# Nagios plugin to check cluster synchronization process
#
# This check plugin is based on 3 files maintained by the synchronization
# process :
#  - status file : Empty file updated on each successful execution
#
#                  Default : /var/run/sync_cluster.stat
#
#  - PID file : Present only while synchronization is running and
#               contains the PID of the running process.
#
#               Default : /var/run/sync_cluster.pid
#
#  - Errors file : Present only when error(s) occurred in previous
#                  synchronizations and contains details on these error(s).
#
#                  Default : /var/run/sync_cluster.err
#
# Exit codes follow the Nagios plugin convention :
#   0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN
#
# Requires GNU coreutils (stat -c, date -d @EPOCH).
#
# Author : Benjamin Renard
# Date : Thu, 29 Nov 2018 15:41:48 +0100
# Source : http://gogs.zionetrix.net/check_sync_cluster
#

# NOTE: "set -e" is deliberately not enabled: every failure has to be mapped
# to a Nagios status instead of aborting with an arbitrary exit code.

#
# Default configuration
#

# Files path template
TPL_FILE="/var/run/sync_cluster"

# WARNING/CRITICAL time in minutes for status file
WARN_STATUS_FILE=5
CRIT_STATUS_FILE=10

# WARNING/CRITICAL time in minutes for PID file
WARN_PID_FILE=5
CRIT_PID_FILE=10

# Print the help message on stdout.
# The values shown as "default" are the current values of the configuration
# variables at the time the function is called.
usage() {
  cat <<EOF
Usage : $0 [-f files_path_format] [-wcWC]
    -h                        Show this help message
    -f [files path format]    Specify files path template (default : $TPL_FILE)
    -w [minutes]              Specify WARNING time in minutes for status file (default : $WARN_STATUS_FILE)
    -c [minutes]              Specify CRITICAL time in minutes for status file (default : $CRIT_STATUS_FILE)
    -W [minutes]              Specify WARNING time in minutes for PID file (default : $WARN_PID_FILE)
    -C [minutes]              Specify CRITICAL time in minutes for PID file (default : $CRIT_PID_FILE)

EOF
}

# The leading ':' selects getopts silent mode: the offending option letter is
# then available in OPTARG, so the plugin reports it itself on stdout (which
# is what Nagios displays).
while getopts ":hf:w:c:W:C:" OPTION; do
  case "$OPTION" in
    f)
      TPL_FILE="${OPTARG}"
      ;;
    w)
      WARN_STATUS_FILE="${OPTARG}"
      ;;
    c)
      CRIT_STATUS_FILE="${OPTARG}"
      ;;
    W)
      WARN_PID_FILE="${OPTARG}"
      ;;
    C)
      CRIT_PID_FILE="${OPTARG}"
      ;;
    h)
      usage
      exit 0
      ;;
    :)
      echo "Missing value for parameter -$OPTARG"
      echo
      usage
      # Invalid command line => UNKNOWN
      exit 3
      ;;
    *)
      echo "Invalid parameter -$OPTARG"
      echo
      usage
      # Invalid command line => UNKNOWN
      exit 3
      ;;
  esac
done

# Thresholds are used in arithmetic below: refuse anything that is not a
# plain whole number so that no arbitrary expression gets evaluated.
for threshold_name in WARN_STATUS_FILE CRIT_STATUS_FILE WARN_PID_FILE CRIT_PID_FILE; do
  if ! [[ "${!threshold_name}" =~ ^[0-9]+$ ]]; then
    echo "UNKNOWN - invalid threshold '${!threshold_name}' : a whole number of minutes is expected"
    exit 3
  fi
done

STAT_FILE="$TPL_FILE.stat"
ERR_FILE="$TPL_FILE.err"
PID_FILE="$TPL_FILE.pid"

#
# Prepare return
#
EXIT_CODE=0
EXIT_STATUS=OK
ERROR_MSG=()
SUCCESS_MSG=()

# Record a problem and raise the global status accordingly.
# Globals:   EXIT_CODE, EXIT_STATUS, ERROR_MSG (written)
# Arguments: $1 - level : CRITICAL, WARNING or UNKNOWN
#            $2 - message appended to ERROR_MSG
# Precedence: once CRITICAL is reached the status never changes again;
#             UNKNOWN never replaces an already recorded WARNING.
add_error() {
  local level="$1"
  if (( EXIT_CODE != 2 )); then
    case "$level" in
      CRITICAL)
        EXIT_CODE=2
        EXIT_STATUS=$level
        ;;
      WARNING)
        EXIT_CODE=1
        EXIT_STATUS=$level
        ;;
      UNKNOWN)
        if (( EXIT_CODE != 1 )); then
          EXIT_CODE=3
          EXIT_STATUS=$level
        fi
        ;;
    esac
  fi
  ERROR_MSG+=("$2")
}

# Record an informational message.
# Globals:   SUCCESS_MSG (written)
# Arguments: $1 - message
add_success() {
  SUCCESS_MSG+=("$1")
}

# Join strings with a delimiter and print the result without trailing newline.
# Arguments: $1 - delimiter ; $2... - strings to join
join_by() {
  local delim=$1 joined item
  shift
  (( $# > 0 )) || return 0
  joined=$1
  shift
  for item in "$@"; do
    joined+="${delim}${item}"
  done
  printf '%s' "$joined"
}

# Print the modification time of a file as seconds since the Epoch.
# Arguments: $1 - file path
# Returns:   non-zero (and prints nothing) when the file can not be examined
file_mtime() {
  stat -c %Y -- "$1" 2>/dev/null
}

# Compare the age of a file with WARNING/CRITICAL thresholds and record the
# result with add_error / add_success.
# Arguments: $1 - modification time (seconds since the Epoch)
#            $2 - WARNING threshold in minutes
#            $3 - CRITICAL threshold in minutes
#            $4 - message prefix used when a threshold is exceeded
#            $5 - message prefix used when the file is recent enough
check_age() {
  local mtime=$1 warn_min=$2 crit_min=$3 late_text=$4 fresh_text=$5
  local now age
  now=$(date +%s)
  age=$(( now - mtime ))
  # 10# forces base 10 so that a value such as "08" is not read as octal
  if (( age > 10#$crit_min * 60 )); then
    add_error "CRITICAL" "$late_text since more than ${crit_min}min"
  elif (( age > 10#$warn_min * 60 )); then
    add_error "WARNING" "$late_text since more than ${warn_min}min"
  else
    add_success "$fresh_text since less than ${warn_min}min"
  fi
}

# Print one detail line : "<label> : <path> (<modification date>)" or
# "<label> : <path> (not present)".
# The modification time is shown because it is the time the checks rely on.
# Arguments: $1 - label ; $2 - file path
describe_file() {
  local label=$1 path=$2 mtime
  if [[ -e "$path" ]] && mtime=$(file_mtime "$path"); then
    echo "$label : $path ($(date -d "@$mtime" '+%Y/%m/%d %X'))"
  else
    echo "$label : $path (not present)"
  fi
}

####
# Check status file
####
if [[ -r "$STAT_FILE" ]] && stat_mtime=$(file_mtime "$STAT_FILE"); then
  check_age "$stat_mtime" "$WARN_STATUS_FILE" "$CRIT_STATUS_FILE" \
    "status file not updated" "status file updated"
else
  add_error "UNKNOWN" "status file not present"
fi

####
# Check presence of error file
####
if [[ -e "$ERR_FILE" ]]; then
  add_error "WARNING" "some errors occured syncing cluster"
else
  add_success 'no error reported'
fi

####
# Check pid file
####
if [[ -r "$PID_FILE" ]] && pid_mtime=$(file_mtime "$PID_FILE"); then
  check_age "$pid_mtime" "$WARN_PID_FILE" "$CRIT_PID_FILE" \
    "PID file present" "PID file present"
else
  add_success 'PID file not present'
fi

####
# Display return
####

# First line : status summary (problems first, then informational messages)
echo "$EXIT_STATUS - $( join_by ', ' "${ERROR_MSG[@]}" "${SUCCESS_MSG[@]}" )"
echo

describe_file "Status file" "$STAT_FILE"

describe_file "Errors file" "$ERR_FILE"
if [[ -e "$ERR_FILE" ]]; then
  echo
  echo "Errors :"
  # Indent each line of the errors file with a tab
  sed $'s/^/\t/' -- "$ERR_FILE"
  echo
  echo " => After fixing this problem(s), please remove indicator file : $ERR_FILE"
  echo
fi

describe_file "PID file" "$PID_FILE"

exit "$EXIT_CODE"