From 9eea772713b58ae5a1598ff800e37ce8acbd63ca Mon Sep 17 00:00:00 2001
From: Benjamin Renard
Date: Thu, 29 Nov 2018 15:51:57 +0100
Subject: [PATCH] Initial commit

---
 .gitignore         |   2 +
 README.md          |  36 +++++++
 check_sync_cluster | 234 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 272 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100755 check_sync_cluster

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4c5f88a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*~
+.*.swp
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..ca86744
--- /dev/null
+++ b/README.md
@@ -0,0 +1,36 @@
+Nagios plugin to check cluster synchronization process
+======================================================
+
+Usage
+-----
+
+```
+Usage : ./check_sync_cluster [-f files_path_format] [-wcWC]
+    -h                      Show this help message
+    -f [files path format]  Specify files path template (default : /var/run/sync_cluster)
+    -w [minutes]            Specify WARNING time in minutes for status file (default : 5)
+    -c [minutes]            Specify CRITICAL time in minutes for status file (default : 10)
+    -W [minutes]            Specify WARNING time in minutes for PID file (default : 5)
+    -C [minutes]            Specify CRITICAL time in minutes for PID file (default : 10)
+```
+
+Copyright
+---------
+
+Copyright (c) 2018 Benjamin Renard
+
+License
+-------
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License version 3
+as published by the Free Software Foundation.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
diff --git a/check_sync_cluster b/check_sync_cluster
new file mode 100755
index 0000000..a2fffd9
--- /dev/null
+++ b/check_sync_cluster
@@ -0,0 +1,234 @@
+#!/bin/bash
+#
+# Nagios plugin to check cluster synchronization process
+#
+# This check plugin is based on 3 files maintained by the synchronization
+# process :
+# - status file : Empty file updated on each successful execution
+#
+#     Default : /var/run/sync_cluster.stat
+#
+# - PID file : Present only when synchronization is running and
+#     contains the PID of the running process.
+#
+#     Default : /var/run/sync_cluster.pid
+#
+# - Errors file : Present only when error(s) occurred in previous
+#     synchronizations and contains details on these error(s).
+#
+#     Default : /var/run/sync_cluster.err
+#
+# Author : Benjamin Renard
+# Date : Thu, 29 Nov 2018 15:41:48 +0100
+# Source : http://gogs.zionetrix.net/check_sync_cluster
+#
+
+# Default configuration
+
+# Files path template
+TPL_FILE="/var/run/sync_cluster"
+
+# WARNING/CRITICAL time in minutes for status file
+WARN_STATUS_FILE=5
+CRIT_STATUS_FILE=10
+
+# WARNING/CRITICAL time in minutes for PID file
+WARN_PID_FILE=5
+CRIT_PID_FILE=10
+
+# Print the help message (showing the current default values) on stdout
+function usage() {
+  echo "Usage : $0 [-f files_path_format] [-wcWC]
+    -h                      Show this help message
+    -f [files path format]  Specify files path template (default : $TPL_FILE)
+    -w [minutes]            Specify WARNING time in minutes for status file (default : $WARN_STATUS_FILE)
+    -c [minutes]            Specify CRITICAL time in minutes for status file (default : $CRIT_STATUS_FILE)
+    -W [minutes]            Specify WARNING time in minutes for PID file (default : $WARN_PID_FILE)
+    -C [minutes]            Specify CRITICAL time in minutes for PID file (default : $CRIT_PID_FILE)
+"
+}
+
+# Parse the command line; usage errors exit 3 (UNKNOWN), since 1 means WARNING
+while getopts "hf:w:c:W:C:" OPTION; do
+  case "$OPTION" in
+    f)
+      TPL_FILE="${OPTARG}"
+      ;;
+    w)
+      WARN_STATUS_FILE="${OPTARG}"
+      ;;
+    c)
+      CRIT_STATUS_FILE="${OPTARG}"
+      ;;
+    W)
+      WARN_PID_FILE="${OPTARG}"
+      ;;
+    C)
+      CRIT_PID_FILE="${OPTARG}"
+      ;;
+    h)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "UNKNOWN - Invalid parameter"
+      echo
+      usage
+      exit 3
+      ;;
+  esac
+done
+
+# Thresholds feed shell arithmetic below: accept plain integers only
+for threshold in "$WARN_STATUS_FILE" "$CRIT_STATUS_FILE" "$WARN_PID_FILE" "$CRIT_PID_FILE"; do
+  case "$threshold" in
+    '' | *[!0-9]*)
+      echo "UNKNOWN - Invalid threshold '$threshold' (integer number of minutes expected)"
+      exit 3
+      ;;
+  esac
+done
+
+STAT_FILE="$TPL_FILE.stat"
+ERR_FILE="$TPL_FILE.err"
+PID_FILE="$TPL_FILE.pid"
+
+#
+# Prepare return
+#
+
+EXIT_CODE=0
+EXIT_STATUS=OK
+
+ERROR_MSG=()
+SUCCESS_MSG=()
+
+# Record a problem: $1 = level (CRITICAL, WARNING or UNKNOWN), $2 = message.
+# The global status only escalates: CRITICAL > WARNING > UNKNOWN > OK.
+function add_error() {
+  local level="$1"
+  if [ "$EXIT_CODE" -ne 2 ]; then
+    case "$level" in
+      CRITICAL)
+        EXIT_CODE=2
+        EXIT_STATUS=$level
+        ;;
+      WARNING)
+        EXIT_CODE=1
+        EXIT_STATUS=$level
+        ;;
+      UNKNOWN)
+        # UNKNOWN must not hide a WARNING that was already recorded
+        if [ "$EXIT_CODE" -ne 1 ]; then
+          EXIT_CODE=3
+          EXIT_STATUS=$level
+        fi
+        ;;
+    esac
+  fi
+  ERROR_MSG+=("$2")
+}
+
+# Record an informational message: $1 = message
+function add_success() {
+  SUCCESS_MSG+=("$1")
+}
+
+# Print arguments $2.. joined with the separator $1 (no trailing newline)
+function join_by {
+  local d=$1; shift; printf '%s' "$1"; shift; printf "%s" "${@/#/$d}";
+}
+
+####
+# Check status file
+####
+if [ -r "$STAT_FILE" ]; then
+  T=$( stat -c %Y -- "$STAT_FILE" ) # last modification time (epoch seconds)
+  CUR=$( date +%s )
+  diff=$(( CUR - T ))
+
+  WARN_STATUS_FILE_TIME=$(( 10#$WARN_STATUS_FILE * 60 ))
+  CRIT_STATUS_FILE_TIME=$(( 10#$CRIT_STATUS_FILE * 60 ))
+
+  # CRITICAL if diff > $CRIT_STATUS_FILE_TIME
+  if [ "$diff" -gt "$CRIT_STATUS_FILE_TIME" ]; then
+    add_error "CRITICAL" "status file not updated since more than ${CRIT_STATUS_FILE}min"
+  # WARNING if diff > $WARN_STATUS_FILE_TIME
+  elif [ "$diff" -gt "$WARN_STATUS_FILE_TIME" ]; then
+    add_error "WARNING" "status file not updated since more than ${WARN_STATUS_FILE}min"
+  else
+    add_success "status file updated since less than ${WARN_STATUS_FILE}min"
+  fi
+else
+  add_error "UNKNOWN" "status file not present"
+fi
+
+####
+# Check presence of error file
+####
+if [ -e "$ERR_FILE" ]; then
+  add_error "WARNING" "some errors occured syncing cluster"
+else
+  add_success 'no error reported'
+fi
+
+####
+# Check pid file
+####
+if [ -r "$PID_FILE" ]; then
+  T=$( stat -c %Y -- "$PID_FILE" ) # last modification time (epoch seconds)
+  CUR=$( date +%s )
+  diff=$(( CUR - T ))
+
+  WARN_PID_FILE_TIME=$(( 10#$WARN_PID_FILE * 60 ))
+  CRIT_PID_FILE_TIME=$(( 10#$CRIT_PID_FILE * 60 ))
+
+  # CRITICAL if diff > $CRIT_PID_FILE_TIME
+  if [ "$diff" -gt "$CRIT_PID_FILE_TIME" ]; then
+    add_error "CRITICAL" "PID file present since more than ${CRIT_PID_FILE}min"
+  # WARNING if diff > $WARN_PID_FILE_TIME
+  elif [ "$diff" -gt "$WARN_PID_FILE_TIME" ]; then
+    add_error "WARNING" "PID file present since more than ${WARN_PID_FILE}min"
+  else
+    add_success "PID file present since less than ${WARN_PID_FILE}min"
+  fi
+else
+  add_success 'PID file not present'
+fi
+
+####
+# Display return
+####
+echo "$EXIT_STATUS - $( join_by ', ' "${ERROR_MSG[@]}" "${SUCCESS_MSG[@]}" )"
+echo
+
+# Details: the dates shown are modification times, as used by the checks above
+echo -n "Status file : $STAT_FILE"
+if [ -e "$STAT_FILE" ]; then
+  echo " ($( find "$STAT_FILE" -printf "%TY/%Tm/%Td %TX"|cut -d'.' -f1 ))"
+else
+  echo " (not present)"
+fi
+
+echo -n "Errors file : $ERR_FILE"
+if [ -e "$ERR_FILE" ]; then
+  echo " ($( find "$ERR_FILE" -printf "%TY/%Tm/%Td %TX"|cut -d'.' -f1 ))"
+  echo
+  echo "Errors :"
+  sed 's/^/\t/' -- "$ERR_FILE"
+  echo
+  echo " => After fixing this problem(s), please remove indicator file : $ERR_FILE"
+  echo
+else
+  echo " (not present)"
+fi
+
+echo -n "PID file : $PID_FILE"
+if [ -e "$PID_FILE" ]; then
+  echo " ($( find "$PID_FILE" -printf "%TY/%Tm/%Td %TX"|cut -d'.' -f1 ))"
+else
+  echo " (not present)"
+fi
+
+exit "$EXIT_CODE"
+