diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..b08c79d --- /dev/null +++ b/.pylintrc @@ -0,0 +1,3 @@ +[MESSAGES CONTROL] +disable=consider-using-f-string, + invalid-name diff --git a/check_ceph_status b/check_ceph_status index c614c78..3074806 100755 --- a/check_ceph_status +++ b/check_ceph_status @@ -1,45 +1,46 @@ #!/usr/bin/env python3 -# -# Nagios plugin to check Ceph cluster state -# -# This plugin check ceph health, number of OSDs UP, number of MONs UP -# and PGs states to determine Ceph cluster status. -# -# Usage: check_ceph_status [options] -# -# Options: -# -h, --help show this help message and exit -# -d, --debug -# -b BIN, --bin=BIN Ceph binary (default : /usr/bin/ceph) -# --conf=CONF Ceph configuration file -# -m MON, --mon=MON Ceph monitor address[:port] -# -i ID, --id=ID Ceph client id -# -k KEYRING, --keyring=KEYRING -# Ceph client keyring file -# -w WARNLOSTOSD, --warning-lost-osd=WARNLOSTOSD -# Warning number of non-up OSDs (default : 1) -# -c CRITLOSTOSD, --critical-lost-osd=CRITLOSTOSD -# Critical number of non-up OSDs (default : 2) -# -W WARNLOSTMON, --warning-lost-mon=WARNLOSTMON -# Warning number of non-up MONs (default : 1) -# -C CRITLOSTMON, --critical-lost-mon=CRITLOSTMON -# Critical number of non-up MONs (default : 2) -# -# Copyright (c) 2013 Benjamin Renard -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License version 2 -# as published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-# +""" +Nagios plugin to check Ceph cluster state + +This plugin checks Ceph health, number of OSDs UP, number of MONs UP +and PG states to determine Ceph cluster status. + + Usage: check_ceph_status [options] + + Options: + -h, --help show this help message and exit + -d, --debug + -b BIN, --bin=BIN Ceph binary (default : /usr/bin/ceph) + --conf=CONF Ceph configuration file + -m MON, --mon=MON Ceph monitor address[:port] + -i ID, --id=ID Ceph client id + -k KEYRING, --keyring=KEYRING + Ceph client keyring file + -w WARNLOSTOSD, --warning-lost-osd=WARNLOSTOSD + Warning number of non-up OSDs (default : 1) + -c CRITLOSTOSD, --critical-lost-osd=CRITLOSTOSD + Critical number of non-up OSDs (default : 2) + -W WARNLOSTMON, --warning-lost-mon=WARNLOSTMON + Warning number of non-up MONs (default : 1) + -C CRITLOSTMON, --critical-lost-mon=CRITLOSTMON + Critical number of non-up MONs (default : 2) + +Copyright (c) 2013 Benjamin Renard + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License version 2 +as published by the Free Software Foundation. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+""" + import argparse import json import os @@ -154,7 +155,7 @@ parser.add_argument( options = parser.parse_args() - # validate args +# validate args if not os.path.exists(options.bin): print("ERROR: ceph executable '%s' doesn't exist" % options.bin) sys.exit(STATUS['UNKNOWN']) @@ -185,6 +186,7 @@ ceph_cmd.append('status') ceph_cmd.append('--format=json') # exec command +# pylint: disable=consider-using-with p = subprocess.Popen(ceph_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output, err = p.communicate() @@ -209,7 +211,8 @@ total_mon = data['monmap'].get('num_mons', len(data['monmap'].get('mons', []))) if not total_mon: print("UNKNOWN : fail to retreive total number of monitors") sys.exit(STATUS['UNKNOWN']) -total_mon_up = len(data.get('quorum', data['health'].get('timechecks', {}).get('mons', []))) +total_mon_up = len(data.get( + 'quorum', data['health'].get('timechecks', {}).get('mons', []))) if not total_mon_up: print("UNKNOWN : fail to retreive total number of UP monitors") sys.exit(STATUS['UNKNOWN']) @@ -218,7 +221,8 @@ num_lost_mon = total_mon-total_mon_up if num_lost_mon == 0: monstate = "(MONs UP : %s/%s)" % (total_mon_up, total_mon) else: - monstate = "%s MONs down (MONs UP : %s/%s)" % (num_lost_mon, total_mon_up, total_mon) + monstate = "%s MONs down (MONs UP : %s/%s)" % ( + num_lost_mon, total_mon_up, total_mon) if num_lost_mon >= options.critlostmon: status = 'CRITICAL' elif num_lost_mon >= options.warnlostmon and status != 'CRITICAL': @@ -243,15 +247,22 @@ elif num_lost_osd >= options.warnlostosd and status != 'CRITICAL': total_pg = data['pgmap']['num_pgs'] pgstate = "" for st in data['pgmap']['pgs_by_state']: - if re.search('(down|inconsistent|imcomplete|stale)', st['state_name'], re.IGNORECASE): + if re.search( + '(down|inconsistent|incomplete|stale)', st['state_name'], + re.IGNORECASE + ): status = 'CRITICAL' pgstate = "%s / %s PGs %s" % (pgstate, st['count'], st['state_name']) - elif 
re.search('(replay|degraded|repair|recovering|backfill)', st['state_name'], re.IGNORECASE): + elif re.search( + '(replay|degraded|repair|recovering|backfill)', st['state_name'], + re.IGNORECASE + ): if status != 'CRITICAL': status = "WARNING" pgstate = "%s / %s PGs %s" % (pgstate, st['count'], st['state_name']) elif st['state_name'] == "active+clean": - pgstate = "%s / %s/%s PGs active+clean" % (pgstate, st['count'], total_pg) + pgstate = "%s / %s/%s PGs active+clean" % ( + pgstate, st['count'], total_pg) msg = "%s : %s%s %s" % (status, health, pgstate, monstate) @@ -259,5 +270,6 @@ msg = "%s : %s%s %s" % (status, health, pgstate, monstate) if num_lost_osd == 0: print("%s (OSDs UP : %s/%s)" % (msg, total_osd_up, total_osd)) else: - print("%s / %s OSDs down (OSDs UP : %s/%s)" % (msg, num_lost_osd, total_osd_up, total_osd)) + print("%s / %s OSDs down (OSDs UP : %s/%s)" % ( + msg, num_lost_osd, total_osd_up, total_osd)) sys.exit(STATUS[status])