Python3 and Octopus compatibility, code cleaning
This commit is contained in:
parent
dfb2f2e98b
commit
9693e0d22b
3 changed files with 210 additions and 180 deletions
49
README
49
README
|
@ -1,49 +0,0 @@
|
||||||
Nagios plugin to check Ceph cluster status
|
|
||||||
==========================================
|
|
||||||
|
|
||||||
This plugin check ceph health, number of OSDs UP, number of MONs UP
|
|
||||||
and PGs states to determine Ceph cluster status.
|
|
||||||
|
|
||||||
Usage
|
|
||||||
-----
|
|
||||||
|
|
||||||
Usage: check_ceph_status [options]
|
|
||||||
|
|
||||||
Options:
|
|
||||||
-h, --help show this help message and exit
|
|
||||||
-d, --debug
|
|
||||||
-b BIN, --bin=BIN Ceph binary (default : /usr/bin/ceph)
|
|
||||||
--conf=CONF Ceph configuration file
|
|
||||||
-m MON, --mon=MON Ceph monitor address[:port]
|
|
||||||
-i ID, --id=ID Ceph client id
|
|
||||||
-k KEYRING, --keyring=KEYRING
|
|
||||||
Ceph client keyring file
|
|
||||||
-w WARNLOSTOSD, --warning-lost-osd=WARNLOSTOSD
|
|
||||||
Warning number of non-up OSDs (default : 1)
|
|
||||||
-c CRITLOSTOSD, --critical-lost-osd=CRITLOSTOSD
|
|
||||||
Critical number of non-up OSDs (default : 2)
|
|
||||||
-W WARNLOSTMON, --warning-lost-mon=WARNLOSTMON
|
|
||||||
Warning number of non-up MONs (default : 1)
|
|
||||||
-C CRITLOSTMON, --critical-lost-mon=CRITLOSTMON
|
|
||||||
Critical number of non-up MONs (default : 2)
|
|
||||||
|
|
||||||
Copyright
|
|
||||||
---------
|
|
||||||
|
|
||||||
Copyright (c) 2013 Benjamin Renard <brenard@zionetrix.net>
|
|
||||||
|
|
||||||
License
|
|
||||||
-------
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or
|
|
||||||
modify it under the terms of the GNU General Public License version 2
|
|
||||||
as published by the Free Software Foundation.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program; if not, write to the Free Software
|
|
||||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
49
README.md
Normal file
49
README.md
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
# Nagios plugin to check Ceph cluster status
|
||||||
|
|
||||||
|
This plugin checks Ceph health, number of OSDs UP, number of MONs UP
|
||||||
|
and PG states to determine Ceph cluster status.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```
|
||||||
|
usage: check_ceph_status [-h] [-d] [-b BIN] [--conf CONF] [-m MON] [-i ID]
|
||||||
|
[-k KEYRING] [-w WARNLOSTOSD] [-c CRITLOSTOSD]
|
||||||
|
[-W WARNLOSTMON] [-C CRITLOSTMON]
|
||||||
|
|
||||||
|
optional arguments:
|
||||||
|
-h, --help show this help message and exit
|
||||||
|
-d, --debug
|
||||||
|
-b BIN, --bin BIN Ceph binary (default : /usr/bin/ceph)
|
||||||
|
--conf CONF Ceph configuration file
|
||||||
|
-m MON, --mon MON Ceph monitor address[:port]
|
||||||
|
-i ID, --id ID Ceph client id
|
||||||
|
-k KEYRING, --keyring KEYRING
|
||||||
|
Ceph client keyring file
|
||||||
|
-w WARNLOSTOSD, --warning-lost-osd WARNLOSTOSD
|
||||||
|
Warning number of non-up OSDs (default : 1)
|
||||||
|
-c CRITLOSTOSD, --critical-lost-osd CRITLOSTOSD
|
||||||
|
Critical number of non-up OSDs (default : 2)
|
||||||
|
-W WARNLOSTMON, --warning-lost-mon WARNLOSTMON
|
||||||
|
Warning number of non-up MONs (default : 1)
|
||||||
|
-C CRITLOSTMON, --critical-lost-mon CRITLOSTMON
|
||||||
|
Critical number of non-up MONs (default : 2)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Copyright
|
||||||
|
|
||||||
|
Copyright (c) 2013-2021 Benjamin Renard <brenard@zionetrix.net>
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License version 3
|
||||||
|
as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/python
|
#!/usr/bin/env python
|
||||||
#
|
#
|
||||||
# Nagios plugin to check Ceph cluster state
|
# Nagios plugin to check Ceph cluster state
|
||||||
#
|
#
|
||||||
|
@ -6,10 +6,10 @@
|
||||||
# and PGs states to determine Ceph cluster status.
|
# and PGs states to determine Ceph cluster status.
|
||||||
#
|
#
|
||||||
# Usage: check_ceph_status [options]
|
# Usage: check_ceph_status [options]
|
||||||
#
|
#
|
||||||
# Options:
|
# Options:
|
||||||
# -h, --help show this help message and exit
|
# -h, --help show this help message and exit
|
||||||
# -d, --debug
|
# -d, --debug
|
||||||
# -b BIN, --bin=BIN Ceph binary (default : /usr/bin/ceph)
|
# -b BIN, --bin=BIN Ceph binary (default : /usr/bin/ceph)
|
||||||
# --conf=CONF Ceph configuration file
|
# --conf=CONF Ceph configuration file
|
||||||
# -m MON, --mon=MON Ceph monitor address[:port]
|
# -m MON, --mon=MON Ceph monitor address[:port]
|
||||||
|
@ -30,19 +30,23 @@
|
||||||
# This program is free software; you can redistribute it and/or
|
# This program is free software; you can redistribute it and/or
|
||||||
# modify it under the terms of the GNU General Public License version 2
|
# modify it under the terms of the GNU General Public License version 2
|
||||||
# as published by the Free Software Foundation.
|
# as published by the Free Software Foundation.
|
||||||
#
|
#
|
||||||
# This program is distributed in the hope that it will be useful,
|
# This program is distributed in the hope that it will be useful,
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
# GNU General Public License for more details.
|
# GNU General Public License for more details.
|
||||||
#
|
#
|
||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with this program; if not, write to the Free Software
|
# along with this program; if not, write to the Free Software
|
||||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
#
|
#
|
||||||
|
|
||||||
import sys,os,json,subprocess,re
|
import sys
|
||||||
from optparse import OptionParser
|
import os
|
||||||
|
import json
|
||||||
|
import subprocess
|
||||||
|
import re
|
||||||
|
import argparse
|
||||||
|
|
||||||
# default ceph values
|
# default ceph values
|
||||||
CEPH_COMMAND = '/usr/bin/ceph'
|
CEPH_COMMAND = '/usr/bin/ceph'
|
||||||
|
@ -53,103 +57,114 @@ CRIT_LOST_MON = 2
|
||||||
|
|
||||||
# nagios exit code
|
# nagios exit code
|
||||||
STATUS = {
|
STATUS = {
|
||||||
'OK': 0,
|
'OK': 0,
|
||||||
'WARNING': 1,
|
'WARNING': 1,
|
||||||
'CRITICAL': 2,
|
'CRITICAL': 2,
|
||||||
'UNKNOWN': 3
|
'UNKNOWN': 3
|
||||||
}
|
}
|
||||||
|
|
||||||
parser = OptionParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_option('-d',
|
parser.add_argument(
|
||||||
'--debug',
|
'-d', '--debug',
|
||||||
action="store_true",
|
action="store_true",
|
||||||
dest="debug",
|
dest="debug",
|
||||||
default=False)
|
default=False
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_option('-b',
|
parser.add_argument(
|
||||||
'--bin',
|
'-b', '--bin',
|
||||||
action="store",
|
action="store",
|
||||||
dest="bin",
|
dest="bin",
|
||||||
help="Ceph binary (default : %s)" % CEPH_COMMAND,
|
help="Ceph binary (default : %s)" % CEPH_COMMAND,
|
||||||
type='string',
|
type=str,
|
||||||
default=CEPH_COMMAND)
|
default=CEPH_COMMAND
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_option('--conf',
|
parser.add_argument(
|
||||||
action="store",
|
'--conf',
|
||||||
dest="conf",
|
action="store",
|
||||||
help="Ceph configuration file",
|
dest="conf",
|
||||||
type='string',
|
help="Ceph configuration file",
|
||||||
default=None)
|
type=str,
|
||||||
|
default=None
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_option('-m',
|
parser.add_argument(
|
||||||
'--mon',
|
'-m', '--mon',
|
||||||
action="store",
|
action="store",
|
||||||
dest="mon",
|
dest="mon",
|
||||||
help="Ceph monitor address[:port]",
|
help="Ceph monitor address[:port]",
|
||||||
type='string',
|
type=str,
|
||||||
default=None)
|
default=None
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_option('-i',
|
parser.add_argument(
|
||||||
'--id',
|
'-i', '--id',
|
||||||
action="store",
|
action="store",
|
||||||
dest="id",
|
dest="id",
|
||||||
help="Ceph client id",
|
help="Ceph client id",
|
||||||
type='string',
|
type=str,
|
||||||
default=None)
|
default=None
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_option('-k',
|
parser.add_argument(
|
||||||
'--keyring',
|
'-k', '--keyring',
|
||||||
action="store",
|
action="store",
|
||||||
dest="keyring",
|
dest="keyring",
|
||||||
help="Ceph client keyring file",
|
help="Ceph client keyring file",
|
||||||
type='string',
|
type=str,
|
||||||
default=None)
|
default=None
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_option('-w',
|
parser.add_argument(
|
||||||
'--warning-lost-osd',
|
'-w', '--warning-lost-osd',
|
||||||
action="store",
|
action="store",
|
||||||
dest="warnlostosd",
|
dest="warnlostosd",
|
||||||
help="Warning number of non-up OSDs (default : %s)" % WARN_LOST_OSD,
|
help="Warning number of non-up OSDs (default : %s)" % WARN_LOST_OSD,
|
||||||
type='int',
|
type=int,
|
||||||
default=WARN_LOST_OSD)
|
default=WARN_LOST_OSD
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_option('-c',
|
parser.add_argument(
|
||||||
'--critical-lost-osd',
|
'-c', '--critical-lost-osd',
|
||||||
action="store",
|
action="store",
|
||||||
dest="critlostosd",
|
dest="critlostosd",
|
||||||
help="Critical number of non-up OSDs (default : %s)" % CRIT_LOST_OSD,
|
help="Critical number of non-up OSDs (default : %s)" % CRIT_LOST_OSD,
|
||||||
type='int',
|
type=int,
|
||||||
default=CRIT_LOST_OSD)
|
default=CRIT_LOST_OSD
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_option('-W',
|
parser.add_argument(
|
||||||
'--warning-lost-mon',
|
'-W', '--warning-lost-mon',
|
||||||
action="store",
|
action="store",
|
||||||
dest="warnlostmon",
|
dest="warnlostmon",
|
||||||
help="Warning number of non-up MONs (default : %s)" % WARN_LOST_MON,
|
help="Warning number of non-up MONs (default : %s)" % WARN_LOST_MON,
|
||||||
type='int',
|
type=int,
|
||||||
default=WARN_LOST_MON)
|
default=WARN_LOST_MON
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_option('-C',
|
parser.add_argument(
|
||||||
'--critical-lost-mon',
|
'-C', '--critical-lost-mon',
|
||||||
action="store",
|
action="store",
|
||||||
dest="critlostmon",
|
dest="critlostmon",
|
||||||
help="Critical number of non-up MONs (default : %s)" % CRIT_LOST_MON,
|
help="Critical number of non-up MONs (default : %s)" % CRIT_LOST_MON,
|
||||||
type='int',
|
type=int,
|
||||||
default=CRIT_LOST_MON)
|
default=CRIT_LOST_MON
|
||||||
|
)
|
||||||
|
|
||||||
(options, args) = parser.parse_args()
|
options = parser.parse_args()
|
||||||
|
|
||||||
# validate args
|
# validate args
|
||||||
if not os.path.exists(options.bin):
|
if not os.path.exists(options.bin):
|
||||||
print "ERROR: ceph executable '%s' doesn't exist" % options.bin
|
print("ERROR: ceph executable '%s' doesn't exist" % options.bin)
|
||||||
sys.exit(STATUS['UNKNOWN'])
|
sys.exit(STATUS['UNKNOWN'])
|
||||||
|
|
||||||
if options.conf and not os.path.exists(options.conf):
|
if options.conf and not os.path.exists(options.conf):
|
||||||
print "ERROR: ceph conf file '%s' doesn't exist" % options.conf
|
print("ERROR: ceph conf file '%s' doesn't exist" % options.conf)
|
||||||
sys.exit(STATUS['UNKNOWN'])
|
sys.exit(STATUS['UNKNOWN'])
|
||||||
|
|
||||||
if options.keyring and not os.path.exists(options.keyring):
|
if options.keyring and not os.path.exists(options.keyring):
|
||||||
print "ERROR: keyring file '%s' doesn't exist" % options.keyring
|
print("ERROR: keyring file '%s' doesn't exist" % options.keyring)
|
||||||
sys.exit(STATUS['UNKNOWN'])
|
sys.exit(STATUS['UNKNOWN'])
|
||||||
|
|
||||||
# build command
|
# build command
|
||||||
|
@ -168,66 +183,81 @@ if options.keyring:
|
||||||
ceph_cmd.append(options.keyring)
|
ceph_cmd.append(options.keyring)
|
||||||
ceph_cmd.append('status')
|
ceph_cmd.append('status')
|
||||||
ceph_cmd.append('--format=json')
|
ceph_cmd.append('--format=json')
|
||||||
|
|
||||||
# exec command
|
# exec command
|
||||||
p = subprocess.Popen(ceph_cmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
|
p = subprocess.Popen(ceph_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
output, err = p.communicate()
|
output, err = p.communicate()
|
||||||
|
|
||||||
if output:
|
if output:
|
||||||
data=json.loads(output)
|
data = json.loads(output.decode(sys.getdefaultencoding()))
|
||||||
|
|
||||||
status='OK'
|
status = 'OK'
|
||||||
|
|
||||||
health=data['health']['overall_status']
|
health = data['health'].get('status', data['health'].get('overall_status'))
|
||||||
if health=='HEALTH_WARN':
|
if not health:
|
||||||
status='WARNING'
|
print("UNKNOWN : fail to retreive health status")
|
||||||
elif health=='HEALTH_CRIT':
|
sys.exit(STATUS['UNKNOWN'])
|
||||||
status='CRITICAL'
|
if health == 'HEALTH_WARN':
|
||||||
|
status = 'WARNING'
|
||||||
|
elif health == 'HEALTH_CRIT':
|
||||||
|
status = 'CRITICAL'
|
||||||
|
|
||||||
total_mon=len(data['monmap']['mons'])
|
total_mon = data['monmap'].get('num_mons', len(data['monmap'].get('mons', [])))
|
||||||
total_mon_up=len(data['health']['timechecks']['mons'])
|
if not total_mon:
|
||||||
|
print("UNKNOWN : fail to retreive total number of monitors")
|
||||||
|
sys.exit(STATUS['UNKNOWN'])
|
||||||
|
total_mon_up = len(data.get('quorum', data['health'].get('timechecks', dict()).get('mons', [])))
|
||||||
|
if not total_mon_up:
|
||||||
|
print("UNKNOWN : fail to retreive total number of UP monitors")
|
||||||
|
sys.exit(STATUS['UNKNOWN'])
|
||||||
|
|
||||||
num_lost_mon=total_mon-total_mon_up
|
num_lost_mon = total_mon-total_mon_up
|
||||||
if num_lost_mon==0:
|
if num_lost_mon == 0:
|
||||||
monstate="(MONs UP : %s/%s)" % (total_mon_up,total_mon)
|
monstate = "(MONs UP : %s/%s)" % (total_mon_up, total_mon)
|
||||||
else:
|
else:
|
||||||
monstate="%s MONs down (MONs UP : %s/%s)" % (num_lost_mon,total_mon_up,total_mon)
|
monstate = "%s MONs down (MONs UP : %s/%s)" % (num_lost_mon, total_mon_up, total_mon)
|
||||||
if num_lost_mon >= options.critlostmon:
|
if num_lost_mon >= options.critlostmon:
|
||||||
status='CRITICAL'
|
status = 'CRITICAL'
|
||||||
elif num_lost_mon >= options.warnlostmon and status!='CRITICAL':
|
elif num_lost_mon >= options.warnlostmon and status != 'CRITICAL':
|
||||||
status='WARNING'
|
status = 'WARNING'
|
||||||
|
|
||||||
total_osd=data['osdmap']['osdmap']['num_osds']
|
total_osd = data['osdmap'].get('osdmap', data['osdmap']).get('num_osds')
|
||||||
total_osd_up=data['osdmap']['osdmap']['num_up_osds']
|
if total_osd is None:
|
||||||
|
print("UNKNOWN : fail to retreive total number of OSD")
|
||||||
|
sys.exit(STATUS['UNKNOWN'])
|
||||||
|
total_osd_up = data['osdmap'].get('osdmap', data['osdmap']).get('num_up_osds')
|
||||||
|
if total_osd_up is None:
|
||||||
|
print("UNKNOWN : fail to retreive total number of UP OSD")
|
||||||
|
sys.exit(STATUS['UNKNOWN'])
|
||||||
|
|
||||||
num_lost_osd=total_osd-total_osd_up
|
num_lost_osd = total_osd - total_osd_up
|
||||||
|
|
||||||
if num_lost_osd>=options.critlostosd:
|
if num_lost_osd >= options.critlostosd:
|
||||||
status='CRITICAL'
|
status = 'CRITICAL'
|
||||||
elif num_lost_osd>=options.warnlostosd and status!='CRITICAL':
|
elif num_lost_osd >= options.warnlostosd and status != 'CRITICAL':
|
||||||
status='WARNING'
|
status = 'WARNING'
|
||||||
|
|
||||||
total_pg=data['pgmap']['num_pgs']
|
total_pg = data['pgmap']['num_pgs']
|
||||||
pgstate=""
|
pgstate = ""
|
||||||
for st in data['pgmap']['pgs_by_state']:
|
for st in data['pgmap']['pgs_by_state']:
|
||||||
if re.search('(down|inconsistent|imcomplete|stale)',st['state_name'],re.IGNORECASE):
|
if re.search('(down|inconsistent|imcomplete|stale)', st['state_name'], re.IGNORECASE):
|
||||||
status='CRITICAL'
|
status = 'CRITICAL'
|
||||||
pgstate="%s / %s PGs %s" % (pgstate,st['count'],st['state_name'])
|
pgstate = "%s / %s PGs %s" % (pgstate, st['count'], st['state_name'])
|
||||||
elif re.search('(replay|degraded|repair|recovering|backfill)',st['state_name'],re.IGNORECASE):
|
elif re.search('(replay|degraded|repair|recovering|backfill)', st['state_name'], re.IGNORECASE):
|
||||||
if status!='CRITICAL':
|
if status != 'CRITICAL':
|
||||||
status="WARNING"
|
status = "WARNING"
|
||||||
pgstate="%s / %s PGs %s" % (pgstate,st['count'],st['state_name'])
|
pgstate = "%s / %s PGs %s" % (pgstate, st['count'], st['state_name'])
|
||||||
elif st['state_name']=="active+clean":
|
elif st['state_name'] == "active+clean":
|
||||||
pgstate="%s / %s/%s PGs active+clean" % (pgstate,st['count'],total_pg)
|
pgstate = "%s / %s/%s PGs active+clean" % (pgstate, st['count'], total_pg)
|
||||||
|
|
||||||
msg="%s : %s%s %s" % (status,health,pgstate,monstate)
|
msg = "%s : %s%s %s" % (status, health, pgstate, monstate)
|
||||||
|
|
||||||
|
|
||||||
if num_lost_osd==0:
|
if num_lost_osd == 0:
|
||||||
print "%s (OSDs UP : %s/%s)" % (msg,total_osd_up,total_osd)
|
print("%s (OSDs UP : %s/%s)" % (msg, total_osd_up, total_osd))
|
||||||
else:
|
else:
|
||||||
print "%s / %s OSDs down (OSDs UP : %s/%s)" % (msg,num_lost_osd,total_osd_up,total_osd)
|
print("%s / %s OSDs down (OSDs UP : %s/%s)" % (msg, num_lost_osd, total_osd_up, total_osd))
|
||||||
sys.exit(STATUS[status])
|
sys.exit(STATUS[status])
|
||||||
else:
|
else:
|
||||||
print "UNKNOWN : fail to execute ceph status command"
|
print("UNKNOWN : fail to execute ceph status command")
|
||||||
sys.exit(STATUS['UNKNOWN'])
|
sys.exit(STATUS['UNKNOWN'])
|
||||||
|
|
Loading…
Reference in a new issue