From fadfa27591da2271db54a38b16320672af96300b Mon Sep 17 00:00:00 2001 From: Benjamin Renard Date: Tue, 3 Dec 2013 11:00:39 +0100 Subject: [PATCH] Initial commit --- .gitignore | 1 + README | 47 +++++++++++ check_ceph_usage | 207 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 255 insertions(+) create mode 100644 .gitignore create mode 100644 README create mode 100755 check_ceph_usage diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b25c15b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*~ diff --git a/README b/README new file mode 100644 index 0000000..ce70d4f --- /dev/null +++ b/README @@ -0,0 +1,47 @@ +Nagios plugin to check Ceph cluster usage +========================================= + +Usage +----- + + Usage: check_ceph_usage [options] + + Options: + -h, --help show this help message and exit + -d, --debug + -b BIN, --bin=BIN Ceph binary (default : /usr/bin/ceph) + --conf=CONF Ceph configuration file + -m MON, --mon=MON Ceph monitor address[:port] + -i ID, --id=ID Ceph client id + -k KEYRING, --keyring=KEYRING + Ceph client keyring file + -w WARNDATA, --warning-data=WARNDATA + Warning data threshold (default : 70%) + -c CRITDATA, --critical-data=CRITDATA + Critical data threshold (default : 85%) + -W WARNALLOC, --warning-allocated=WARNALLOC + Warning allocated threshold (default : 80%) + -C CRITALLOC, --critical-allocated=CRITALLOC + Critical allocated threshold (default : 90%) + +Copyright +--------- + +Copyright (c) 2013 Benjamin Renard + +License +------- + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License version 2 +as published by the Free Software Foundation. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
diff --git a/check_ceph_usage b/check_ceph_usage
new file mode 100755
index 0000000..4c9446c
--- /dev/null
+++ b/check_ceph_usage
@@ -0,0 +1,234 @@
+#!/usr/bin/python
+#
+# Nagios plugin to check Ceph cluster usage
+#
+# Usage: check_ceph_usage [options]
+#
+# Options:
+#   -h, --help            show this help message and exit
+#   -d, --debug
+#   -b BIN, --bin=BIN     Ceph binary (default : /usr/bin/ceph)
+#   --conf=CONF           Ceph configuration file
+#   -m MON, --mon=MON     Ceph monitor address[:port]
+#   -i ID, --id=ID        Ceph client id
+#   -k KEYRING, --keyring=KEYRING
+#                         Ceph client keyring file
+#   -w WARNDATA, --warning-data=WARNDATA
+#                         Warning data threshold (default : 70%)
+#   -c CRITDATA, --critical-data=CRITDATA
+#                         Critical data threshold (default : 85%)
+#   -W WARNALLOC, --warning-allocated=WARNALLOC
+#                         Warning allocated threshold (default : 80%)
+#   -C CRITALLOC, --critical-allocated=CRITALLOC
+#                         Critical allocated threshold (default : 90%)
+#
+# Copyright (c) 2013 Benjamin Renard
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License version 2
+# as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+
+import json
+import os
+import subprocess
+import sys
+from optparse import OptionParser
+
+# default ceph values
+CEPH_COMMAND = '/usr/bin/ceph'
+WARN_DATA = 70
+CRIT_DATA = 85
+WARN_ALLOC = 80
+CRIT_ALLOC = 90
+
+# nagios exit codes
+STATUS_OK = 0
+STATUS_WARNING = 1
+STATUS_CRITICAL = 2
+# historical name; the critical branch used to reference an undefined
+# STATUS_CRITICAL and crashed with NameError instead of exiting 2
+STATUS_ERROR = STATUS_CRITICAL
+STATUS_UNKNOWN = 3
+
+
+def exit_unknown(line):
+    """Print `line` as the plugin output and exit with the UNKNOWN code."""
+    print(line)
+    sys.exit(STATUS_UNKNOWN)
+
+
+parser = OptionParser()
+parser.add_option('-d',
+                  '--debug',
+                  action="store_true",
+                  dest="debug",
+                  default=False)
+
+parser.add_option('-b',
+                  '--bin',
+                  action="store",
+                  dest="bin",
+                  help="Ceph binary (default : %s)" % CEPH_COMMAND,
+                  type='string',
+                  default=CEPH_COMMAND)
+
+parser.add_option('--conf',
+                  action="store",
+                  dest="conf",
+                  help="Ceph configuration file",
+                  type='string',
+                  default=None)
+
+parser.add_option('-m',
+                  '--mon',
+                  action="store",
+                  dest="mon",
+                  help="Ceph monitor address[:port]",
+                  type='string',
+                  default=None)
+
+parser.add_option('-i',
+                  '--id',
+                  action="store",
+                  dest="id",
+                  help="Ceph client id",
+                  type='string',
+                  default=None)
+
+parser.add_option('-k',
+                  '--keyring',
+                  action="store",
+                  dest="keyring",
+                  help="Ceph client keyring file",
+                  type='string',
+                  default=None)
+
+parser.add_option('-w',
+                  '--warning-data',
+                  action="store",
+                  dest="warndata",
+                  help="Warning data threshold (default : %s%%)" % WARN_DATA,
+                  type='int',
+                  default=WARN_DATA)
+
+parser.add_option('-c',
+                  '--critical-data',
+                  action="store",
+                  dest="critdata",
+                  help="Critical data threshold (default : %s%%)" % CRIT_DATA,
+                  type='int',
+                  default=CRIT_DATA)
+
+parser.add_option('-W',
+                  '--warning-allocated',
+                  action="store",
+                  dest="warnalloc",
+                  help="Warning allocated threshold (default : %s%%)" % WARN_ALLOC,
+                  type='int',
+                  default=WARN_ALLOC)
+
+parser.add_option('-C',
+                  '--critical-allocated',
+                  action="store",
+                  dest="critalloc",
+                  help="Critical allocated threshold (default : %s%%)" % CRIT_ALLOC,
+                  type='int',
+                  default=CRIT_ALLOC)
+
+(options, args) = parser.parse_args()
+
+# validate args
+if not os.path.exists(options.bin):
+    exit_unknown("ERROR: ceph executable '%s' doesn't exist" % options.bin)
+
+if options.conf and not os.path.exists(options.conf):
+    exit_unknown("ERROR: ceph conf file '%s' doesn't exist" % options.conf)
+
+if options.keyring and not os.path.exists(options.keyring):
+    exit_unknown("ERROR: keyring file '%s' doesn't exist" % options.keyring)
+
+# build command
+ceph_cmd = [options.bin]
+if options.mon:
+    ceph_cmd.extend(['-m', options.mon])
+if options.conf:
+    ceph_cmd.extend(['-c', options.conf])
+if options.id:
+    ceph_cmd.extend(['--id', options.id])
+if options.keyring:
+    ceph_cmd.extend(['--keyring', options.keyring])
+ceph_cmd.extend(['status', '--format=json'])
+
+# exec command
+try:
+    p = subprocess.Popen(ceph_cmd,
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE)
+    output = p.communicate()[0]
+except OSError:
+    # the binary exists but could not be started (e.g. not executable)
+    output = None
+
+if not output:
+    exit_unknown("UNKNOWN : fail to execute ceph status command")
+
+# extract the counters; a malformed report is UNKNOWN, not a traceback
+try:
+    pgmap = json.loads(output)['pgmap']
+    data_bytes = int(pgmap['data_bytes'])
+    bytes_used = int(pgmap['bytes_used'])
+    bytes_total = int(pgmap['bytes_total'])
+except (ValueError, KeyError, TypeError):
+    exit_unknown("UNKNOWN : fail to read pgmap status")
+
+if bytes_total <= 0:
+    # no capacity reported: the percentages below cannot be computed
+    exit_unknown("UNKNOWN : fail to read pgmap status")
+
+if options.debug:
+    print("data : %s" % data_bytes)
+    print("allocated : %s" % bytes_used)
+    print("total : %s" % bytes_total)
+
+# float() forces true division: Python 2 integer division truncated the
+# percentage before round() could keep its decimal
+per_data = round(float(data_bytes) * 100 / bytes_total, 1)
+per_alloc = round(float(bytes_used) * 100 / bytes_total, 1)
+
+# thresholds converted to bytes for the performance data
+data_warn_t = bytes_total * options.warndata // 100
+data_crit_t = bytes_total * options.critdata // 100
+alloc_warn_t = bytes_total * options.warnalloc // 100
+alloc_crit_t = bytes_total * options.critalloc // 100
+
+if options.debug:
+    print("%% data : %s" % per_data)
+    print("%% allocated : %s" % per_alloc)
+
+if per_data > options.critdata or per_alloc > options.critalloc:
+    status = STATUS_CRITICAL
+    status_txt = "CRITICAL"
+elif per_data > options.warndata or per_alloc > options.warnalloc:
+    status = STATUS_WARNING
+    status_txt = "WARNING"
+else:
+    status = STATUS_OK
+    status_txt = "OK"
+
+# perfdata metrics are separated by a space, as Nagios expects
+print("%s : %s%% allocated / %s%% really used|"
+      "allocated=%sB;%s;%s;0;%s used=%sB;%s;%s;0;%s" % (
+          status_txt, per_alloc, per_data,
+          bytes_used, alloc_warn_t, alloc_crit_t, bytes_total,
+          data_bytes, data_warn_t, data_crit_t, bytes_total))
+sys.exit(status)