#!/usr/bin/python
#
# Nagios plugin to check Ceph cluster usage
#
# Usage: check_ceph_usage [options]
#
# Options:
#   -h, --help            show this help message and exit
#   -d, --debug
#   -b BIN, --bin=BIN     Ceph binary (default : /usr/bin/ceph)
#   --conf=CONF           Ceph configuration file
#   -m MON, --mon=MON     Ceph monitor address[:port]
#   -i ID, --id=ID        Ceph client id
#   -k KEYRING, --keyring=KEYRING
#                         Ceph client keyring file
#   -w WARNDATA, --warning-data=WARNDATA
#                         Warning data threshold (default : 70%)
#   -c CRITDATA, --critical-data=CRITDATA
#                         Critical data threshold (default : 85%)
#   -W WARNALLOC, --warning-allocated=WARNALLOC
#                         Warning allocated threshold (default : 80%)
#   -C CRITALLOC, --critical-allocated=CRITALLOC
#                         Critical allocated threshold (default : 90%)
#
# Copyright (c) 2013 Benjamin Renard
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 2
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#

import sys
import os
import json
import subprocess
from optparse import OptionParser

# default ceph values
CEPH_COMMAND = '/usr/bin/ceph'
WARN_DATA = 70
CRIT_DATA = 85
WARN_ALLOC = 80
CRIT_ALLOC = 90

# nagios exit code
STATUS_OK = 0
STATUS_WARNING = 1
STATUS_ERROR = 2  # reported to Nagios as CRITICAL
STATUS_UNKNOWN = 3


def _build_parser():
    """Return the OptionParser describing the plugin's command line."""
    parser = OptionParser()
    parser.add_option('-d', '--debug', action="store_true", dest="debug",
                      default=False)
    parser.add_option('-b', '--bin', action="store", dest="bin",
                      help="Ceph binary (default : %s)" % CEPH_COMMAND,
                      type='string', default=CEPH_COMMAND)
    parser.add_option('--conf', action="store", dest="conf",
                      help="Ceph configuration file",
                      type='string', default=None)
    parser.add_option('-m', '--mon', action="store", dest="mon",
                      help="Ceph monitor address[:port]",
                      type='string', default=None)
    parser.add_option('-i', '--id', action="store", dest="id",
                      help="Ceph client id",
                      type='string', default=None)
    parser.add_option('-k', '--keyring', action="store", dest="keyring",
                      help="Ceph client keyring file",
                      type='string', default=None)
    parser.add_option('-w', '--warning-data', action="store", dest="warndata",
                      help="Warning data threshold (default : %s%%)" % WARN_DATA,
                      type='int', default=WARN_DATA)
    parser.add_option('-c', '--critical-data', action="store", dest="critdata",
                      help="Critical data threshold (default : %s%%)" % CRIT_DATA,
                      type='int', default=CRIT_DATA)
    parser.add_option('-W', '--warning-allocated', action="store",
                      dest="warnalloc",
                      help="Warning allocated threshold (default : %s%%)" % WARN_ALLOC,
                      type='int', default=WARN_ALLOC)
    parser.add_option('-C', '--critical-allocated', action="store",
                      dest="critalloc",
                      help="Critical allocated threshold (default : %s%%)" % CRIT_ALLOC,
                      type='int', default=CRIT_ALLOC)
    return parser


def _build_command(options):
    """Return the argv list that runs `ceph status --format=json`."""
    ceph_cmd = [options.bin]
    if options.mon:
        ceph_cmd.extend(['-m', options.mon])
    if options.conf:
        ceph_cmd.extend(['-c', options.conf])
    if options.id:
        ceph_cmd.extend(['--id', options.id])
    if options.keyring:
        ceph_cmd.extend(['--keyring', options.keyring])
    ceph_cmd.extend(['status', '--format=json'])
    return ceph_cmd


def evaluate_usage(pgmap, warndata, critdata, warnalloc, critalloc,
                   debug=False):
    """Turn the `pgmap` section of `ceph status` into a Nagios result.

    pgmap     -- mapping holding 'data_bytes', 'bytes_used', 'bytes_total'
    warndata  -- warning threshold (percent) for really used data
    critdata  -- critical threshold (percent) for really used data
    warnalloc -- warning threshold (percent) for allocated space
    critalloc -- critical threshold (percent) for allocated space
    debug     -- when true, print the raw values and computed percentages

    Returns a (exit_code, status_line) tuple; status_line carries the
    human-readable text followed by the performance data.

    Raises KeyError if a counter is missing, and ValueError/TypeError if a
    counter is not an integer or the reported total capacity is not
    positive (a percentage cannot be computed in that case).
    """
    data_bytes = int(pgmap['data_bytes'])
    bytes_used = int(pgmap['bytes_used'])
    bytes_total = int(pgmap['bytes_total'])

    if debug:
        print("data : %s" % data_bytes)
        print("allocated : %s" % bytes_used)
        print("total : %s" % bytes_total)

    if bytes_total <= 0:
        raise ValueError("bytes_total must be positive, got %s" % bytes_total)

    # Multiply by 100.0 so the division is a float division on Python 2 as
    # well; otherwise the result is floored and round(..., 1) is a no-op.
    per_data = round(data_bytes * 100.0 / bytes_total, 1)
    per_alloc = round(bytes_used * 100.0 / bytes_total, 1)

    # Thresholds in bytes for the perfdata; floor division keeps them exact
    # integers regardless of the interpreter version.
    data_warn_t = bytes_total * warndata // 100
    data_crit_t = bytes_total * critdata // 100
    alloc_warn_t = bytes_total * warnalloc // 100
    alloc_crit_t = bytes_total * critalloc // 100

    if debug:
        print("%% data : %s" % per_data)
        print("%% allocated : %s" % per_alloc)

    if per_data > critdata or per_alloc > critalloc:
        status, status_txt = STATUS_ERROR, "CRITICAL"
    elif per_data > warndata or per_alloc > warnalloc:
        status, status_txt = STATUS_WARNING, "WARNING"
    else:
        status, status_txt = STATUS_OK, "OK"

    message = ("%s : %s%% allocated / %s%% really used"
               "|allocated=%sB;%s;%s;0;%s,used=%sB;%s;%s;0;%s" % (
                   status_txt, per_alloc, per_data,
                   bytes_used, alloc_warn_t, alloc_crit_t, bytes_total,
                   data_bytes, data_warn_t, data_crit_t, bytes_total))
    return status, message


def main(argv=None):
    """Run the check and return the Nagios exit code.

    argv -- list of command-line arguments without the program name;
            None means use sys.argv[1:].
    """
    (options, _args) = _build_parser().parse_args(argv)

    # validate args
    if not os.path.exists(options.bin):
        print("ERROR: ceph executable '%s' doesn't exist" % options.bin)
        return STATUS_UNKNOWN
    if options.conf and not os.path.exists(options.conf):
        print("ERROR: ceph conf file '%s' doesn't exist" % options.conf)
        return STATUS_UNKNOWN
    if options.keyring and not os.path.exists(options.keyring):
        print("ERROR: keyring file '%s' doesn't exist" % options.keyring)
        return STATUS_UNKNOWN

    # exec command; an OSError (e.g. binary not executable) must be reported
    # as UNKNOWN, not as a traceback that Nagios would read as WARNING.
    try:
        proc = subprocess.Popen(_build_command(options),
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        output, _err = proc.communicate()
    except OSError:
        output = None

    if not output:
        print("UNKNOWN : fail to execute ceph status command")
        return STATUS_UNKNOWN

    try:
        data = json.loads(output.decode('utf-8'))
    except ValueError:
        data = None

    if not isinstance(data, dict) or 'pgmap' not in data:
        print("UNKNOWN : fail to read pgmap status")
        return STATUS_UNKNOWN

    try:
        status, message = evaluate_usage(
            data['pgmap'], options.warndata, options.critdata,
            options.warnalloc, options.critalloc, debug=options.debug)
    except (KeyError, TypeError, ValueError):
        print("UNKNOWN : fail to read pgmap status")
        return STATUS_UNKNOWN

    print(message)
    return status


if __name__ == '__main__':
    sys.exit(main())