#!/usr/bin/env python
"""
Nagios plugin to check Ceph cluster usage

Usage: check_ceph_usage [options]

Options:
  -h, --help            show this help message and exit
  -d, --debug
  -b BIN, --bin=BIN     Ceph binary (default: /usr/bin/ceph)
  --conf=CONF           Ceph configuration file
  -m MON, --mon=MON     Ceph monitor address[:port]
  -i ID, --id=ID        Ceph client id
  -k KEYRING, --keyring=KEYRING
                        Ceph client keyring file
  -w WARNDATA, --warning-data=WARNDATA
                        Warning data threshold (default: 70%)
  -c CRITDATA, --critical-data=CRITDATA
                        Critical data threshold (default: 85%)
  -W WARNALLOC, --warning-allocated=WARNALLOC
                        Warning allocated threshold (default: 80%)
  -C CRITALLOC, --critical-allocated=CRITALLOC
                        Critical allocated threshold (default: 90%)

Copyright (c) 2013 Benjamin Renard

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License version 2
as published by the Free Software Foundation.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
"""

import sys
import os
import json
import subprocess
import argparse

# default ceph values
CEPH_COMMAND = '/usr/bin/ceph'
WARN_DATA = 70
CRIT_DATA = 85
WARN_ALLOC = 80
CRIT_ALLOC = 90

# nagios exit code
STATUS = {
    'OK': 0,
    'WARNING': 1,
    'CRITICAL': 2,
    'UNKNOWN': 3
}


class CheckError(Exception):
    """Raised when the check cannot be evaluated; reported as UNKNOWN."""


def build_parser():
    """Return the argparse parser describing the plugin's command line."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-d', '--debug', action="store_true", dest="debug", default=False
    )
    parser.add_argument(
        '-b', '--bin', action="store", dest="bin",
        help="Ceph binary (default: %s)" % CEPH_COMMAND,
        type=str, default=CEPH_COMMAND
    )
    parser.add_argument(
        '--conf', action="store", dest="conf",
        help="Ceph configuration file", type=str, default=None
    )
    parser.add_argument(
        '-m', '--mon', action="store", dest="mon",
        help="Ceph monitor address[:port]", type=str, default=None
    )
    parser.add_argument(
        '-i', '--id', action="store", dest="id",
        help="Ceph client id", type=str, default=None
    )
    parser.add_argument(
        '-k', '--keyring', action="store", dest="keyring",
        help="Ceph client keyring file", type=str, default=None
    )
    # The help strings go through two rounds of %-formatting (once here,
    # once inside argparse), hence the quadrupled percent sign.
    parser.add_argument(
        '-w', '--warning-data', action="store", dest="warndata",
        help="Warning data threshold (default: %s%%%%)" % WARN_DATA,
        type=int, default=WARN_DATA
    )
    parser.add_argument(
        '-c', '--critical-data', action="store", dest="critdata",
        help="Critical data threshold (default: %s%%%%)" % CRIT_DATA,
        type=int, default=CRIT_DATA
    )
    parser.add_argument(
        '-W', '--warning-allocated', action="store", dest="warnalloc",
        help="Warning allocated threshold (default: %s%%%%)" % WARN_ALLOC,
        type=int, default=WARN_ALLOC
    )
    parser.add_argument(
        '-C', '--critical-allocated', action="store", dest="critalloc",
        help="Critical allocated threshold (default: %s%%%%)" % CRIT_ALLOC,
        type=int, default=CRIT_ALLOC
    )
    return parser


def check_paths(options):
    """Return an error message if a file named in *options* is missing.

    Checks the ceph binary, then the optional configuration file, then the
    optional keyring. Returns None when every given path exists.
    """
    if not os.path.exists(options.bin):
        return "ERROR: ceph executable '%s' doesn't exist" % options.bin
    if options.conf and not os.path.exists(options.conf):
        return "ERROR: ceph conf file '%s' doesn't exist" % options.conf
    if options.keyring and not os.path.exists(options.keyring):
        return "ERROR: keyring file '%s' doesn't exist" % options.keyring
    return None


def build_command(options):
    """Return the argv list for ``ceph status --format=json``."""
    ceph_cmd = [options.bin]
    if options.mon:
        ceph_cmd.extend(['-m', options.mon])
    if options.conf:
        ceph_cmd.extend(['-c', options.conf])
    if options.id:
        ceph_cmd.extend(['--id', options.id])
    if options.keyring:
        ceph_cmd.extend(['--keyring', options.keyring])
    ceph_cmd.extend(['status', '--format=json'])
    return ceph_cmd


def run_ceph(ceph_cmd):
    """Run *ceph_cmd* and return its raw stdout.

    Raises CheckError when the process cannot be started or prints nothing
    on stdout.
    """
    try:
        # pylint: disable=consider-using-with
        proc = subprocess.Popen(ceph_cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError as exc:
        # e.g. the path exists but is not executable
        raise CheckError("fail to execute ceph status command (%s)" % exc)
    # stderr is piped so it never pollutes the plugin output; it is not used.
    output, _ = proc.communicate()
    if not output:
        raise CheckError("fail to execute ceph status command")
    return output


def parse_status(output):
    """Decode the JSON printed by ceph and return its 'pgmap' entry.

    Raises CheckError when *output* is not decodable JSON or when the
    decoded document has no 'pgmap' key.
    """
    try:
        data = json.loads(output.decode(sys.getdefaultencoding()))
    except ValueError:
        # covers both JSON syntax errors and UnicodeDecodeError
        raise CheckError("fail to parse ceph status output")
    if not isinstance(data, dict) or 'pgmap' not in data:
        raise CheckError("fail to read pgmap status")
    return data['pgmap']


def evaluate_usage(pgmap, warndata, critdata, warnalloc, critalloc,
                   debug=False):
    """Compare the pgmap usage counters against the thresholds.

    *pgmap* must provide 'data_bytes', 'bytes_used' and 'bytes_total';
    the four thresholds are percentages of 'bytes_total'.

    Returns a ``(status_txt, message)`` tuple where *status_txt* is a key
    of STATUS and *message* is the full plugin output line, perfdata
    included. With *debug* set, the raw counters and the computed
    percentages are printed as a side effect.

    Raises CheckError when a counter is missing or not an integer, or
    when 'bytes_total' is not positive (no percentage can be computed).
    """
    try:
        raw_data = pgmap['data_bytes']
        raw_used = pgmap['bytes_used']
        raw_total = pgmap['bytes_total']
    except (KeyError, TypeError):
        raise CheckError("fail to read pgmap usage counters")

    if debug:
        print("data: %s" % raw_data)
        print("allocated: %s" % raw_used)
        print("total: %s" % raw_total)

    try:
        data_bytes = int(raw_data)
        used_bytes = int(raw_used)
        total_bytes = int(raw_total)
    except (TypeError, ValueError):
        raise CheckError("pgmap usage counters are not integers")

    if total_bytes <= 0:
        # Would otherwise divide by zero below.
        raise CheckError("invalid total capacity (bytes_total=%s)"
                         % raw_total)

    per_data = round(data_bytes * 100 / total_bytes, 1)
    per_alloc = round(used_bytes * 100 / total_bytes, 1)

    # Thresholds expressed in bytes, for the perfdata section.
    data_warn_t = int(total_bytes * warndata / 100)
    data_crit_t = int(total_bytes * critdata / 100)
    alloc_warn_t = int(total_bytes * warnalloc / 100)
    alloc_crit_t = int(total_bytes * critalloc / 100)

    if debug:
        print("%% data: %s" % per_data)
        print("%% allocated: %s" % per_alloc)

    if per_data > critdata or per_alloc > critalloc:
        status_txt = 'CRITICAL'
    elif per_data > warndata or per_alloc > warnalloc:
        status_txt = 'WARNING'
    else:
        status_txt = 'OK'

    # NOTE(review): the Nagios plugin guidelines separate perfdata items
    # with spaces; the comma below is kept as-is because existing
    # consumers may depend on the current output - confirm before changing.
    message = (
        "%s - %s%% allocated / %s%% really used|"
        "allocated=%sB;%s;%s;0;%s,used=%sB;%s;%s;0;%s" % (
            status_txt,
            per_alloc,
            per_data,
            raw_used,
            alloc_warn_t,
            alloc_crit_t,
            raw_total,
            raw_data,
            data_warn_t,
            data_crit_t,
            raw_total
        )
    )
    return status_txt, message


def check_usage(output, warndata, critdata, warnalloc, critalloc,
                debug=False):
    """Turn raw ``ceph status`` output into ``(status_txt, message)``.

    Any CheckError raised while parsing or evaluating is converted into
    an ``('UNKNOWN', 'UNKNOWN: ...')`` result instead of propagating.
    """
    try:
        pgmap = parse_status(output)
        return evaluate_usage(pgmap, warndata, critdata,
                              warnalloc, critalloc, debug)
    except CheckError as exc:
        return 'UNKNOWN', "UNKNOWN: %s" % exc


def main(argv=None):
    """Run the check and return the Nagios exit code.

    *argv* is the argument list without the program name; None makes
    argparse read ``sys.argv``.
    """
    options = build_parser().parse_args(argv)

    # validate args
    problem = check_paths(options)
    if problem:
        print(problem)
        return STATUS['UNKNOWN']

    # exec command
    try:
        output = run_ceph(build_command(options))
    except CheckError as exc:
        print("UNKNOWN: %s" % exc)
        return STATUS['UNKNOWN']

    status_txt, message = check_usage(
        output,
        options.warndata, options.critdata,
        options.warnalloc, options.critalloc,
        options.debug
    )
    print(message)
    return STATUS[status_txt]


if __name__ == '__main__':
    sys.exit(main())