check_ceph_usage/check_ceph_usage
2021-05-21 13:01:34 +02:00

226 lines
6.2 KiB
Python
Executable file

#!/usr/bin/env python
#
# Nagios plugin to check Ceph cluster usage
#
# Usage: check_ceph_usage [options]
#
# Options:
#   -h, --help            show this help message and exit
#   -d, --debug
#   -b BIN, --bin=BIN     Ceph binary (default: /usr/bin/ceph)
#   --conf=CONF           Ceph configuration file
#   -m MON, --mon=MON     Ceph monitor address[:port]
#   -i ID, --id=ID        Ceph client id
#   -k KEYRING, --keyring=KEYRING
#                         Ceph client keyring file
#   -w WARNDATA, --warning-data=WARNDATA
#                         Warning data threshold (default: 70%)
#   -c CRITDATA, --critical-data=CRITDATA
#                         Critical data threshold (default: 85%)
#   -W WARNALLOC, --warning-allocated=WARNALLOC
#                         Warning allocated threshold (default: 80%)
#   -C CRITALLOC, --critical-allocated=CRITALLOC
#                         Critical allocated threshold (default: 90%)
#
# Copyright (c) 2013 Benjamin Renard <brenard@zionetrix.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 2
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
import sys
import os
import json
import subprocess
import argparse
# Defaults applied when the matching command-line option is not given.
CEPH_COMMAND = '/usr/bin/ceph'
# Warning / critical thresholds, in percent.
WARN_DATA, CRIT_DATA = 70, 85
WARN_ALLOC, CRIT_ALLOC = 80, 90

# Nagios exit code for each status label.
STATUS = dict(OK=0, WARNING=1, CRITICAL=2, UNKNOWN=3)
# Command-line interface.  Options are registered in the order in which
# they are listed by --help.
parser = argparse.ArgumentParser()
parser.add_argument('-d', '--debug', dest="debug", action="store_true", default=False)

# Value-taking options, one row each: (flags, dest, type, default, help).
# In the threshold help texts "%%%%" becomes "%%" after the formatting done
# here; argparse applies %-formatting to help strings, so that "%%" is what
# ends up displayed as a single "%".
_VALUE_OPTIONS = (
    (('-b', '--bin'), "bin", str, CEPH_COMMAND,
     "Ceph binary (default: %s)" % CEPH_COMMAND),
    (('--conf',), "conf", str, None,
     "Ceph configuration file"),
    (('-m', '--mon'), "mon", str, None,
     "Ceph monitor address[:port]"),
    (('-i', '--id'), "id", str, None,
     "Ceph client id"),
    (('-k', '--keyring'), "keyring", str, None,
     "Ceph client keyring file"),
    (('-w', '--warning-data'), "warndata", int, WARN_DATA,
     "Warning data threshold (default: %s%%%%)" % WARN_DATA),
    (('-c', '--critical-data'), "critdata", int, CRIT_DATA,
     "Critical data threshold (default: %s%%%%)" % CRIT_DATA),
    (('-W', '--warning-allocated'), "warnalloc", int, WARN_ALLOC,
     "Warning allocated threshold (default: %s%%%%)" % WARN_ALLOC),
    (('-C', '--critical-allocated'), "critalloc", int, CRIT_ALLOC,
     "Critical allocated threshold (default: %s%%%%)" % CRIT_ALLOC),
)
for _flags, _dest, _type, _default, _help in _VALUE_OPTIONS:
    parser.add_argument(
        *_flags,
        action="store",
        dest=_dest,
        help=_help,
        type=_type,
        default=_default
    )

options = parser.parse_args()
# Check that the files named on the command line exist before running
# anything; the first missing one is reported and the plugin exits UNKNOWN.
# Rows are (always checked?, path, message template): the binary is always
# checked, the optional files only when they were supplied.
_PATH_CHECKS = (
    (True, options.bin, "ERROR: ceph executable '%s' doesn't exist"),
    (False, options.conf, "ERROR: ceph conf file '%s' doesn't exist"),
    (False, options.keyring, "ERROR: keyring file '%s' doesn't exist"),
)
for _always, _path, _template in _PATH_CHECKS:
    if not (_always or _path):
        continue
    if os.path.exists(_path):
        continue
    print(_template % _path)
    sys.exit(STATUS['UNKNOWN'])
# Assemble the argument vector: the binary, then each connection option
# that was supplied, then the "status" subcommand with JSON output.
ceph_cmd = [options.bin]
for _flag, _value in (
    ('-m', options.mon),
    ('-c', options.conf),
    ('--id', options.id),
    ('--keyring', options.keyring),
):
    if _value:
        ceph_cmd.extend((_flag, _value))
ceph_cmd += ['status', '--format=json']
# Run the ceph command and decode its JSON report into `data`.
# Every failure mode ends in an explicit UNKNOWN exit: an uncaught exception
# would make the interpreter exit with status 1, which Nagios reads as
# WARNING.
try:
    p = subprocess.Popen(ceph_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, err = p.communicate()
except OSError as exc:
    # The path exists but could not be launched (e.g. not executable).
    print("UNKNOWN: fail to execute ceph status command (%s)" % exc)
    sys.exit(STATUS['UNKNOWN'])
if not output:
    print("UNKNOWN: fail to execute ceph status command")
    sys.exit(STATUS['UNKNOWN'])
try:
    # JSON text is UTF-8 regardless of the interpreter's default encoding.
    data = json.loads(output.decode('utf-8'))
except ValueError:
    # ValueError covers both undecodable bytes (UnicodeDecodeError) and
    # malformed JSON.
    print("UNKNOWN: fail to parse ceph status output")
    sys.exit(STATUS['UNKNOWN'])
# Anything other than a JSON object cannot carry a 'pgmap' section.
if not isinstance(data, dict) or 'pgmap' not in data:
    print("UNKNOWN: fail to read pgmap status")
    sys.exit(STATUS['UNKNOWN'])
# Derive usage percentages and byte thresholds from the pgmap counters,
# then choose the Nagios status.
#
# Sets: PER_DATA / PER_ALLOC (percent of bytes_total, one decimal),
#       DATA_*_T / ALLOC_*_T (thresholds converted to bytes),
#       STATUS_TXT ('OK', 'WARNING' or 'CRITICAL').
try:
    pgmap = data['pgmap']
    data_bytes = int(pgmap['data_bytes'])
    bytes_used = int(pgmap['bytes_used'])
    bytes_total = int(pgmap['bytes_total'])
except (KeyError, TypeError, ValueError):
    # A missing or non-numeric counter must not surface as a traceback
    # (exit status 1 would be read as WARNING).
    print("UNKNOWN: fail to read pgmap usage counters")
    sys.exit(STATUS['UNKNOWN'])

if options.debug:
    print("data: %s" % pgmap['data_bytes'])
    print("allocated: %s" % pgmap['bytes_used'])
    print("total: %s" % pgmap['bytes_total'])

if bytes_total <= 0:
    # Percentages are undefined without a positive total; report it instead
    # of dividing by zero.
    print("UNKNOWN: ceph reports a total size of %s bytes" % bytes_total)
    sys.exit(STATUS['UNKNOWN'])

# 100.0 forces float division on every Python version, so the one-decimal
# rounding is meaningful; thresholds use exact integer arithmetic.
PER_DATA = round(100.0 * data_bytes / bytes_total, 1)
DATA_WARN_T = bytes_total * options.warndata // 100
DATA_CRIT_T = bytes_total * options.critdata // 100
PER_ALLOC = round(100.0 * bytes_used / bytes_total, 1)
ALLOC_WARN_T = bytes_total * options.warnalloc // 100
ALLOC_CRIT_T = bytes_total * options.critalloc // 100

if options.debug:
    print("%% data: %s" % PER_DATA)
    print("%% allocated: %s" % PER_ALLOC)

# The worst of the two measurements decides the overall status.
if PER_DATA > options.critdata or PER_ALLOC > options.critalloc:
    STATUS_TXT = 'CRITICAL'
elif PER_DATA > options.warndata or PER_ALLOC > options.warnalloc:
    STATUS_TXT = 'WARNING'
else:
    STATUS_TXT = 'OK'
# Emit the single status line and exit with the matching Nagios code.
# Layout: "<STATUS> - <summary>|<perfdata>", where each perfdata entry is
# label=value[UOM];warn;crit;min;max.  Entries are separated by a space, as
# the plugin guidelines require; a comma would be parsed as part of the
# previous entry's max field / the next entry's label.
_pgmap = data['pgmap']
print(
    "%s - %s%% allocated / %s%% really used|allocated=%sB;%s;%s;0;%s used=%sB;%s;%s;0;%s" % (
        STATUS_TXT, PER_ALLOC, PER_DATA,
        _pgmap['bytes_used'], ALLOC_WARN_T, ALLOC_CRIT_T, _pgmap['bytes_total'],
        _pgmap['data_bytes'], DATA_WARN_T, DATA_CRIT_T, _pgmap['bytes_total']
    )
)
sys.exit(STATUS[STATUS_TXT])