check_esphome_devices/check_esphome_devices

204 lines
5.5 KiB
Python
Executable file

#!/usr/bin/python3
"""
Icinga/Nagios plugin to check ESPHome devices status using the ESPHome
Dashboard API.
Copyright (c) 2022 Benjamin Renard <brenard@zionetrix.net>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License version 3
as published by the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""
import argparse
import logging
import re
import sys
import time
import requests
# Exit codes understood by Nagios/Icinga, indexed by status name
STATUS = {
    "OK": 0,
    "WARNING": 1,
    "CRITICAL": 2,
    "UNKNOWN": 3,
}

# Default values of the command line options
DEFAULT_HOST = "http://127.0.0.1:6052"
DEFAULT_RETRY_COUNT = 4
DEFAULT_RETRY_DELAY = 1
DEFAULT_TIMEOUT = 10

# Command line interface. Keyword arguments that only restate argparse
# defaults (action="store", dest derived from the long option name,
# default=False for store_true) are left implicit.
parser = argparse.ArgumentParser()

parser.add_argument("-d", "--debug", action="store_true")

parser.add_argument(
    "-H",
    "--host",
    type=str,
    default=DEFAULT_HOST,
    help=f"ESPHome dashboard URL (default: {DEFAULT_HOST})",
)

parser.add_argument(
    "-r",
    "--retry",
    dest="retry_count",
    type=int,
    default=DEFAULT_RETRY_COUNT,
    help=f"Number of retry to retrieve device status (default: {DEFAULT_RETRY_COUNT})",
)

parser.add_argument(
    "-D",
    "--delay",
    dest="retry_delay",
    type=int,
    default=DEFAULT_RETRY_DELAY,
    help=(
        "Delay in second between two retry to retrieve device status "
        f"(default: {DEFAULT_RETRY_DELAY}s)"
    ),
)

parser.add_argument(
    "-t",
    "--timeout",
    type=int,
    default=DEFAULT_TIMEOUT,
    help=f"Timeout in second on API requests (default: {DEFAULT_TIMEOUT}s)",
)
def exclude_pattern(value):
    """
    Check and compile an exclusion pattern parameter.

    :param value: regular expression, as text
    :return: the compiled pattern
    :raises argparse.ArgumentTypeError: if value is not a valid regular
        expression
    """
    try:
        return re.compile(value)
    except re.error as err:
        # argparse only turns ArgumentTypeError, TypeError and ValueError into
        # a usage error; re.error is none of them and would otherwise surface
        # as a raw traceback.
        raise argparse.ArgumentTypeError(
            f"invalid exclude pattern {value!r}: {err}"
        ) from err
# Repeatable option: each occurrence is compiled by exclude_pattern() and
# appended to the options.exclude list.
parser.add_argument(
    "-x",
    "--exclude",
    dest="exclude",
    action="append",
    type=exclude_pattern,
    default=[],
    help="Regex exclude pattern(s)",
)
# Parse the command line and configure logging according to --debug.
options = parser.parse_args()

logging.basicConfig(
    level=logging.DEBUG if options.debug else logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# Endpoint paths are appended to the host as "/<path>", so drop any trailing
# slash from the dashboard URL. rstrip() is also safe on an empty string,
# where indexing the last character would raise IndexError.
options.host = options.host.rstrip("/")
def is_excluded(name):
    """
    Check if a device is excluded.

    :param name: device name, searched against every compiled pattern of
        options.exclude
    :return: True if at least one pattern matches, False otherwise
    """
    # any() stops at the first matching pattern
    excluded = any(pattern.search(name) for pattern in options.exclude)
    if excluded:
        logging.debug("Device %s is excluded", name)
    else:
        logging.debug("Device %s is not excluded", name)
    return excluded
# Retrieve the devices known by the ESPHome dashboard. A transport failure or
# an undecodable answer is reported as UNKNOWN: an uncaught exception would
# make the interpreter exit with code 1, which is the WARNING status.
try:
    r = requests.get(f"{options.host}/devices", timeout=options.timeout)
    devices_data = r.json()
except (requests.RequestException, ValueError) as err:
    logging.debug("Fail to retrieve devices: %s", err)
    devices_data = None
logging.debug("Devices data: %s (%s)", devices_data, type(devices_data))

if not devices_data:
    print("UNKNOWN - Fail to retrieve devices using ESPHome Dashboard API")
    sys.exit(STATUS["UNKNOWN"])
# Query the ping endpoint until every non-excluded device is reported as
# reachable or the configured number of attempts is used up. ping_data holds
# the last decoded answer; it is bound up front so the check after the loop
# also works when no attempt is made at all (retry count <= 0).
ping_data = None
for attempt in range(1, options.retry_count + 1):
    try:
        r = requests.get(f"{options.host}/ping", timeout=options.timeout)
        ping_data = r.json()
    except (requests.RequestException, ValueError) as err:
        # A failed request counts as "no data" for this attempt and is retried
        logging.debug("Fail to retrieve ping data: %s", err)
        ping_data = None
    logging.debug("Ping data: %s (%s)", ping_data, type(ping_data))

    # Stop as soon as an answer contains no unreachable device, excluded
    # devices aside (entries are keyed by configuration file name).
    if ping_data and not any(
        not ping_data[config] and not is_excluded(config.replace(".yaml", ""))
        for config in ping_data
    ):
        break

    # Sleeping after the last attempt would only delay the result
    if attempt < options.retry_count:
        logging.debug("Wait %d seconds before retry...", options.retry_delay)
        time.sleep(options.retry_delay)

if not ping_data:
    print("UNKNOWN - Fail to retrieve devices status using ESPHome Dashboard API")
    sys.exit(STATUS["UNKNOWN"])
# Problem counters, one per category
UPDATE_AVAILABLE = 0
UNREACHABLE_DEVICES = 0
NO_PING_DATA = 0
# Description of every detected problem, in detection order
errors = []
# Configured devices indexed by name; excluded devices are recorded too, they
# are only skipped by the checks below
devices = {}

for dev in devices_data["configured"]:
    dev_name = dev["name"]
    devices[dev_name] = dev
    logging.debug("Device %s: %s", dev_name, dev)
    if is_excluded(dev_name):
        continue

    # Version check: the deployed version differs from the current one
    deployed, current = dev["deployed_version"], dev["current_version"]
    if deployed != current:
        UPDATE_AVAILABLE += 1
        errors.append(f"Update available for device {dev_name} ({deployed} => {current})")

    # Reachability check: ping data is keyed by configuration
    config = dev["configuration"]
    if config in ping_data:
        if not ping_data[config]:
            UNREACHABLE_DEVICES += 1
            errors.append(f"Device {dev_name} is unreachable")
    else:
        NO_PING_DATA += 1
        errors.append(f"No ping data found for device {dev_name} ({config})")
if errors:
    # First line: one entry per non-empty problem category, then the details
    summary = [
        f"{count} {label}"
        for count, label in (
            (UNREACHABLE_DEVICES, "unreachable devices"),
            (NO_PING_DATA, "missing ping device status"),
            (UPDATE_AVAILABLE, "update available"),
        )
        if count
    ]
    print(f'WARNING - {", ".join(summary)}')
    print("\n".join(f"- {error}" for error in errors))
    EXIT_STATUS = STATUS["WARNING"]
else:
    print(f"OK - no problem detected on the {len(devices)} devices")
    EXIT_STATUS = STATUS["OK"]

# List every recorded device; an empty version or address is shown as "unknown"
device_lines = []
for name, dev in devices.items():
    version = dev["deployed_version"] or "unknown"
    address = dev["address"] or "unknown"
    device_lines.append(f"- {name} (version = {version}, address = {address})")
print("\nDevices:\n" + "\n".join(device_lines))

sys.exit(EXIT_STATUS)