#!/usr/bin/python3
"""
Icinga/Nagios plugin to check ESPHome devices status using the
ESPHome Dashboard API.

Copyright (c) 2022 Benjamin Renard

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License version 3
as published by the Free Software Foundation.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
"""

import argparse
import logging
import re
import sys
import time

try:
    import requests
except ImportError:
    # Reported as UNKNOWN by main(): an uncaught ImportError would exit with
    # status 1, which Nagios interprets as WARNING.
    requests = None

# nagios exit code
STATUS = {"OK": 0, "WARNING": 1, "CRITICAL": 2, "UNKNOWN": 3}

DEFAULT_HOST = "http://127.0.0.1:6052"
DEFAULT_RETRY_COUNT = 4
DEFAULT_RETRY_DELAY = 1
DEFAULT_TIMEOUT = 10


def exclude_pattern(value):
    """
    Check and compile exclusion pattern parameter

    :param value: Regular expression given on the command line
    :return: The compiled pattern
    :raises argparse.ArgumentTypeError: If value is not a valid regex, so
        argparse reports a usage error instead of a traceback
    """
    try:
        return re.compile(value)
    except re.error as err:
        raise argparse.ArgumentTypeError(f"invalid regex {value!r}: {err}") from err


def build_parser():
    """Build and return the command-line arguments parser of the plugin"""
    parser = argparse.ArgumentParser()

    parser.add_argument("-d", "--debug", action="store_true", dest="debug", default=False)

    parser.add_argument(
        "-H",
        "--host",
        action="store",
        dest="host",
        help=f"ESPHome dashboard URL (default: {DEFAULT_HOST})",
        type=str,
        default=DEFAULT_HOST,
    )

    parser.add_argument(
        "-r",
        "--retry",
        action="store",
        dest="retry_count",
        help=("Number of retry to retrieve device status " f"(default: {DEFAULT_RETRY_COUNT})"),
        type=int,
        default=DEFAULT_RETRY_COUNT,
    )

    parser.add_argument(
        "-D",
        "--delay",
        action="store",
        dest="retry_delay",
        help=(
            "Delay in second between two retry to retrieve device status "
            f"(default: {DEFAULT_RETRY_DELAY}s)"
        ),
        type=int,
        default=DEFAULT_RETRY_DELAY,
    )

    parser.add_argument(
        "-t",
        "--timeout",
        action="store",
        dest="timeout",
        help=f"Timeout in second on API requests (default: {DEFAULT_TIMEOUT}s)",
        type=int,
        default=DEFAULT_TIMEOUT,
    )

    parser.add_argument(
        "-x",
        "--exclude",
        action="append",
        dest="exclude",
        help="Regex exclude pattern(s)",
        type=exclude_pattern,
        default=[],
    )

    return parser


def normalize_host(host):
    """
    Return the dashboard URL without its trailing slash(es)

    Endpoint paths are appended as "/devices" and "/ping", so a trailing
    slash on the base URL would produce a double slash.
    """
    return host.rstrip("/")


def is_excluded(name, patterns):
    """
    Check if device is excluded

    :param name: Device name
    :param patterns: Iterable of compiled exclusion patterns
    :return: True if at least one pattern matches the name, False otherwise
    """
    for pattern in patterns:
        if pattern.search(name):
            logging.debug("Device %s is excluded", name)
            return True
    logging.debug("Device %s is not excluded", name)
    return False


def fetch_json_object(url, timeout):
    """
    Retrieve a JSON object from the dashboard API

    :param url: Full URL to request
    :param timeout: Request timeout in seconds
    :return: The decoded JSON object as a dict, or None if the request
        failed, the body is not valid JSON or it is not a JSON object
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers the JSON decoding errors raised by older
        # versions of requests
        logging.debug("Request on %s failed: %s", url, err)
        return None
    if not isinstance(data, dict):
        logging.debug("Unexpected data received from %s: %s (%s)", url, data, type(data))
        return None
    return data


def has_unreachable_device(ping_data, patterns):
    """
    Check if ping data contains at least one unreachable, non-excluded device

    :param ping_data: Dict mapping configuration file names to their ping status
    :param patterns: Iterable of compiled exclusion patterns
    """
    return any(
        # Exclusion patterns apply to the name without the .yaml extension
        not reachable and not is_excluded(config.replace(".yaml", ""), patterns)
        for config, reachable in ping_data.items()
    )


def fetch_ping_data(host, timeout, retry_count, retry_delay, patterns):
    """
    Retrieve devices ping status, retrying while a device looks unreachable

    :param host: Dashboard base URL (without trailing slash)
    :param timeout: Request timeout in seconds
    :param retry_count: Maximum number of attempts
    :param retry_delay: Delay in seconds between two attempts
    :param patterns: Iterable of compiled exclusion patterns
    :return: The ping data of the last attempt, or None if no usable data
        was retrieved (including when retry_count is lower than 1)
    """
    ping_data = None
    for attempt in range(1, retry_count + 1):
        ping_data = fetch_json_object(f"{host}/ping", timeout)
        logging.debug("Ping data: %s (%s)", ping_data, type(ping_data))
        if ping_data and not has_unreachable_device(ping_data, patterns):
            break
        # No need to wait after the last attempt
        if attempt < retry_count:
            logging.debug("Wait %d seconds before retry...", retry_delay)
            time.sleep(retry_delay)
    return ping_data


def check_devices(configured, ping_data, patterns):
    """
    Detect problems on configured devices

    :param configured: List of configured devices as provided by the API
    :param ping_data: Dict mapping configuration file names to their ping status
    :param patterns: Iterable of compiled exclusion patterns
    :return: A tuple (devices, errors, counts): devices maps each device name
        to its data (excluded devices included), errors is the list of
        problem messages and counts gives the number of problems per kind
    """
    devices = {}
    errors = []
    counts = {"unreachable": 0, "no_ping_data": 0, "update_available": 0}
    for dev in configured:
        name = dev["name"]
        devices[name] = dev
        logging.debug("Device %s: %s", name, dev)
        if is_excluded(name, patterns):
            continue

        if dev["deployed_version"] != dev["current_version"]:
            counts["update_available"] += 1
            errors.append(
                f"Update available for device {name} "
                f'({dev["deployed_version"]} => {dev["current_version"]})'
            )

        if dev["configuration"] not in ping_data:
            counts["no_ping_data"] += 1
            errors.append(f'No ping data found for device {name} ({dev["configuration"]})')
        elif not ping_data[dev["configuration"]]:
            counts["unreachable"] += 1
            errors.append(f"Device {name} is unreachable")
    return devices, errors, counts


def format_report(devices, errors, counts):
    """
    Format the plugin output

    :param devices: Dict mapping device names to their data
    :param errors: List of problem messages
    :param counts: Number of problems per kind, as returned by check_devices()
    :return: A tuple (status, report): status is a key of STATUS and report
        is the text to print
    """
    if errors:
        status = "WARNING"
        summary = []
        if counts["unreachable"]:
            summary.append(f'{counts["unreachable"]} unreachable devices')
        if counts["no_ping_data"]:
            summary.append(f'{counts["no_ping_data"]} missing ping device status')
        if counts["update_available"]:
            summary.append(f'{counts["update_available"]} update available')
        chunks = [
            f'WARNING - {", ".join(summary)}',
            "\n".join(f"- {error}" for error in errors),
        ]
    else:
        status = "OK"
        chunks = [f"OK - no problem detected on the {len(devices)} devices"]

    chunks.append(
        "\nDevices:\n"
        + "\n".join(
            f'- {name} (version = {dev["deployed_version"] or "unknown"}'
            f', address = {dev["address"] or "unknown"})'
            for name, dev in devices.items()
        )
    )
    return status, "\n".join(chunks)


def main(argv=None):
    """
    Run the check

    :param argv: Command-line arguments (default: sys.argv[1:])
    :return: The Nagios exit code
    """
    options = build_parser().parse_args(argv)

    logging.basicConfig(
        level=logging.DEBUG if options.debug else logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
    )

    if requests is None:
        print("UNKNOWN - Python requests module is not installed")
        return STATUS["UNKNOWN"]

    host = normalize_host(options.host)

    devices_data = fetch_json_object(f"{host}/devices", options.timeout)
    logging.debug("Devices data: %s (%s)", devices_data, type(devices_data))
    if not devices_data or "configured" not in devices_data:
        print("UNKNOWN - Fail to retrieve devices using ESPHome Dashboard API")
        return STATUS["UNKNOWN"]

    ping_data = fetch_ping_data(
        host, options.timeout, options.retry_count, options.retry_delay, options.exclude
    )
    if not ping_data:
        print("UNKNOWN - Fail to retrieve devices status using ESPHome Dashboard API")
        return STATUS["UNKNOWN"]

    devices, errors, counts = check_devices(
        devices_data["configured"], ping_data, options.exclude
    )
    status, report = format_report(devices, errors, counts)
    print(report)
    return STATUS[status]


if __name__ == "__main__":
    sys.exit(main())