#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Provenance: added as check_iperf3.py by commit
# 9f08367de6e69fbbe8224a98d62e6fc715ffd133 ("Add initial version",
# Jonny007-MKD, 2025-05-15).

"""
This module provides a Nagios plugin to run and evaluate network performance tests using iperf3.

The script executes an iperf3 test, evaluates the results based on user-defined thresholds, and
outputs a formatted Nagios status message along with performance data. The script supports both
TCP and UDP tests, bidirectional testing, and customizable thresholds for throughput and
retransmissions.

Key functions in this module:
- `run_iperf3`: Executes an iperf3 test and returns the parsed JSON output.
- `determine_result`: Analyzes the iperf3 output and determines the Nagios status code and message.
- `build_perfdata`: Constructs the Nagios performance data string based on iperf3 results.
- `check_iperf3`: Orchestrates the test execution, result evaluation, and Nagios output generation.

Command-line interface (CLI):
- The script expects command-line arguments to configure the iperf3 test. These arguments are parsed
  into an `options` object, which is then passed through the testing and evaluation functions.

Usage:
    Run this module as a standalone script to initiate a network performance test:

    python check_iperf3.py --remote HOST --port PORT --rate-critical BITS ...

The `options` object is populated with the user-specified parameters, and the `check_iperf3`
function is invoked to execute the test, evaluate the results, and output Nagios-compatible
status and performance data.

Example:
    python check_iperf3.py --remote 192.168.1.1 --port 5201 --rate-critical 1000000000 ...

This would run an iperf3 test against the specified remote host and port, evaluating the throughput
against the provided critical and warning thresholds, and outputting the result in Nagios format.
"""


from optparse import OptionParser, OptionGroup
import subprocess
import json
import sys
from typing import Tuple, Optional, List, Union, NoReturn


# Nagios exit codes and the label printed in front of the status text.
_STATUS_NAMES = {0: 'OK', 1: 'WARNING', 2: 'CRITICAL', 3: 'UNKNOWN'}


def nagexit(exit_code: int, statuslines: Union[str, List[str]], perfdata: Optional[List[str]]=None) -> NoReturn:
    """
    Print a Nagios status line (plus optional performance data) and exit.

    Args:
        exit_code (int): Plugin result: 0 (OK), 1 (WARNING), 2 (CRITICAL) or
            3 (UNKNOWN). Any other value is reported as 3 (UNKNOWN).
        statuslines (str | List[str]): The human-readable status message(s). A list
            is printed one item per line.
        perfdata (List[str], optional): Already formatted ``'label'=value[;warn;crit]``
            items. They are appended after a ``|`` and separated by single spaces.

    Returns:
        NoReturn: This function does not return; it exits the program using sys.exit().
    """
    if exit_code not in _STATUS_NAMES:
        # An unexpected code must not crash the plugin with a KeyError (which would
        # make the process exit with 1, i.e. WARNING).
        exit_code = 3
    status_code = _STATUS_NAMES[exit_code]

    if isinstance(statuslines, str):
        status = statuslines
    else:
        status = "\n".join(statuslines or [])

    # Performance data items are separated by spaces; ';' only separates the
    # fields inside one item.
    perf = "|" + " ".join(perfdata) if perfdata else ""

    if status:
        print(f"{status_code}: {status}{perf}")
    else:
        print(f"{status_code}{perf}")
    sys.exit(exit_code)


def make_iperf_cmdline(options) -> List[str]:
    """
    Build the iperf3 command line for the requested test.

    Args:
        options: An object with the following attributes:
            - remote (str): Hostname or IP address of the iperf3 server.
            - port (int): Server port to connect to.
            - bidir (bool): If True, add ``--bidir`` (takes precedence over downstream).
            - downstream (bool): If True (and bidir is False), add ``--reverse``.
            - udp (bool): If True, add ``--udp``.
            - bytes (str): Optional amount of data to transfer (e.g. "10M"). When set
              it is passed as ``--bytes`` and ``time`` is not used.
            - time (int): Test duration in seconds, used when ``bytes`` is not set.

    Returns:
        List[str]: The argument vector, starting with ``"iperf3"``.
    """
    params = [
        "iperf3",
        "--json",
        "--client", options.remote,
        "--port", str(options.port),
        "--connect-timeout", "10000",
    ]
    if options.bidir:
        params.append("--bidir")
    elif options.downstream:
        params.append("--reverse")
    if options.udp:
        params.append("--udp")
    if options.bytes:
        params += ["--bytes", options.bytes]
    else:
        params += ["--time", str(options.time)]
    return params


def _iperf_error_message(output: Optional[str]) -> str:
    """Return the most useful one-line message from the output of a failed iperf3 run."""
    text = (output or "").strip()
    try:
        parsed = json.loads(text)
    except ValueError:
        parsed = None
    # If the output is a JSON document with an "error" field, that field is a far
    # better status text than the whole JSON blob.
    if isinstance(parsed, dict) and parsed.get("error"):
        return str(parsed["error"])
    return text or "iperf3 failed without output"


def run_iperf3(options) -> dict:
    """
    Run iperf3 with the given options and return its parsed JSON output.

    Args:
        options: An object compatible with `make_iperf_cmdline()` (attributes
            `remote`, `port`, `bidir`, `downstream`, `udp`, `bytes` and `time`).

    Returns:
        dict: The parsed JSON output from iperf3 if the command succeeds.

    Exits:
        Exits with code 3 (UNKNOWN) if iperf3 cannot be found or started, if it
        returns a non-zero exit status, or if its output is not valid JSON.
    """
    params = make_iperf_cmdline(options)
    try:
        raw_data = subprocess.check_output(
            params,
            stderr=subprocess.STDOUT,
            universal_newlines=True
        )
    except FileNotFoundError:
        nagexit(3, "cannot find iperf3")
    except OSError as e:
        nagexit(3, f"cannot execute iperf3: {e}")
    except subprocess.CalledProcessError as e:
        nagexit(3, _iperf_error_message(e.output))

    try:
        return json.loads(raw_data)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        nagexit(3, "cannot parse iperf3 output as JSON")


def determine_result(options, json_data) -> Tuple[int, Optional[List[str]]]:
    """
    Compare the iperf3 summary against the thresholds and derive the Nagios status.

    Each threshold is evaluated on its own: a threshold that is ``None`` is skipped,
    every other value (including 0) is compared. The transfer rate is taken from
    ``json_data["end"]["sum_sent"]["bits_per_second"]`` and is bad when it is less
    than or equal to a threshold; retransmissions are taken from
    ``json_data["end"]["sum_sent"]["retransmits"]`` and are bad when they are greater
    than or equal to a threshold. The critical threshold wins over the warning one.

    Args:
        options: An object with the attributes:
            - rate_warn (int | None): Warning threshold for bits per second.
            - rate_crit (int | None): Critical threshold for bits per second.
            - retrans_warn (int | None): Warning threshold for retransmissions.
            - retrans_crit (int | None): Critical threshold for retransmissions.
            - udp (bool): If True, retransmissions are not evaluated.
        json_data (dict): Parsed JSON output from an iperf3 test.

    Returns:
        Tuple[int, List[str]]:
            - The Nagios exit code (0 = OK, 1 = WARNING, 2 = CRITICAL).
            - The messages for breached thresholds (empty if none were breached).

    Exits:
        Exits with code 3 (UNKNOWN) if an "error" key is present in the iperf3 output.
    """
    rc = 0
    statuslines = []

    if "error" in json_data:
        nagexit(3, json_data["error"])

    rate_crit, rate_warn = options.rate_crit, options.rate_warn
    if rate_crit is not None or rate_warn is not None:
        rate = json_data["end"]["sum_sent"]["bits_per_second"]
        if rate_crit is not None and rate <= rate_crit:
            rc = max(rc, 2)
            statuslines.append("transfer rate below critical threshold")
        elif rate_warn is not None and rate <= rate_warn:
            rc = max(rc, 1)
            statuslines.append("transfer rate below warning threshold")

    retrans_crit, retrans_warn = options.retrans_crit, options.retrans_warn
    if not options.udp and (retrans_crit is not None or retrans_warn is not None):
        retransmits = json_data["end"]["sum_sent"]["retransmits"]
        if retrans_crit is not None and retransmits >= retrans_crit:
            rc = max(rc, 2)
            statuslines.append("retransmissions over critical threshold")
        elif retrans_warn is not None and retransmits >= retrans_warn:
            rc = max(rc, 1)
            statuslines.append("retransmissions over warning threshold")

    return rc, statuslines


def build_single_perfdata(key: str, value, limit_warn=None, limit_crit=None) -> str:
    """
    Format one Nagios performance data item.

    Args:
        key (str): The name of the metric (e.g., 'throughput').
        value: The current value of the metric, already carrying its unit if any.
        limit_warn (optional): The warning threshold. ``None`` leaves the field empty.
        limit_crit (optional): The critical threshold. ``None`` leaves the field empty.

    Returns:
        str: ``'key'=value`` if no threshold is given, otherwise
        ``'key'=value;warn;crit``, e.g. "'throughput'=500;400;600".
    """
    if limit_warn is None and limit_crit is None:
        return f"'{key}'={value}"
    # Compare against None explicitly so that a threshold of 0 is still printed.
    warn = "" if limit_warn is None else limit_warn
    crit = "" if limit_crit is None else limit_crit
    return f"'{key}'={value};{warn};{crit}"


def bits_per_second(bps: float) -> str:
    """
    Formats a bits-per-second value as a string with a 'b' suffix.

    Values greater than 1000 are rounded to the nearest whole number first;
    smaller values are printed unchanged.

    Args:
        bps (float): The bits-per-second value to format.

    Returns:
        str: The formatted string with a 'b' suffix, e.g., "950.1234b" or "1200b".
    """
    if bps > 1000:
        bps = round(bps)
    return f"{bps}b"


def packets(pkts: int) -> str:
    """
    Formats a packet count as a string with the 'packets' suffix.

    Args:
        pkts (int): The number of packets.

    Returns:
        str: A string representing the packet count, e.g., "123packets".
    """
    return f"{pkts}packets"


def percent(prcnt: float) -> str:
    """
    Formats a float value as a percentage string.

    Args:
        prcnt (float): The percentage value to format.

    Returns:
        str: The formatted percentage string with a '%' suffix, e.g., "99.5%".
    """
    return f"{prcnt}%"


def build_perfdata(options, json_data: dict) -> List[str]:
    """
    Build the list of Nagios performance data items for an iperf3 result.

    The list contains, in this order:
    - ``bpsN``: throughput of every entry in ``json_data["intervals"]`` (1-based),
    - ``bps_avg_up`` / ``bps_avg_down``: average throughput with the rate thresholds,
    - ``retrans_sum_up`` / ``retrans_sum_down``: retransmissions with the
      retransmission thresholds (omitted when ``options.udp`` is set),
    - ``local_cpu`` / ``remote_cpu``: CPU utilization from
      ``json_data["end"]["cpu_utilization_percent"]``.

    Which directions are reported depends on the options: with ``bidir`` both "up"
    (``end.sum_sent``) and "down" (``end.sum_sent_bidir_reverse``); with only
    ``downstream`` just "down" (``end.sum_sent``); otherwise just "up"
    (``end.sum_sent``).

    Args:
        options: An object with the attributes `bidir`, `downstream`, `udp`,
            `rate_warn`, `rate_crit`, `retrans_warn` and `retrans_crit`.
        json_data (dict): Parsed iperf3 output containing measurement results.

    Returns:
        List[str]: A list of performance data strings formatted for Nagios output.
    """
    perfdata = [
        build_single_perfdata(
            f"bps{i+1}", bits_per_second(intv['sum']['bits_per_second']))
        for i, intv in enumerate(json_data["intervals"])
    ]

    json_end = json_data['end']

    # (label suffix, summary section) for every direction that was measured.
    if options.bidir:
        directions = [
            ("up", json_end['sum_sent']),
            ("down", json_end['sum_sent_bidir_reverse']),
        ]
    elif options.downstream:
        directions = [("down", json_end['sum_sent'])]
    else:
        directions = [("up", json_end['sum_sent'])]

    for suffix, summary in directions:
        perfdata.append(build_single_perfdata(
            f"bps_avg_{suffix}", bits_per_second(summary['bits_per_second']),
            options.rate_warn, options.rate_crit))

    if not options.udp:
        for suffix, summary in directions:
            perfdata.append(build_single_perfdata(
                f"retrans_sum_{suffix}", packets(summary['retransmits']),
                options.retrans_warn, options.retrans_crit))

    cpu = json_end['cpu_utilization_percent']
    perfdata.append(build_single_perfdata("local_cpu", percent(cpu['host_total'])))
    perfdata.append(build_single_perfdata("remote_cpu", percent(cpu['remote_total'])))
    return perfdata


def check_iperf3(options):
    """
    Run the iperf3 test, evaluate it and exit with the Nagios status.

    Calls `run_iperf3`, `determine_result` and `build_perfdata` in that order and
    hands the outcome to `nagexit`, so this function never returns.

    Args:
        options: An object carrying every attribute required by
            `make_iperf_cmdline`, `determine_result` and `build_perfdata`
            (`remote`, `port`, `bidir`, `downstream`, `udp`, `bytes`, `time`,
            `rate_warn`, `rate_crit`, `retrans_warn`, `retrans_crit`).

    Exits:
        Exits with 0 (OK), 1 (WARNING) or 2 (CRITICAL) according to the thresholds,
        or with 3 (UNKNOWN) if iperf3 failed or its output lacks an expected field.
    """
    json_data = run_iperf3(options)
    try:
        rc, statuslines = determine_result(options, json_data)
        perfdata = build_perfdata(options, json_data)
    except (KeyError, IndexError, TypeError) as e:
        # A traceback would end the process with exit code 1 (WARNING); report
        # output we cannot interpret as UNKNOWN instead.
        nagexit(3, f"unexpected iperf3 output ({e})")
    nagexit(rc, statuslines, perfdata)


class _NagiosOptionParser(OptionParser):
    """OptionParser whose usage errors exit with the Nagios UNKNOWN code."""

    def error(self, msg):
        # The default implementation exits with 2, which Nagios reads as CRITICAL.
        self.print_usage(sys.stderr)
        self.exit(3, f"{self.get_prog_name()}: error: {msg}\n")


def _build_parser() -> OptionParser:
    """Create the command-line parser with all generic and threshold options."""
    desc = "%prog is used to run an iperf3 check against a given host."
    parser = _NagiosOptionParser(description=desc, version="%prog version 0.2")

    gen_opts = OptionGroup(parser, "Generic options")
    thres_opts = OptionGroup(parser, "Threshold options")
    parser.add_option_group(gen_opts)
    parser.add_option_group(thres_opts)

    # transfer rate
    thres_opts.add_option("-w", "--rate-warning", dest="rate_warn",
                          type="int", metavar="BITS", action="store",
                          help="Defines the transfer rate's warning threshold")
    thres_opts.add_option("-c", "--rate-critical", dest="rate_crit",
                          type="int", metavar="BITS", action="store",
                          help="Defines the transfer rate's critical threshold")

    # retransmits
    thres_opts.add_option("-W", "--retransmit-warning", dest="retrans_warn",
                          type="int", metavar="RETRANS", action="store",
                          help="Defines the retransmission warning threshold")
    thres_opts.add_option("-C", "--retransmit-critical", dest="retrans_crit",
                          type="int", metavar="RETRANS", action="store",
                          help="Defines the retransmission critical threshold")

    # -r / --remote
    gen_opts.add_option("-r", "--remote", dest="remote",
                        type="string", action="store",
                        help="iperf3 server to connect to")

    # -p / --port
    gen_opts.add_option("-p", "--port", dest="port",
                        type="int", action="store", default=5201,
                        help="iperf3 server port to connect to [default: %default]")

    # -d / --downstream
    gen_opts.add_option("-d", "--downstream", dest="downstream",
                        action="store_true", default=False,
                        help="measure downstream instead of upstream")

    # --bidir
    gen_opts.add_option("--bidir", dest="bidir",
                        action="store_true", default=False,
                        help="test in both directions (normal and reverse), with both the client and server sending and receiving data simultaneously")

    # -u / --udp
    gen_opts.add_option("-u", "--udp", dest="udp",
                        action="store_true", default=False,
                        help="use UDP rather than TCP")

    # -t / --time
    gen_opts.add_option("-t", "--time", dest="time",
                        type="int", action="store", default=10,
                        help="time in seconds to transmit for [default: %default]")

    # -n / --bytes
    gen_opts.add_option("-n", "--bytes", dest="bytes",
                        type="string", action="store",
                        help="number of bytes to transmit (instead of --time)")
    return parser


def main(argv: Optional[List[str]] = None) -> NoReturn:
    """
    Parse the command line and run the check.

    Args:
        argv: Arguments to parse; ``None`` means ``sys.argv[1:]``.

    Exits:
        Prints the help text and exits with 3 (UNKNOWN) if no remote host is given
        or the test duration is not positive; otherwise exits through `check_iperf3`.
    """
    parser = _build_parser()
    (opts, _args) = parser.parse_args(argv)
    if not opts.remote or opts.time <= 0:
        parser.print_help()
        sys.exit(3)

    check_iperf3(opts)


if __name__ == "__main__":
    main()