#!/usr/bin/env python3
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details."""

import argparse
import sys

import lib.args
import lib.base
import lib.lftest
import lib.shell
import lib.txt
from lib.globals import STATE_CRIT, STATE_OK, STATE_UNKNOWN, STATE_WARN

# Plugin metadata; both values are printed by the `--version` option.
__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2026061201'

# Passed to argparse as the parser description, so it is shown in the `--help` output.
DESCRIPTION = """Checks IPMI sensor readings (temperature, voltage, fan speed, power, etc.) using
ipmitool. Alerts when any sensor reports a non-ok status. Provides detailed output
including current values, thresholds, and sensor states.
Requires root or sudo."""

# Zero-based column indexes of one line of `ipmitool sensor list` output, after the line has been
# split on '|'. Columns 4 to 9 hold the six thresholds, ordered from the lowest to the highest.
COL_SENSOR = 0
COL_VALUE = 1
COL_UOM = 2  # Unit Of Measurement
COL_STATE = 3
COL_LOWERNR = 4  # NR = non-recoverable
COL_LOWERCT = 5  # CT = critical
COL_LOWERNC = 6  # NC = non-critical
COL_UPPERNC = 7  # NC = non-critical
COL_UPPERCT = 8  # CT = critical
COL_UPPERNR = 9  # NR = non-recoverable


def parse_args():
    """Parse command line arguments using argparse.

    Returns the argparse namespace. Unknown arguments are tolerated (and dropped) because
    `parse_known_args()` is used. `--port` is converted to an integer, so a non-numeric port is
    rejected by argparse instead of being handed to ipmitool unchanged.
    """
    parser = argparse.ArgumentParser(description=DESCRIPTION)

    parser.add_argument(
        '-V',
        '--version',
        action='version',
        version=f'%(prog)s: v{__version__} by {__author__}',
    )

    parser.add_argument(
        '--authtype',
        help='Authentication type for IPMIv1.5 lan session activation. '
        'Supported types are NONE, PASSWORD, MD2, MD5, or OEM. '
        'Default: %(default)s',
        dest='V15AUTHTYPE',
        choices=['NONE', 'PASSWORD', 'MD2', 'MD5', 'OEM'],
        default='NONE',
    )

    parser.add_argument(
        '-H',
        '--hostname',
        help='Remote server address, can be a hostname or IP address. '
        'Required for lan and lanplus interfaces.',
        dest='HOSTNAME',
        default=None,
    )

    parser.add_argument(
        '--interface',
        help='IPMI interface to use. '
        'Supported types are "lan" (IPMI v1.5) or "lanplus" (IPMI v2.0). '
        'Default: %(default)s',
        dest='INTERFACE',
        choices=['lan', 'lanplus'],
        default='lan',
    )

    parser.add_argument(
        '--password',
        help='Remote server password.',
        dest='PASSWORD',
    )

    parser.add_argument(
        '--port',
        help='Remote server UDP port to connect to. Default: %(default)s',
        dest='PORT',
        # a port is always numeric; let argparse reject anything else up front
        type=int,
        default=623,
    )

    parser.add_argument(
        '--privlevel',
        help='Force session privilege level. '
        'Can be CALLBACK, USER, OPERATOR, ADMINISTRATOR. '
        'Default: %(default)s',
        dest='PRIVLEVEL',
        choices=['CALLBACK', 'USER', 'OPERATOR', 'ADMINISTRATOR'],
        default='USER',
    )

    parser.add_argument(
        '--test',
        help=lib.args.help('--test'),
        dest='TEST',
        type=lib.args.csv,
    )

    parser.add_argument(
        '--username',
        help='Remote server username. Default: %(default)s',
        dest='USERNAME',
        default='NULL',
    )

    # only the known arguments are of interest; anything else on the command line is ignored
    args, _ = parser.parse_known_args()
    return args


def shorten_uom(uom):
    """Return the short symbol for a unit of measurement.

    'degrees C', 'degrees F', 'Volts' and 'Watts' are abbreviated to 'C', 'F', 'V' and 'W'.
    Any other value is returned unchanged.
    """
    abbreviations = (
        ('degrees C', 'C'),
        ('degrees F', 'F'),
        ('Volts', 'V'),
        ('Watts', 'W'),
    )
    for long_form, symbol in abbreviations:
        if uom == long_form:
            return symbol
    # no abbreviation known, hand the unit back as it came in
    return uom


# For each IPMI unit of measurement: the canonical sensor type, followed by the lower-cased words
# that already express this type when they show up in a sensor name. metric_name() appends the
# canonical type as a suffix so that a dashboard can group readings with a stable regex
# (e.g. /Temperature$/), matching the Redfish sensor wording.
SENSOR_TYPES = {
    'degrees C': ('Temperature', ('temp', 'temperature')),
    'degrees F': ('Temperature', ('temp', 'temperature')),
    'Volts': ('Voltage', ('volt', 'voltage', 'volts')),
    'Amps': ('Current', ('amp', 'amps', 'current')),
    'Watts': ('Power', ('power', 'pwr')),
    'RPM': ('Fan', ('fan',)),
}


def metric_name(sensor, uom):
    """Return the perfdata metric name for an IPMI sensor.

    Vendor-specific ipmitool sensor names do not reliably tell the sensor type, but the unit
    column does. For a unit listed in SENSOR_TYPES, the name is split on whitespace, every word
    that merely repeats the type (compared case-insensitively) is removed, and the remaining
    words plus the canonical type are joined with underscores: 'CPU Temp' with 'degrees C' gives
    'CPU_Temperature', not 'CPU_Temp_Temperature'. If nothing but type words is left, the
    original words are kept so the result is never just the bare suffix of a named sensor.

    For a unit without a mapping, the sensor name is returned with each space replaced by an
    underscore.
    """
    if uom not in SENSOR_TYPES:
        return sensor.replace(' ', '_')

    canonical_type, type_words = SENSOR_TYPES[uom]
    words = sensor.split()
    remaining = [word for word in words if word.lower() not in type_words]
    # fall back to the full name if stripping the type words would leave nothing
    return '_'.join((remaining or words) + [canonical_type])


def main():
    """Run the check.

    Fetches the sensor table from `ipmitool sensor list` (or from test data if `--test` is
    given), turns every threshold sensor with a usable reading into perfdata and derives the
    check state from the sensor states: 'nr' and 'cr' are CRIT, any other non-'ok' state is WARN.
    Discrete sensors and sensors in state 'na' or 'ns' are ignored. The result is handed to
    lib.base.oao().
    """

    # parse the command line
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # fetch data
    if args.TEST is None:
        # Connection options are only needed for a remote target. They go in front of the
        # `sensor list` subcommand, as in the ipmitool synopsis, so that the call does not depend
        # on getopt() permuting options that follow the subcommand.
        options = []
        if args.HOSTNAME:
            if args.INTERFACE == 'lan':
                # the auth type only applies to IPMI v1.5 ("lan") sessions
                options += ['-A', args.V15AUTHTYPE]
            options += ['-H', args.HOSTNAME]
            if args.INTERFACE:
                options += ['-I', args.INTERFACE]
            if args.PRIVLEVEL:
                options += ['-L', args.PRIVLEVEL]
            if args.PORT:
                options += ['-p', str(args.PORT)]
            if args.PASSWORD:
                # NOTE(review): a password passed as an argument may be visible in the process list
                options += ['-P', args.PASSWORD]
            if args.USERNAME:
                options += ['-U', args.USERNAME]
        cmd = ['ipmitool', *options, 'sensor', 'list']

        # execute the shell command and return its result and exit code
        stdout, stderr, retc = lib.base.coe(lib.shell.shell_exec(cmd))
    else:
        # do not call the command, put in test data
        stdout, stderr, retc = lib.lftest.test(args.TEST)

    if stderr or retc != 0:
        lib.base.cu(stderr)

    # init some vars
    details = []  # one '\n* ...' entry per sensor that is not ok
    perfdata = []
    state = STATE_OK
    sensor_counter = 0

    for line in stdout.splitlines():
        if not line.strip():
            # blank lines carry no sensor
            continue

        # split by '|' and trim each column
        cols = [col.strip() for col in line.split('|')]
        if len(cols) <= COL_UPPERNR:
            # not a complete sensor row; report it instead of failing with an IndexError
            lib.base.cu(f'Unable to parse this line of the ipmitool output: {line}')

        if cols[COL_UOM] == 'discrete':
            # for now we support only 'Threshold' sensors - maybe enhance this in future
            # ['Chassis Intru', '0x0', 'discrete', '0x0000', 'na', 'na', 'na', 'na', 'na', 'na']
            continue

        sensor_status = cols[COL_STATE]

        # na - Not Available, ns - Not Specified
        if sensor_status in ('na', 'ns'):
            continue

        perfwarn = None if cols[COL_UPPERNC] == 'na' else cols[COL_UPPERNC]
        perfcrit = None if cols[COL_UPPERCT] == 'na' else cols[COL_UPPERCT]
        perfmin = None if cols[COL_LOWERNR] == 'na' else cols[COL_LOWERNR]
        # a positive lower non-recoverable threshold is replaced by 0 as the minimum
        perfmin = 0 if perfmin and float(perfmin) > 0 else perfmin
        if cols[COL_UOM] == 'percent':
            perfmax = 100
        elif cols[COL_UPPERNR] == 'na':
            perfmax = None
        else:
            perfmax = cols[COL_UPPERNR]
        perfdata.append(
            lib.base.get_perfdata(
                metric_name(cols[COL_SENSOR], cols[COL_UOM]),
                cols[COL_VALUE],
                uom=None,
                warn=perfwarn,
                crit=perfcrit,
                _min=perfmin,
                _max=perfmax,
            )
        )

        sensor_counter += 1

        # ok
        if sensor_status == 'ok':
            continue

        reading = f'{cols[COL_SENSOR]} ({cols[COL_VALUE]} {shorten_uom(cols[COL_UOM])})'

        if sensor_status == 'nr':
            # nr - Non Recoverable
            details.append(f'\n* {reading} is NON-RECOVERABLE. Hardware might be DAMAGED.')
            sensor_state = STATE_CRIT
        elif sensor_status == 'cr':
            # cr - Critical
            details.append(f'\n* {reading} is above/below a critical threshold.')
            sensor_state = STATE_CRIT
        elif sensor_status == 'nc':
            # nc - Non Critical
            details.append(f'\n* {reading} is above/below a non-critical threshold.')
            sensor_state = STATE_WARN
        else:
            # any other state is still a warning, but tell the admin which sensor caused it
            details.append(f'\n* {reading} reports the unexpected state "{sensor_status}".')
            sensor_state = STATE_WARN

        state = lib.base.get_worst(sensor_state, state)

    checked = f'{sensor_counter} {lib.txt.pluralize("sensor", sensor_counter)}'
    if state == STATE_CRIT:
        summary = f'Checked {checked}. There are critical errors.'
    elif state == STATE_WARN:
        summary = f'Checked {checked}. There are warnings.'
    else:
        summary = f'Everything is ok, checked {checked}.'

    # over and out
    lib.base.oao(summary + ''.join(details), state, ''.join(perfdata))


# Run the check only when executed as a script, not when imported.
if __name__ == '__main__':
    try:
        main()
    except Exception:
        # Any unhandled error ends up in lib.base.cu() (presumably reports it and exits UNKNOWN -
        # verify against lib.base). SystemExit is not an Exception subclass, so regular exits
        # pass through untouched.
        lib.base.cu()
