#!/usr/bin/env python3
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details.
"""

import argparse  # pylint: disable=C0413
import sys  # pylint: disable=C0413

import lib.args  # pylint: disable=C0413
import lib.base  # pylint: disable=C0413
import lib.shell  # pylint: disable=C0413
import lib.lftest  # pylint: disable=C0413
import lib.txt  # pylint: disable=C0413
from lib.globals import (STATE_CRIT, STATE_OK,  # pylint: disable=C0413
                          STATE_UNKNOWN, STATE_WARN)

__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2025062601'

# Text handed to argparse as the parser description (shown by `--help`).
DESCRIPTION = """Checks dmesg for emerg, alert, crit and err messages. Executes `dmesg
                --level=emerg,alert,crit,err --ctime `. If you fixed the issues (or just want to
                clear them), use `dmesg --clear` to clear the Kernel Ring Buffer Messages."""

# Value of `--severity` when the option is not given on the command line.
DEFAULT_SEVERITY = 'crit'

# Shell command run by main() to fetch the kernel messages; restricted to the
# levels emerg, alert, crit and err.
cmd = 'dmesg --level=emerg,alert,crit,err --ctime'

# Kernel messages to ignore by default: false positives, known bugs and messages
# without impact on anything. Used as the default value of `--ignore` and passed
# by main() to lib.txt.filter_mltext(). Every entry starts with a single space.
DEFAULT_IGNORE = [
    ' Asking for cache data failed',
    ' Assuming drive cache: write through',
    ' brcmfmac: brcmf_c_preinit_dcmds: Firmware: BCM4345/6',                                # Raspbian
    ' brcmfmac: brcmf_fw_alloc_request: using brcm/brcmfmac43455-sdio for chip BCM4345/6',  # Raspbian
    ' CIFS VFS: Free previous auth_key.response = ',
    ' cpufreq: __cpufreq_add_dev: ->get() failed',
    ' EFI MOKvar config table is not in EFI runtime memory',    # https://rockylinux.org/news/rocky-linux-8-5-ga-release/
    ' ERST: Failed to get Error Log Address Range.',
    ' flip_done timed out',                                     # https://access.redhat.com/solutions/4490391
    ' i8042: No controller found',
    ' Ignoring unsafe software power cap!',
    ' integrity: Problem loading X.509 certificate -126',       # https://access.redhat.com/solutions/7049158
    ' ioctl error in smb2_get_dfs_refer rc=-5',                 # https://access.redhat.com/solutions/3496971
    ' kvm_set_msr_common: MSR_IA32_DEBUGCTLMSR ',               # is mostly caused by Windows-VMs on KVM/oVirt
    ' mokvar: EFI MOKvar config table is not in EFI runtime memory', # https://rockylinux.org/news/rocky-linux-8-5-ga-release/
    ' No Caching mode page found',
    ' SMBus base address uninitialized - upgrade BIOS or use ', # https://access.redhat.com/solutions/2115401
    ' SMBus Host Controller not enabled!',
    ' tsc: Fast TSC calibration failed',
    ' unhandled rdmsr: ',                                       # https://access.redhat.com/solutions/59299
    ' unhandled wrmsr: ',                                       # https://bugzilla.redhat.com/show_bug.cgi?id=874627
    ' vcpu0 disabled perfctr wrmsr',                            # https://access.redhat.com/solutions/2188061
    ' Warning: Deprecated Driver is detected', # can be ignored during daily runtime - driver is still supported/running, and it is just a warning
    ' Warning: Unmaintained driver is detected', # can be ignored during daily runtime - driver is still supported/running, and it is just a warning
]


def parse_args():
    """Declare the command line options of this check and parse the command line.

    Returns the namespace produced by `argparse`, carrying the attributes
    `ALWAYS_OK`, `IGNORE`, `SEVERITY` and `TEST`.
    """
    cli = argparse.ArgumentParser(description=DESCRIPTION)

    # One entry per option: (option strings, keyword arguments for add_argument).
    # The order is kept as-is because it determines the order in the help output.
    option_table = (
        (
            ('-V', '--version'),
            {
                'action': 'version',
                'version': '{0}: v{1} by {2}'.format('%(prog)s', __version__, __author__),
            },
        ),
        (
            ('--always-ok',),
            {
                'help': 'Always returns OK.',
                'dest': 'ALWAYS_OK',
                'action': 'store_true',
                'default': False,
            },
        ),
        (
            ('--ignore',),
            {
                'help': 'Ignore a kernel message (case-sensitive, repeating). Default: %(default)s',
                'dest': 'IGNORE',
                'default': DEFAULT_IGNORE,
                'action': 'append',
            },
        ),
        (
            ('--severity',),
            {
                'help': 'Severity for alerting. Default: %(default)s',
                'dest': 'SEVERITY',
                'default': DEFAULT_SEVERITY,
                'choices': ['warn', 'crit'],
            },
        ),
        (
            ('--test',),
            {
                'help': 'For unit tests. Needs "path-to-stdout-file,path-to-stderr-file,expected-retc".',
                'dest': 'TEST',
                'type': lib.args.csv,
            },
        ),
    )

    for option_strings, settings in option_table:
        cli.add_argument(*option_strings, **settings)

    return cli.parse_args()


def main():
    """Run the check: fetch the kernel messages, filter them and report the result.

    The raw output comes either from the `dmesg` command or, if `--test` is
    given, from `lib.lftest.test()`. It is filtered with the `--ignore` list;
    if anything is left over, the state is CRIT or WARN depending on
    `--severity`, otherwise OK. The message and state are handed to
    `lib.base.oao()`.
    """

    # parse the command line, exit with UNKNOWN if it fails
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # fetch data
    if args.TEST is not None:
        # do not call the command, put in test data
        stdout, stderr, retc = lib.lftest.test(args.TEST)
    else:
        # execute the shell command and return its result and exit code;
        # anything on stderr or a non-zero return code goes to lib.base.cu()
        stdout, stderr, retc = lib.base.coe(lib.shell.shell_exec(cmd))
        if stderr or retc != 0:
            lib.base.cu(stderr)

    # analyze data: run the output through the ignore list
    remaining = lib.txt.filter_mltext(stdout.strip(), args.IGNORE).strip()

    # build the message
    if remaining:
        lines = remaining.split('\n')
        cnt = len(lines)
        if cnt > 10:
            # shorten the message: keep only the first and the last five lines
            lines = lines[:5] + ['...'] + lines[-5:]
        msg = '{} {} in Kernel Ring Buffer.\n\n{}'.format(
            cnt,
            lib.txt.pluralize('error', cnt),
            '\n'.join(lines),
        )
        if args.SEVERITY == 'crit':
            state = STATE_CRIT
        else:
            state = STATE_WARN
    else:
        msg = 'Everything is ok.'
        state = STATE_OK

    # over and out
    lib.base.oao(msg, state, always_ok=args.ALWAYS_OK)


# Run the check only when this file is executed as a script.
if __name__ == '__main__':
    try:
        main()
    except Exception:   # pylint: disable=W0703
        # lib.base.cu() is called for any exception that escapes main()
        lib.base.cu()
