#!/usr/bin/env python3
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details."""

import argparse
import sys

import lib.args
import lib.base
import lib.lftest
import lib.shell
import lib.txt
from lib.globals import STATE_CRIT, STATE_OK, STATE_UNKNOWN, STATE_WARN

__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2026040801'

# Text shown by argparse as the program description (see parse_args()).
# Kept as one string per output line so the line breaks are explicit.
DESCRIPTION = '\n'.join(
    (
        'Checks the kernel ring buffer (dmesg) for messages at severity levels emerg, alert,',
        'crit, and err. Known false positives and hardware-specific noise are filtered out by',
        'default. To clear reported messages after resolving the underlying issue, run',
        '"dmesg --clear".',
        'Requires root or sudo.',
    )
)

# State reported for findings when --severity is not given on the command line.
DEFAULT_SEVERITY = 'crit'

# Command line executed by main() to read the kernel ring buffer; it is limited
# to the four levels named in DESCRIPTION.
cmd = 'dmesg --level=emerg,alert,crit,err --ctime'

# Kernel messages that are dropped before counting: false positives, known bugs
# and messages without impact on anything. main() hands this list (plus any
# user-supplied --ignore values) to lib.txt.filter_mltext(); the entries are
# presumably matched as case-sensitive substrings - verify against lib.txt.
# Must stay a list, because argparse's 'append' action extends a copy of it.
DEFAULT_IGNORE = [
    ' Asking for cache data failed',
    ' Assuming drive cache: write through',
    ' brcmfmac: brcmf_c_preinit_dcmds: Firmware: BCM4345/6',
    # one single entry, deliberately split over two source lines (no comma missing):
    (
        ' brcmfmac: brcmf_fw_alloc_request: using brcm/brcmfmac43455-sdio'
        ' for chip BCM4345/6'
    ),
    ' CIFS VFS: Free previous auth_key.response = ',
    ' cpufreq: __cpufreq_add_dev: ->get() failed',
    ' EFI MOKvar config table is not in EFI runtime memory',
    ' ERST: Failed to get Error Log Address Range.',
    # https://access.redhat.com/solutions/4490391
    ' flip_done timed out',
    ' i8042: No controller found',
    ' Ignoring unsafe software power cap!',
    ' integrity: Problem loading X.509 certificate -126',
    ' ioctl error in smb2_get_dfs_refer rc=-5',
    ' kvm_set_msr_common: MSR_IA32_DEBUGCTLMSR ',
    ' mokvar: EFI MOKvar config table is not in EFI runtime memory',
    ' No Caching mode page found',
    ' SMBus base address uninitialized - upgrade BIOS or use ',
    ' SMBus Host Controller not enabled!',
    ' tsc: Fast TSC calibration failed',
    # https://access.redhat.com/solutions/59299
    ' unhandled rdmsr: ',
    # https://bugzilla.redhat.com/show_bug.cgi?id=874627
    ' unhandled wrmsr: ',
    # https://access.redhat.com/solutions/2188061
    ' vcpu0 disabled perfctr wrmsr',
    ' Warning: Deprecated Driver is detected',
    ' Warning: Unmaintained driver is detected',
]


def parse_args():
    """Build the command line interface and return the parsed options.

    Returns the argparse namespace with the attributes ALWAYS_OK, IGNORE,
    SEVERITY and TEST. Arguments the parser does not know are tolerated:
    ``parse_known_args()`` is used and the list of leftovers is discarded.
    """
    cli = argparse.ArgumentParser(description=DESCRIPTION)

    cli.add_argument(
        '-V',
        '--version',
        version=f'%(prog)s: v{__version__} by {__author__}',
        action='version',
    )

    cli.add_argument(
        '--always-ok',
        action='store_true',
        default=False,
        dest='ALWAYS_OK',
        help=lib.args.help('--always-ok'),
    )

    # 'append' together with a list default: values given on the command line
    # are added after the built-in DEFAULT_IGNORE entries, they do not replace them.
    cli.add_argument(
        '--ignore',
        action='append',
        default=DEFAULT_IGNORE,
        dest='IGNORE',
        help='Ignore a kernel message (case-sensitive, repeating). Default: %(default)s',
    )

    severity_help = lib.args.help('--severity') + ' Default: %(default)s'
    cli.add_argument(
        '--severity',
        choices=['warn', 'crit'],
        default=DEFAULT_SEVERITY,
        dest='SEVERITY',
        help=severity_help,
    )

    cli.add_argument(
        '--test',
        type=lib.args.csv,
        dest='TEST',
        help=lib.args.help('--test'),
    )

    # element 0 is the namespace, element 1 the unrecognised arguments
    return cli.parse_known_args()[0]


def main():
    """Run the check: collect dmesg output, filter it and report the result.

    Steps:
    1. Parse the command line; any SystemExit raised while doing so is turned
       into an exit with STATE_UNKNOWN.
    2. Obtain the kernel messages, either by executing ``cmd`` or, if --test
       was given, from lib.lftest.test().
    3. Remove everything matching the ignore list.
    4. Report STATE_OK if nothing is left, otherwise STATE_CRIT or STATE_WARN
       depending on --severity, listing the remaining messages (at most the
       first and last five if there are more than ten).
    """

    # step 1: command line
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # step 2: raw data
    if args.TEST is not None:
        # test mode: the command is not called, canned data is used instead
        stdout, stderr, retc = lib.lftest.test(args.TEST)
    else:
        # live mode: run the shell command, bail out on any sign of failure
        stdout, stderr, retc = lib.base.coe(lib.shell.shell_exec(cmd))
        if retc != 0 or stderr:
            lib.base.cu(stderr)

    # step 3: drop ignored messages
    remaining = lib.txt.filter_mltext(stdout.strip(), args.IGNORE).strip()

    # step 4: compose message and state
    if remaining:
        lines = remaining.split('\n')
        cnt = len(lines)
        if cnt > 10:
            # too long for the output: keep the head and the tail only
            lines = lines[:5] + ['...'] + lines[-5:]
        result = '\n'.join(lines)
        msg = f'{cnt} {lib.txt.pluralize("error", cnt)} in Kernel Ring Buffer.\n\n{result}'
        if args.SEVERITY == 'crit':
            state = STATE_CRIT
        else:
            state = STATE_WARN
    else:
        msg = 'Everything is ok.'
        state = STATE_OK

    # over and out
    lib.base.oao(msg, state, always_ok=args.ALWAYS_OK)


# Script entry point: only run the check when executed directly, not on import.
if __name__ == '__main__':
    try:
        main()
    except Exception:
        # Last-resort handler for any unexpected error raised by main().
        # SystemExit is not an Exception subclass, so regular exits pass through.
        # NOTE(review): lib.base.cu() presumably reports the current exception
        # and exits with an UNKNOWN state - confirm against lib.base.
        lib.base.cu()
