#!/usr/bin/env python3
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details."""

import argparse
import json
import os
import sys

import lib.args
import lib.base
import lib.lftest
import lib.redfish
import lib.time
import lib.txt
import lib.url
from lib.globals import STATE_CRIT, STATE_OK, STATE_UNKNOWN, STATE_WARN

__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2026060705'

# Program description handed to argparse (shown in the `--help` output).
DESCRIPTION = """Checks the event log entries exposed under the LogServices of a Redfish-compatible
server via the Redfish API and alerts based on the severity of the log entries. By default it reads
the System Event Log (SEL); `--log-type` selects the management controller log (MEL) or both.
Entries can be filtered by regular expression (--match, --ignore), and entries older than --max-age
days can be aged out so a long-since resolved event does not keep the check in a non-OK state
forever."""

# Redfish service root; appended to --url for the service root and the
# Managers/Systems collection requests.
API_BASE = '/redfish/v1'
DEFAULT_CACHE_EXPIRE = 15  # minutes; keep below the controller's session timeout
# NOTE: --insecure is a `store_true` option, so with a default of True the
# resulting value is True whether or not the flag is given.
DEFAULT_INSECURE = True
DEFAULT_LOG_TYPE = 'sel'  # one of the --log-type choices: sel, mel, both
DEFAULT_MAX_AGE = 0  # days; 0 disables aging
DEFAULT_NO_PROXY = False
DEFAULT_RETRIES = 3  # extra attempts on a failed Redfish request
DEFAULT_TIMEOUT = 8  # seconds
DEFAULT_URL = 'https://localhost:5000'

# Per-vendor LogService paths, keyed by the vendor name returned by
# lib.redfish.get_vendor() and then by log type ("sel" / "mel"). The selected path is
# appended to each member's "@odata.id" to build the request URL.
# The SEL (System Event Log) paths are exercised against the
# fixtures/mockup; the MEL (management controller event log) paths are ported 1:1 from
# bb-Ricardo/check_redfish and still need real-hardware verification, which is why --log-type
# defaults to "sel". An empty string means "no known path for this vendor/log"; such a log
# is skipped rather than requested.
LOG_PATHS = {
    'ami': {'sel': '/LogServices/BIOS/Entries', 'mel': '/LogServices/EventLog/Entries'},
    # NOTE(review): the only SEL path without a log name and "/Entries" suffix - confirm
    # that this is intended and not a truncated path.
    'avigilon': {'sel': '/LogServices', 'mel': '/LogServices/Lclog/Entries'},
    'cisco': {'sel': '/LogServices/SEL/Entries', 'mel': '/LogServices/CIMC/Entries'},
    'dell': {'sel': '/LogServices/Sel/Entries', 'mel': '/LogServices/Lclog/Entries'},
    'generic': {'sel': '/LogServices/Log/Entries', 'mel': ''},
    # "hp" and "hpe" intentionally carry identical paths
    'hp': {'sel': '/LogServices/IML/Entries', 'mel': '/LogServices/IEL/Entries'},
    'hpe': {'sel': '/LogServices/IML/Entries', 'mel': '/LogServices/IEL/Entries'},
    'lenovo': {
        'sel': '/LogServices/ActiveLog/Entries',
        'mel': '/LogServices/StandardLog/Entries',
    },
    'supermicro': {'sel': '/LogServices/Log1/Entries', 'mel': ''},
    'ts_fujitsu': {
        'sel': '/LogServices/SystemEventLog/Entries',
        'mel': '/LogServices/InternalEventLog/Entries',
    },
}


def parse_args():
    """Parse the command line and return the resulting argparse namespace.

    Option values are stored under upper-case attribute names (`args.URL`,
    `args.LOG_TYPE`, ...). `--ignore` and `--match` may be given several times
    and are `None` when they were not specified at all. Unknown arguments are
    tolerated, because `parse_known_args()` is used.

    The help texts of `--ignore`, `--match` and `--max-age` talk about "log
    entries" rather than "SEL entries": main() applies these filters to every
    log selected by `--log-type`, which includes the MEL.
    """
    parser = argparse.ArgumentParser(description=DESCRIPTION)

    parser.add_argument(
        '-V',
        '--version',
        action='version',
        version=f'%(prog)s: v{__version__} by {__author__}',
    )

    parser.add_argument(
        '--always-ok',
        help=lib.args.help('--always-ok'),
        dest='ALWAYS_OK',
        action='store_true',
        default=False,
    )

    parser.add_argument(
        '--cache-expire',
        help=lib.args.help('--cache-expire') + ' Default: %(default)s',
        dest='CACHE_EXPIRE',
        type=int,
        default=DEFAULT_CACHE_EXPIRE,
    )

    parser.add_argument(
        '--ignore',
        help='Ignore log entries whose message matches this Python regular expression. '
        'Case-sensitive by default; use `(?i)` for case-insensitive matching. '
        'Can be specified multiple times. '
        'Example: `--ignore="Log area reset/cleared"`.',
        dest='IGNORE',
        action='append',
        default=None,
    )

    # NOTE: as long as DEFAULT_INSECURE is True, this `store_true` flag cannot
    # change the value - args.INSECURE is True with or without `--insecure`.
    parser.add_argument(
        '--insecure',
        help=lib.args.help('--insecure'),
        dest='INSECURE',
        action='store_true',
        default=DEFAULT_INSECURE,
    )

    parser.add_argument(
        '--log-type',
        help='Which log to read: `sel` (System Event Log, default), `mel` (management controller '
        'event log) or `both`. '
        'Default: %(default)s',
        dest='LOG_TYPE',
        choices=['sel', 'mel', 'both'],
        default=DEFAULT_LOG_TYPE,
    )

    parser.add_argument(
        '--match',
        help='Only consider log entries whose message matches this Python regular expression. '
        'Case-sensitive by default; use `(?i)` for case-insensitive matching. '
        'Can be specified multiple times. '
        'Example: `--match="(?i)temperature"`.',
        dest='MATCH',
        action='append',
        default=None,
    )

    parser.add_argument(
        '--max-age',
        help='Age out log entries older than this many days: they are no longer alerted on, only '
        'counted in the summary. A controller keeps an entry until the log is cleared, so a '
        'long-since resolved event would otherwise keep the check in a non-OK state forever. '
        'Default: %(default)s (0 disables aging).',
        dest='MAX_AGE',
        type=int,
        default=DEFAULT_MAX_AGE,
    )

    parser.add_argument(
        '--no-proxy',
        help=lib.args.help('--no-proxy'),
        dest='NO_PROXY',
        action='store_true',
        default=DEFAULT_NO_PROXY,
    )

    parser.add_argument(
        '--password',
        help='Redfish API password.',
        dest='PASSWORD',
    )

    parser.add_argument(
        '--retries',
        help='Number of extra attempts if a request to the Redfish API fails, before the '
        'check gives up. Helps against an occasionally slow or flaky management controller. '
        'Default: %(default)s',
        dest='RETRIES',
        type=int,
        default=DEFAULT_RETRIES,
    )

    parser.add_argument(
        '--test',
        help=lib.args.help('--test'),
        dest='TEST',
        type=lib.args.csv,
    )

    parser.add_argument(
        '--timeout',
        help=lib.args.help('--timeout') + ' Default: %(default)s (seconds)',
        dest='TIMEOUT',
        type=int,
        default=DEFAULT_TIMEOUT,
    )

    parser.add_argument(
        '--url',
        help='Redfish API URL. Default: %(default)s',
        dest='URL',
        default=DEFAULT_URL,
    )

    parser.add_argument(
        '--username',
        help='Redfish API username.',
        dest='USERNAME',
    )

    # unknown arguments are returned separately by parse_known_args() and dropped here
    args, _ = parser.parse_known_args()
    return args


def load_test_fixture(test_args, path):
    """Read one fixture file of the Redfish walk and return its parsed JSON.

    The fixture is read through lib.lftest.test(), called with the `--test`
    argument list in which the first element is replaced by `path`. The
    replacement is done on a copy, so the caller's list (`args.TEST`) keeps
    its original base name across the several calls of one run.

    Parameters:
        test_args: The parsed `--test` argument list; only the elements after
            the first one are passed on unchanged.
        path: Path of the fixture file to load for this step of the walk.

    Returns:
        The data structure decoded from the fixture's JSON content.

    On a missing file or malformed JSON, lib.base.cu() is called with a
    helpful message instead of letting json.loads() raise a traceback.
    """
    if not os.path.isfile(path):
        lib.base.cu(f'Test fixture not found: "{path}".')
    # hand over a copy instead of overwriting test_args[0] in the caller's list
    stdout, _, _ = lib.lftest.test([path, *test_args[1:]])
    try:
        return json.loads(stdout)
    except ValueError as e:  # json.JSONDecodeError is a subclass of ValueError
        lib.base.cu(f'Test fixture "{path}" does not contain valid JSON: {e}')


def _fetch_json(args, url, header):
    """Request `url` via lib.url.fetch_json() with the connection options from `args`.

    Returns the result of lib.url.fetch_json() unchanged; the callers treat it
    as a `(success, result)` tuple and decide themselves whether a failure is
    fatal.
    """
    return lib.url.fetch_json(
        url,
        header=header,
        insecure=args.INSECURE,
        no_proxy=args.NO_PROXY,
        timeout=args.TIMEOUT,
        retries=args.RETRIES,
    )


def main():
    """Walk the Redfish API, evaluate the selected event log(s) and report.

    Steps: parse the command line, compile the `--match` / `--ignore` filters,
    determine the vendor from the service root, read the Managers (Systems for
    Supermicro) collection, then fetch the vendor-specific SEL and/or MEL
    entries of every member and fold their severity into one plugin state.
    With `--test`, fixture files replace all API calls.
    Ends by calling lib.base.oao() with the message and state.
    """

    # parse the command line
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # set default values for append parameters that were not specified
    if args.IGNORE is None:
        args.IGNORE = []
    if args.MATCH is None:
        args.MATCH = []

    # compile the filter regexes and work out the age cutoff once
    ignore_patterns = [
        lib.base.coe(p) for p in lib.txt.compile_regex(args.IGNORE, '--ignore')
    ]
    match_patterns = [
        lib.base.coe(p) for p in lib.txt.compile_regex(args.MATCH, '--match')
    ]
    # a negative value would move the cutoff into the future, age out every
    # single entry and thereby silently mask all alerts - refuse it
    if args.MAX_AGE < 0:
        lib.base.cu('--max-age must be 0 (aging disabled) or a positive number of days.')
    cutoff_epoch = int(lib.time.now()) - args.MAX_AGE * 86400 if args.MAX_AGE else 0

    # fetch data
    if args.TEST is None:
        if not args.URL.startswith(('http://', 'https://')):
            lib.base.cu('--url parameter has to start with "http://" or "https://".')
        header = {'Accept': 'application/json'}
        # reuse a cached Redfish session token across requests and runs, so we
        # do not create (and have the controller log) a new session each time
        header.update(lib.redfish.get_auth_header(args))
        # service root: figure out the vendor to pick the entry point
        result = lib.base.coe(_fetch_json(args, f'{args.URL}{API_BASE}/', header))
        vendor = lib.redfish.get_vendor(result)
        entry_point = 'Systems' if vendor == 'supermicro' else 'Managers'
        # Entry point: the Managers (or Systems for Supermicro) collection
        result = lib.base.coe(
            _fetch_json(args, f'{args.URL}{API_BASE}/{entry_point}', header)
        )
    else:
        # do not call the API, put in test data. Each API call in the
        # Redfish walk has an explicit fixture suffix, so the fixture
        # file names describe what they contain (root, managers, sel)
        # instead of being a string-appended chain.
        test_base = args.TEST[0]
        result = load_test_fixture(args.TEST, f'{test_base}-root')
        vendor = lib.redfish.get_vendor(result)
        result = load_test_fixture(args.TEST, f'{test_base}-managers')

    # "Members": [
    #     {
    #         "@odata.id": "/redfish/v1/Managers/BMC"
    #     }
    # ],
    # a missing, empty or null "Members" value all mean there is nothing to check
    if not result.get('Members'):
        lib.base.cu('Nothing to check, no Redfish members found.')

    # init some vars
    details = []
    state = STATE_OK
    perfdata = ''
    member_count = 0
    aged_out_total = 0

    # resolve which log path(s) to read for this vendor and --log-type;
    # logs without a known path (empty string) are dropped
    paths = LOG_PATHS.get(vendor, {'sel': '', 'mel': ''})
    wanted = ['sel', 'mel'] if args.LOG_TYPE == 'both' else [args.LOG_TYPE]
    logs = [(label, paths[label]) for label in wanted if paths.get(label)]
    log_label = {'sel': 'SEL', 'mel': 'MEL', 'both': 'SEL and MEL'}[args.LOG_TYPE]

    # analyze data: follow each "Member" link and aggregate the selected log
    # entries' severity into `state`.
    # NOTE: if the vendor has no known path for the requested log, no member is
    # visited at all and the summary below reports "0 members" with state OK.
    members_to_check = result['Members'] if logs else []
    for member in members_to_check:
        member_count += 1
        for label, path in logs:
            if args.TEST is None:
                # a missing or unknown log path must not abort the check
                success, entries = _fetch_json(
                    args, f'{args.URL}{member["@odata.id"]}{path}', header
                )
                if not success or not isinstance(entries, dict):
                    entries = {}
            else:
                entries = load_test_fixture(args.TEST, f'{test_base}-{label}')
            member_msg, member_state, member_aged = (
                lib.redfish.get_manager_logservices_sel_entries(
                    entries,
                    match=match_patterns,
                    ignore=ignore_patterns,
                    cutoff_epoch=cutoff_epoch,
                )
            )
            aged_out_total += member_aged
            if member_msg:
                # only logs that produced output contribute to message and state
                details.append(
                    f'{member["@odata.id"]} ({label.upper()})\n{member_msg}\n\n'
                )
                state = lib.base.get_worst(state, member_state)

    # build the message: one summary line, followed by the per-log details
    members = lib.txt.pluralize('member', member_count)
    aged = f' {aged_out_total} aged out.' if aged_out_total else ''
    checked = f'{log_label} on {member_count} {members}'
    if state == STATE_CRIT:
        summary = f'Checked {checked}. There are critical errors.{aged}'
    elif state == STATE_WARN:
        summary = f'Checked {checked}. There are warnings.{aged}'
    else:
        summary = f'Everything is ok, checked {checked}.{aged}'
    msg = f'{summary}\n\n' + ''.join(details)

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


# Script entry point: run the check only when executed directly, not on import.
if __name__ == '__main__':
    try:
        main()
    except Exception:
        # Catch-all boundary: any exception that main() did not handle itself is
        # handed to lib.base.cu() (called without a message here) instead of
        # propagating. SystemExit is not an Exception subclass, so the regular
        # exits raised inside main() pass through untouched.
        # NOTE(review): presumably cu() reports the current traceback and exits
        # with STATE_UNKNOWN - confirm in lib.base.
        lib.base.cu()
