#!/usr/bin/env python3
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details."""

import argparse
import base64
import json
import os
import sys

import lib.args
import lib.base
import lib.lftest
import lib.redfish
import lib.txt
import lib.url
from lib.globals import STATE_CRIT, STATE_OK, STATE_UNKNOWN, STATE_WARN

# Both values are interpolated into the `--version` output in parse_args().
__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2026042401'

# Program description handed to argparse.ArgumentParser in parse_args().
DESCRIPTION = """Checks the state of all physical drives and their storage controllers in a
Redfish-compatible server via the Redfish API. Alerts when any drive or storage controller
reports a degraded or failed state. System-level health (processors, BIOS, power, temperature,
indicator LED, etc.) is deliberately ignored by this check so that a system warning unrelated
to storage does not mask the drive status; use `redfish-system` for that."""

# Path prefix that main() appends to `--url` when requesting the Systems
# collection (`{URL}{API_BASE}/Systems`).
API_BASE = '/redfish/v1'
# Default for `--insecure`. The option is a `store_true` flag, so with a default
# of True the value is True whether or not the flag is given on the command line.
DEFAULT_INSECURE = True
# Default for `--no-proxy`.
DEFAULT_NO_PROXY = False
# Default for `--timeout`, in seconds.
DEFAULT_TIMEOUT = 8
# Default for `--url`.
DEFAULT_URL = 'https://localhost:5000'


def parse_args():
    """Define the command line interface and return the parsed arguments.

    Arguments that are unknown to the parser are accepted and discarded.
    """
    cli = argparse.ArgumentParser(description=DESCRIPTION)

    cli.add_argument(
        '-V',
        '--version',
        action='version',
        version=f'%(prog)s: v{__version__} by {__author__}',
    )

    # The boolean switches all have the same shape; only the option name,
    # the destination and the default differ.
    switches = (
        ('--always-ok', 'ALWAYS_OK', False),
        ('--insecure', 'INSECURE', DEFAULT_INSECURE),
        ('--no-proxy', 'NO_PROXY', DEFAULT_NO_PROXY),
    )
    for option, destination, fallback in switches:
        cli.add_argument(
            option,
            action='store_true',
            default=fallback,
            dest=destination,
            help=lib.args.help(option),
        )

    cli.add_argument('--password', dest='PASSWORD', help='Redfish API password.')

    cli.add_argument(
        '--test',
        dest='TEST',
        type=lib.args.csv,
        help=lib.args.help('--test'),
    )

    timeout_help = lib.args.help('--timeout') + ' Default: %(default)s (seconds)'
    cli.add_argument(
        '--timeout',
        dest='TIMEOUT',
        type=int,
        default=DEFAULT_TIMEOUT,
        help=timeout_help,
    )

    cli.add_argument(
        '--url',
        dest='URL',
        default=DEFAULT_URL,
        help='Redfish API URL. Default: %(default)s',
    )

    cli.add_argument('--username', dest='USERNAME', help='Redfish API username.')

    # parse_known_args() returns (namespace, leftover argv); only the
    # namespace is of interest here.
    return cli.parse_known_args()[0]


def load_test_fixture(test_args, path):
    """Load one JSON fixture of the Redfish walk and return it parsed.

    The first element of `test_args` (the `--test` list) is replaced in place
    by `path`, then the list is handed to `lib.lftest.test()` and the first
    value it returns is parsed as JSON.

    A missing fixture file or a fixture that is not valid JSON is reported
    through `lib.base.cu()` with a descriptive message, so that the caller
    gets a helpful UNKNOWN result instead of a traceback.
    """
    fixture_exists = os.path.isfile(path)
    if not fixture_exists:
        lib.base.cu(f'Test fixture not found: "{path}".')

    # lib.lftest.test() reads the fixture location from slot 0 of the list
    test_args[0] = path
    payload, _stderr, _retc = lib.lftest.test(test_args)

    try:
        parsed = json.loads(payload)
    except ValueError as err:
        # json.JSONDecodeError is a subclass of ValueError
        lib.base.cu(f'Test fixture "{path}" does not contain valid JSON: {err}')
    else:
        return parsed


def _is_active(resource):
    """Return True if `resource['Status_State']` is `Enabled` or `Quiesced`."""
    return resource['Status_State'] in ('Enabled', 'Quiesced')


def _member_heading(systems):
    """Build the one-line identification of a member, without any health state."""
    heading = 'Member:'
    for key in ('Manufacturer', 'Model'):
        if systems[key]:
            heading += f' {systems[key]}'
    details = [
        f'{label}: {systems[key]}'
        for label, key in (
            ('HostName', 'HostName'),
            ('SKU', 'SKU'),
            ('SerNo', 'SerialNumber'),
        )
        if systems[key]
    ]
    return ', '.join([heading] + details)


def _fetch_resource(args, header, test_base, resource, suffix):
    """Return the parsed JSON document for one step of the Redfish walk.

    In test mode (`--test` given) the fixture file `<test_base>-<suffix>` is
    loaded and `resource` is ignored. Otherwise `resource` (a path such as
    `/redfish/v1/Systems`) is appended to `--url` and fetched.
    """
    if args.TEST is not None:
        return load_test_fixture(args.TEST, f'{test_base}-{suffix}')
    if not resource:
        # a link object without "@odata.id" cannot be followed
        lib.base.cu('Redfish response contains a link without "@odata.id".')
    return lib.base.coe(
        lib.url.fetch_json(
            # tolerate a trailing slash in --url, otherwise the request
            # would go to "//redfish/v1/..."
            f'{args.URL.rstrip("/")}{resource}',
            header=header,
            insecure=args.INSECURE,
            no_proxy=args.NO_PROXY,
            timeout=args.TIMEOUT,
        )
    )


def main():
    """Walk Systems -> Storage -> Drives and report drive and controller health.

    The worst state found on any active drive or storage controller becomes
    the state of the check. The result is handed to `lib.base.oao()`, honoring
    `--always-ok`.
    """

    # parse the command line
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # prepare the data source: either the live API or the test fixtures
    header = {'Accept': 'application/json'}
    test_base = None
    if args.TEST is None:
        if not args.URL.startswith(('http://', 'https://')):
            lib.base.cu('--url parameter has to start with "http://" or "https://".')
        if args.USERNAME and args.PASSWORD:
            auth = f'{args.USERNAME}:{args.PASSWORD}'
            encoded_auth = lib.txt.to_text(base64.b64encode(lib.txt.to_bytes(auth)))
            header['Authorization'] = f'Basic {encoded_auth}'
    else:
        # do not call the API, put in test data. Each API call in the
        # Redfish walk has an explicit fixture suffix, so the fixture
        # file names describe what they contain (systems, system,
        # storages, storage, drive-N). Remember the base name now, because
        # load_test_fixture() overwrites args.TEST[0] on every call.
        test_base = args.TEST[0]

    # Entry point: the Systems collection
    # "Members": [
    #     {
    #         "@odata.id": "/redfish/v1/Systems/437XR1138R2"
    #     }
    # ],
    result = _fetch_resource(args, header, test_base, f'{API_BASE}/Systems', 'systems')
    if not result.get('Members'):
        lib.base.cu('Nothing to check, no Redfish members found.')

    # init some vars
    msg = ''
    state = STATE_OK
    perfdata = ''
    member_count = 0

    # analyze data: follow each "Member" link, aggregate drive and
    # storage-controller health into `state`. System-level health is
    # deliberately not aggregated (see `redfish-system` for that).
    for member in result['Members']:
        # "/redfish/v1/Systems/437XR1138R2"
        systems = _fetch_resource(
            args, header, test_base, member.get('@odata.id'), 'system'
        )
        systems = lib.redfish.get_systems(systems)
        if not _is_active(systems):
            continue
        member_count += 1

        # only identify the member (manufacturer, model, hostname, SKU,
        # serial number); it is intentionally not labelled with a state
        msg += _member_heading(systems)

        # get all available storage links for the member
        if not systems['Storage_@odata.id']:
            msg += '\n\n'
            continue

        # "/redfish/v1/Systems/437XR1138R2/Storage"
        storages = _fetch_resource(
            args, header, test_base, systems['Storage_@odata.id'], 'storages'
        )
        table_data = []
        table_data_drive = []
        for storage in storages.get('Members') or []:
            # "/redfish/v1/Systems/437XR1138R2/Storage/RAID.SL.7-1"
            storage_data = _fetch_resource(
                args, header, test_base, storage.get('@odata.id'), 'storage'
            )

            # get drives attached to the storage member
            for drive_idx, drive in enumerate(storage_data.get('Drives') or []):
                # "/redfish/v1/Systems/437XR1138R2/Storage/RAID.SL.7-1/Drives/Disk.Bay.0"
                drive_data = _fetch_resource(
                    args,
                    header,
                    test_base,
                    drive.get('@odata.id'),
                    f'drive-{drive_idx}',
                )
                drive_data = lib.redfish.get_systems_storage_drives(drive_data)
                if not _is_active(drive_data):
                    continue
                # is the drive healthy?
                drive_data_state = lib.redfish.get_state(drive_data)
                state = lib.base.get_worst(state, drive_data_state)
                drive_data['State'] = lib.base.state2str(
                    drive_data_state, empty_ok=False
                )
                table_data_drive.append(drive_data)

            # the drives are evaluated above even if the controller itself
            # is skipped here
            storage_data = lib.redfish.get_systems_storage(storage_data)
            if not _is_active(storage_data):
                continue
            # is the storage controller healthy?
            storage_data_state = lib.redfish.get_state(storage_data)
            state = lib.base.get_worst(state, storage_data_state)
            storage_data['State'] = lib.base.state2str(
                storage_data_state, empty_ok=False
            )
            table_data.append(storage_data)

        if table_data_drive:
            keys = [
                'Name',
                'MediaType',
                'Protocol',
                'Manufacturer',
                'Model',
                'SerialNumber',
                'CapacityBytes',
                'PredictedMediaLifeLeftPercent',
                'State',
            ]
            headers = [
                'Disk',
                'Type',
                'Proto',
                'Manufacturer',
                'Model',
                'SerialNumber',
                'Size',
                'LifeLeft %',
                'State',
            ]
            msg += '\n\n' + lib.base.get_table(table_data_drive, keys, header=headers)

        if table_data:
            keys = ['Id', 'Name', 'Description', 'Drives@odata.count', 'State']
            headers = ['ID', 'Name', 'Description', 'Drives', 'State']
            msg += '\n\n' + lib.base.get_table(table_data, keys, header=headers)

        msg += '\n\n'

    # build the message: a summary line in front of the per-member details
    members = lib.txt.pluralize('member', member_count)
    if state == STATE_CRIT:
        summary = (
            f'Checked storage on {member_count} {members}.'
            f' There are critical errors.\n\n'
        )
    elif state == STATE_WARN:
        summary = (
            f'Checked storage on {member_count} {members}. There are warnings.\n\n'
        )
    else:
        summary = (
            f'Everything is ok, checked storage on {member_count} {members}.\n\n'
        )
    msg = summary + msg

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


# Run the check only when the file is executed as a script, not on import.
if __name__ == '__main__':
    try:
        main()
    except Exception:
        # Last-resort handler: anything main() did not deal with itself is
        # handed to lib.base.cu() (presumably reports the error and exits
        # with UNKNOWN - confirm in lib.base).
        lib.base.cu()
