#!/usr/bin/env python3
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details."""

import argparse
import json
import sys

import lib.args
import lib.base
import lib.cache
import lib.human
import lib.lftest
import lib.time
from lib.globals import STATE_UNKNOWN

try:
    import psutil
except ImportError:
    print('Python module "psutil" is not installed.')
    sys.exit(STATE_UNKNOWN)


# plugin metadata, used to build the `--version` output
__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2026041301'

# passed to argparse as the program description; the embedded line breaks
# are part of the value
DESCRIPTION = (
    'Reports how long the system has been running since the last boot. Optionally displays\n'
    'the timestamp and duration of the last downtime - the more frequently the check runs,\n'
    'the more accurate the downtime information will be. Alerts when uptime exceeds the\n'
    'configured thresholds (useful for detecting servers that have not been rebooted after\n'
    'patching).'
)

# default thresholds, written as human-readable Nagios ranges
DEFAULT_WARN = '3m:180D'  # warn if uptime is outside of 3 minutes .. 180 days
DEFAULT_CRIT = ':1Y'  # crit if uptime is greater than 1 year


def parse_args():
    """Build the plugin's command line interface and parse the arguments.

    Defines `--version`, `--always-ok`, `--critical`, `--test` and `--warning`.
    Unknown command line arguments are not treated as an error; they are
    collected by `parse_known_args()` and discarded here.

    Returns the `argparse.Namespace` holding `ALWAYS_OK`, `CRIT`, `TEST`
    and `WARN`.
    """
    # both threshold options share the same introduction and ending in
    # their help text; only the example in between differs
    threshold_intro = (
        'Threshold for the uptime in a human-readable format '
        '(s = seconds, m = minutes, h = hours, D = days, W = weeks, M = months, Y = years). '
        'Supports Nagios ranges. '
    )
    default_hint = 'Default: %(default)s'

    cli = argparse.ArgumentParser(description=DESCRIPTION)

    cli.add_argument(
        '-V',
        '--version',
        action='version',
        version=f'%(prog)s: v{__version__} by {__author__}',
    )

    cli.add_argument(
        '--always-ok',
        dest='ALWAYS_OK',
        action='store_true',
        default=False,
        help=lib.args.help('--always-ok'),
    )

    cli.add_argument(
        '-c',
        '--critical',
        dest='CRIT',
        default=DEFAULT_CRIT,
        help=threshold_intro
        + 'Example: `:1Y` alerts if uptime is greater than 1 year. '
        + default_hint,
    )

    cli.add_argument(
        '--test',
        dest='TEST',
        type=lib.args.csv,
        help=lib.args.help('--test'),
    )

    cli.add_argument(
        '-w',
        '--warning',
        dest='WARN',
        default=DEFAULT_WARN,
        help=threshold_intro
        + 'Example: `5m:180D` warns if uptime is not between 5 minutes and 180 days. '
        + default_hint,
    )

    # only the recognised options are of interest, the leftovers are dropped
    known, _leftovers = cli.parse_known_args()
    return known


def _load_uptime_fixture(raw_json):
    """Decode a test fixture into the two timestamps the plugin works with.

    `raw_json` is a JSON document of the shape

        {"now": <epoch seconds>, "boot_time": <epoch seconds>}

    Returns the tuple `(now, boot_time)`, each value converted to a float.
    A missing key or a value that cannot be converted raises the
    corresponding exception unchanged.
    """
    fixture = json.loads(raw_json)
    # convert in the documented order: first "now", then "boot_time"
    now, boot_time = (float(fixture[key]) for key in ('now', 'boot_time'))
    return now, boot_time


def main():
    """Run the uptime check from argument parsing to the final output.

    Steps:

    1. Parse the command line and convert the human-readable threshold
       ranges to seconds.
    2. Get the current time and the boot time, either live (`lib.time.now()`
       and `psutil.boot_time()`) or from a `--test` fixture.
    3. In live mode, compare the boot time with the timestamp of the previous
       run stored in the cache to detect a reboot, and remember the
       timestamp and duration of that downtime. In test mode the cache is
       not touched at all.
    4. Build the message and the perfdata and hand both, together with the
       state, to `lib.base.oao()`.
    """

    # parse the command line; any exit triggered while parsing is turned
    # into an exit with STATE_UNKNOWN
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # convert human readable nagios ranges to something that the Linuxfabrik
    # libraries can understand
    crit_range = lib.human.humanrange2seconds(args.CRIT)
    warn_range = lib.human.humanrange2seconds(args.WARN)

    cache_db = 'linuxfabrik-monitoring-plugins-uptime.db'
    live_run = args.TEST is None

    # fetch data
    if live_run:
        now = lib.time.now()
        boot_time = psutil.boot_time()
    else:
        fixture_stdout, _, _ = lib.lftest.test(args.TEST)
        now, boot_time = _load_uptime_fixture(fixture_stdout)

    # analyze data
    uptime = now - boot_time

    # "no known downtime" is the starting point; test runs keep it as is,
    # bypassing the cache entirely for deterministic unit tests
    downtime, down_ts = 0, 0
    if live_run:
        # without a timestamp from a previous run, fall back to "now"
        last_ts = lib.cache.get('last_ts', filename=cache_db) or now
        if boot_time > float(last_ts):
            # the machine booted after the previous run: there was a reboot
            downtime = boot_time - float(last_ts)
            down_ts = last_ts
            lib.cache.set('last_downtime', downtime, filename=cache_db)
            lib.cache.set('last_down_ts', down_ts, filename=cache_db)
        else:
            # no reboot since the previous run: report the last known one
            cached_downtime = lib.cache.get('last_downtime', filename=cache_db)
            cached_down_ts = lib.cache.get('last_down_ts', filename=cache_db)
            if cached_downtime:
                downtime, down_ts = cached_downtime, cached_down_ts
        lib.cache.set('last_ts', now, filename=cache_db)

    # build the message
    state = lib.base.get_state(uptime, warn_range, crit_range, _operator='range')
    uptime_human = lib.human.seconds2human(uptime)
    boot_iso = lib.time.epoch2iso(boot_time)
    state_suffix = lib.base.state2str(state, prefix=" ")
    msg = (
        f'Up {uptime_human} '
        f'since {boot_iso} '
        f'(thresholds {args.WARN}/{args.CRIT})'
        f'{state_suffix}'
    )
    if downtime:
        down_iso = lib.time.epoch2iso(down_ts)
        downtime_human = lib.human.seconds2human(downtime)
        msg += (
            '.\nLast power event '
            f'at ~{down_iso} '
            f'and down for ~{downtime_human}.'
        )
    perfdata = lib.base.get_perfdata('uptime', uptime, uom='s', _min=0)

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


# Run the check only when this file is executed as a script. Any exception
# that escapes main() is handled by calling lib.base.cu().
# NOTE(review): lib.base.cu() presumably prints the traceback and exits with
# UNKNOWN - confirm against the library.
if __name__ == '__main__':
    try:
        main()
    except Exception:
        lib.base.cu()
