#!/usr/bin/env python3
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details.
"""

import argparse  # pylint: disable=C0413
import json  # pylint: disable=C0413
import sys  # pylint: disable=C0413

import lib.args  # pylint: disable=C0413
import lib.base  # pylint: disable=C0413
import lib.human  # pylint: disable=C0413
import lib.lftest  # pylint: disable=C0413
import lib.txt  # pylint: disable=C0413
import lib.url  # pylint: disable=C0413
import lib.veeam  # pylint: disable=C0413
from lib.globals import (STATE_CRIT, STATE_OK,  # pylint: disable=C0413
                          STATE_UNKNOWN, STATE_WARN)

# Plugin metadata; both values are printed by `-V` / `--version` (see parse_args()).
__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2025021501'

# Text shown at the top of the `--help` output (passed to argparse as `description`).
DESCRIPTION = """Checks Veeam for failed VM or jobs, jobs that are running too long, and overuse of
                 the backup repositories. In addition, the check provides information about backup
                 infrastructure components and performed backup and replication jobs, executed jobs,
                 their status and duration, backed up and replicated VMs, available recovery points
                 and backup repositories, their capacity, free storage space and size of the backup
                 files - using the Veeam Enterprise Manager API (requires a Veeam Enterprise
                 License)."""

# Repository usage thresholds in percent (defaults for -w/--warning and -c/--critical).
DEFAULT_WARN = 80
DEFAULT_CRIT = 90

# Counter thresholds: main() alerts as soon as the reported count is greater than these.
DEFAULT_FAILED_JOB_RUNS = 0
DEFAULT_FAILED_VM_LASTEST_STATES = 0
DEFAULT_WARNINGS_JOB_RUNS = 0
DEFAULT_WARNING_VM_LASTEST_STATES = 0

# Maximum job durations in seconds (24 hours).
DEFAULT_MAX_BACKUP_JOB_DURATION = 24 * 60 * 60
DEFAULT_MAX_REPLICA_JOB_DURATION = 24 * 60 * 60

# Connection settings for the Veeam Enterprise Manager REST API.
DEFAULT_URL = 'https://localhost:9398'
DEFAULT_USERNAME = 'Administrator'
DEFAULT_TIMEOUT = 3  # seconds
DEFAULT_NO_PROXY = False
# NOTE(review): `--insecure` is a `store_true` flag, so with a default of True the flag is a
# no-op and there is currently no way to request certificate verification from the CLI.
DEFAULT_INSECURE = True


def parse_args():
    """Parse the command line arguments using argparse.

    Returns:
        argparse.Namespace: the parsed options. Attribute names are the upper-case `dest`
        values declared below (e.g. `args.URL`, `args.WARN`).

    Raises:
        SystemExit: raised by argparse itself on invalid input, `--help` and `--version`.
    """
    parser = argparse.ArgumentParser(description=DESCRIPTION)

    parser.add_argument(
        '-V', '--version',
        action='version',
        version='%(prog)s: v{} by {}'.format(__version__, __author__)
    )

    parser.add_argument(
        '--always-ok',
        help='Always returns OK.',
        dest='ALWAYS_OK',
        action='store_true',
        default=False,
    )

    parser.add_argument(
        '-c', '--critical',
        help='Set the CRIT threshold for Backup Size as a percentage. '
             'Default: >= %(default)s',
        dest='CRIT',
        type=int,
        default=DEFAULT_CRIT,
    )

    parser.add_argument(
        '--failed-job-runs',
        help='Veeam threshold for `FailedJobRuns`. '
             'Default: > %(default)s',
        dest='FAILED_JOB_RUNS',
        type=int,
        default=DEFAULT_FAILED_JOB_RUNS,
    )

    parser.add_argument(
        '--failed-vm-lastest-states',
        help='Veeam threshold for `FailedVmLastestStates`. '
             'Default: > %(default)s',
        dest='FAILED_VM_LASTEST_STATES',
        type=int,
        default=DEFAULT_FAILED_VM_LASTEST_STATES,
    )

    # NOTE(review): this is a `store_true` flag whose default comes from DEFAULT_INSECURE;
    # while that constant is True, passing `--insecure` does not change anything.
    parser.add_argument(
        '--insecure',
        help='This option explicitly allows to perform "insecure" SSL connections. '
             'Default: %(default)s',
        dest='INSECURE',
        action='store_true',
        default=DEFAULT_INSECURE,
    )

    parser.add_argument(
        '--max-backup-job-duration',
        help='Veeam threshold for `MaxBackupJobDuration`. '
             'Default: > %(default)s',
        dest='MAX_BACKUP_JOB_DURATION',
        type=int,
        default=DEFAULT_MAX_BACKUP_JOB_DURATION,
    )

    # The value is compared against Veeam's `MaxReplicaJobDuration` (the job *name* lives in
    # `MaxDurationReplicaJobName`), so the help text has to name the duration field.
    parser.add_argument(
        '--max-replica-job-duration',
        help='Veeam threshold for `MaxReplicaJobDuration`. '
             'Default: > %(default)s',
        dest='MAX_REPLICA_JOB_DURATION',
        type=int,
        default=DEFAULT_MAX_REPLICA_JOB_DURATION,
    )

    parser.add_argument(
        '--no-proxy',
        help='Do not use a proxy. '
             'Default: %(default)s',
        dest='NO_PROXY',
        action='store_true',
        default=DEFAULT_NO_PROXY,
    )

    parser.add_argument(
        '-p', '--password',
        help='Veeam API password.',
        dest='PASSWORD',
        required=True,
    )

    parser.add_argument(
        '--test',
        help='For unit tests. Needs "path-to-stdout-file,path-to-stderr-file,expected-retc".',
        dest='TEST',
        type=lib.args.csv,
    )

    parser.add_argument(
        '--timeout',
        help='Network timeout in seconds. '
             'Default: %(default)s (seconds)',
        dest='TIMEOUT',
        type=int,
        default=DEFAULT_TIMEOUT,
    )

    parser.add_argument(
        '--url',
        help='Veeam API URL. '
             'Default: %(default)s',
        dest='URL',
        default=DEFAULT_URL,
    )

    # Not `required`: argparse never applies `default` to a required option, which would make
    # the default advertised in the help text unreachable.
    parser.add_argument(
        '--username',
        help='Veeam API username. '
             'Default: %(default)s',
        dest='USERNAME',
        default=DEFAULT_USERNAME,
    )

    parser.add_argument(
        '-w', '--warning',
        help='Set the WARN threshold for Backup Size as a percentage. '
             'Default: >= %(default)s',
        dest='WARN',
        type=int,
        default=DEFAULT_WARN,
    )

    parser.add_argument(
        '--warnings-job-runs',
        help='Veeam threshold for `WarningsJobRuns`. '
             'Default: > %(default)s',
        dest='WARNINGS_JOB_RUNS',
        type=int,
        default=DEFAULT_WARNINGS_JOB_RUNS,
    )

    parser.add_argument(
        '--warning-vm-lastest-states',
        help='Veeam threshold for `WarningVmLastestStates`. '
             'Default: > %(default)s',
        dest='WARNING_VM_LASTEST_STATES',
        type=int,
        default=DEFAULT_WARNING_VM_LASTEST_STATES,
    )

    return parser.parse_args()


# Perfdata emitted on every run, in output order:
# (label and key inside the section, section of the fetched result, unit of measurement, max).
_PERFDATA_METRICS = (
    ('BackedUpVms', 'vms_overview', None, None),
    ('BackupServers', 'overview', None, None),
    ('FailedJobRuns', 'job_statistics', None, None),
    ('FailedVmLastestStates', 'overview', None, None),
    ('FullBackupPointsSize', 'vms_overview', 'B', None),
    ('IncrementalBackupPointsSize', 'vms_overview', 'B', None),
    ('MaxBackupJobDuration', 'job_statistics', 's', None),
    ('MaxJobDuration', 'job_statistics', 's', None),
    ('MaxReplicaJobDuration', 'job_statistics', 's', None),
    ('ProtectedVms', 'vms_overview', None, None),
    ('ProxyServers', 'overview', None, None),
    ('ReplicaRestorePointsSize', 'vms_overview', 'B', None),
    ('ReplicatedVms', 'vms_overview', None, None),
    ('RepositoryServers', 'overview', None, None),
    ('RestorePoints', 'vms_overview', None, None),
    ('RunningJobs', 'job_statistics', None, None),
    ('ScheduledBackupJobs', 'job_statistics', None, None),
    ('ScheduledJobs', 'job_statistics', None, None),
    ('ScheduledReplicaJobs', 'job_statistics', None, None),
    ('SourceVmsSize', 'vms_overview', 'B', None),
    ('SuccessBackupPercents', 'vms_overview', '%', 100),
    ('SuccessfulJobRuns', 'job_statistics', None, None),
    ('SuccessfulVmLastestStates', 'overview', None, None),
    ('TotalJobRuns', 'job_statistics', None, None),
    ('WarningsJobRuns', 'job_statistics', None, None),
    ('WarningVmLastestStates', 'overview', None, None),
)


def _fetch_reports(args, header):
    """Fetch the four Veeam summary reports and return them in a dict keyed by report name.

    Parameters:
        args: the parsed command line (reads URL, INSECURE, NO_PROXY and TIMEOUT).
        header (dict): HTTP headers to send, including the session id.

    Returns:
        dict: the keys `overview`, `job_statistics`, `vms_overview` and `repository`, each
        holding whatever `lib.base.coe(lib.url.fetch_json(...))` returned for that endpoint.
    """
    # tolerate a trailing slash in --url, otherwise the request would go to `//api/...`
    base_url = args.URL.rstrip('/')
    reports = {}
    for key, path in (
        # https://helpcenter.veeam.com/docs/backup/em_rest/reports_summary_overview.html?ver=110
        ('overview', '/api/reports/summary/overview'),
        # https://helpcenter.veeam.com/docs/backup/em_rest/reports_summary_statistics.html?ver=110
        ('job_statistics', '/api/reports/summary/job_statistics'),
        # https://helpcenter.veeam.com/docs/backup/em_rest/reports_summary_vms_overview.html?ver=110
        ('vms_overview', '/api/reports/summary/vms_overview'),
        # https://helpcenter.veeam.com/docs/backup/em_rest/reports_summary_repository.html?ver=110
        ('repository', '/api/reports/summary/repository'),
    ):
        reports[key] = lib.base.coe(
            lib.url.fetch_json(
                base_url + path,
                header=header,
                insecure=args.INSECURE,
                no_proxy=args.NO_PROXY,
                timeout=args.TIMEOUT,
            ),
        )
    return reports


def _rate_threshold(value, threshold, breach_state):
    """Rate a value that alerts when it is strictly greater than `threshold`.

    Returns:
        tuple: `(state, label)` - `breach_state` or STATE_OK, plus the text that
        `lib.base.state2str(state, prefix=' ')` produces for that state.
    """
    local_state = breach_state if value > threshold else STATE_OK
    return local_state, lib.base.state2str(local_state, prefix=' ')


def main():
    """The main function. This is where the action happens.

    Parses the command line, obtains the four Veeam summary reports (from the API, or from a
    JSON fixture when `--test` is given), rates counters, job durations and repository usage
    against the configured thresholds and hands message, state and perfdata to
    `lib.base.oao()`.
    """

    # parse the command line, exit with UNKNOWN if it fails
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # fetch data
    if args.TEST is None:
        success, result = lib.veeam.get_token(args)
        if not success:
            lib.base.cu(result)

        header = {
            'X-RestSvcSessionId': result['X-RestSvcSessionId'],
            'Accept': 'application/json',
        }
        result = _fetch_reports(args, header)
    else:
        # do not call the API, put in test data (only the stdout fixture is needed here)
        stdout, _, _ = lib.lftest.test(args.TEST)
        result = json.loads(stdout)

    overview = result['overview']
    job_stats = result['job_statistics']
    vms = result['vms_overview']

    # init some vars
    msg = ''
    state = STATE_OK
    perfdata = ''
    table_values = []

    def add_row(key, value):
        """Append one key/value line to the property table."""
        table_values.append({'key': key, 'value': value})

    # analyze data - "Lastest" (Veeam: sic!)

    add_row('BackedUpVms', vms['BackedUpVms'])
    add_row('BackupServers', overview['BackupServers'])

    val = job_stats['FailedJobRuns']
    local_state, label = _rate_threshold(val, args.FAILED_JOB_RUNS, STATE_CRIT)
    if local_state != STATE_OK:
        state = lib.base.get_worst(state, local_state)
        msg += '{} {} failed{}, '.format(val, lib.txt.pluralize('Job', val), label)
    add_row('FailedJobRuns', str(val) + label)

    val = overview['FailedVmLastestStates']
    local_state, label = _rate_threshold(val, args.FAILED_VM_LASTEST_STATES, STATE_CRIT)
    if local_state != STATE_OK:
        state = lib.base.get_worst(state, local_state)
        msg += '{} {} failed{}, '.format(val, lib.txt.pluralize('VM', val), label)
    add_row('FailedVmLastestStates', str(val) + label)

    add_row('FullBackupPointsSize', lib.human.bytes2human(vms['FullBackupPointsSize']))
    add_row(
        'IncrementalBackupPointsSize',
        lib.human.bytes2human(vms['IncrementalBackupPointsSize']),
    )

    val = job_stats['MaxBackupJobDuration']
    duration = lib.human.seconds2human(val)
    local_state, label = _rate_threshold(val, args.MAX_BACKUP_JOB_DURATION, STATE_WARN)
    if local_state != STATE_OK:
        # the longest backup job ran longer than --max-backup-job-duration
        state = lib.base.get_worst(state, local_state)
        msg += '"{}" ran for {}{}, '.format(
            job_stats['MaxDurationBackupJobName'],
            duration,
            label,
        )
    add_row('MaxBackupJobDuration', duration + label)

    add_row('MaxDurationBackupJobName', job_stats['MaxDurationBackupJobName'])
    add_row('MaxDurationReplicaJobName', job_stats['MaxDurationReplicaJobName'])
    add_row('MaxJobDuration', lib.human.seconds2human(job_stats['MaxJobDuration']))

    val = job_stats['MaxReplicaJobDuration']
    duration = lib.human.seconds2human(val)
    local_state, label = _rate_threshold(val, args.MAX_REPLICA_JOB_DURATION, STATE_WARN)
    if local_state != STATE_OK:
        # the longest replica job ran longer than --max-replica-job-duration
        state = lib.base.get_worst(state, local_state)
        msg += '{} ran for {}{}, '.format(
            job_stats['MaxDurationReplicaJobName'],
            duration,
            label,
        )
    add_row('MaxReplicaJobDuration', duration + label)

    add_row('ProtectedVms', vms['ProtectedVms'])
    add_row('ProxyServers', overview['ProxyServers'])
    add_row('ReplicaRestorePointsSize', lib.human.bytes2human(vms['ReplicaRestorePointsSize']))
    add_row('ReplicatedVms', vms['ReplicatedVms'])
    add_row('RepositoryServers', overview['RepositoryServers'])
    add_row('RestorePoints', vms['RestorePoints'])
    add_row('RunningJobs', job_stats['RunningJobs'])
    add_row('ScheduledBackupJobs', job_stats['ScheduledBackupJobs'])
    add_row('ScheduledJobs', job_stats['ScheduledJobs'])
    add_row('ScheduledReplicaJobs', job_stats['ScheduledReplicaJobs'])
    add_row('SourceVmsSize', lib.human.bytes2human(vms['SourceVmsSize']))
    add_row('SuccessBackupPercents', str(vms['SuccessBackupPercents']) + '%')
    add_row('SuccessfulJobRuns', job_stats['SuccessfulJobRuns'])
    add_row('SuccessfulVmLastestStates', overview['SuccessfulVmLastestStates'])
    add_row('TotalJobRuns', job_stats['TotalJobRuns'])

    val = job_stats['WarningsJobRuns']
    local_state, label = _rate_threshold(val, args.WARNINGS_JOB_RUNS, STATE_WARN)
    if local_state != STATE_OK:
        state = lib.base.get_worst(state, local_state)
        msg += '{} {} with warnings{}, '.format(val, lib.txt.pluralize('Job', val), label)
    add_row('WarningsJobRuns', str(val) + label)

    val = overview['WarningVmLastestStates']
    local_state, label = _rate_threshold(val, args.WARNING_VM_LASTEST_STATES, STATE_WARN)
    if local_state != STATE_OK:
        state = lib.base.get_worst(state, local_state)
        msg += '{} {} with warnings{}, '.format(val, lib.txt.pluralize('VM', val), label)
    add_row('WarningVmLastestStates', str(val) + label)

    # repository usage: always reported in the message, rated against --warning / --critical
    for repo in result['repository']['Periods']:
        capacity = float(repo['Capacity'])
        if capacity:
            val = round(float(repo['BackupSize']) / capacity * 100, 1)
        else:
            # a repository reporting a capacity of 0 has no meaningful usage percentage;
            # report 0% instead of aborting the whole check with a ZeroDivisionError
            val = 0.0
        local_state = lib.base.get_state(val, args.WARN, args.CRIT)
        state = lib.base.get_worst(state, local_state)
        msg += '"{}" {}%{} used - total: {}, used: {}, free: {}, '.format(
            repo['Name'],
            val,
            lib.base.state2str(local_state, prefix=' '),
            lib.human.bytes2human(repo['Capacity']),
            lib.human.bytes2human(repo['BackupSize']),
            lib.human.bytes2human(repo['FreeSpace']),
        )
        perfdata += lib.base.get_perfdata('Repo Usage ' + repo['Name'], val, '%', args.WARN, args.CRIT, 0, 100)
        perfdata += lib.base.get_perfdata('Repo Capacity ' + repo['Name'], repo['Capacity'], 'B', None, None, 0, None)
        perfdata += lib.base.get_perfdata('Repo FreeSpace ' + repo['Name'], repo['FreeSpace'], 'B', None, None, 0, None)
        perfdata += lib.base.get_perfdata('Repo BackupSize ' + repo['Name'], repo['BackupSize'], 'B', None, None, 0, None)

    # build the remaining perfdata (repository perfdata comes first, as before)
    for label, section, uom, maximum in _PERFDATA_METRICS:
        perfdata += lib.base.get_perfdata(label, result[section][label], uom, None, None, 0, maximum)

    # build the message: drop the trailing ', ' of the last fragment; if nothing was
    # reported at all, do not start the output with an empty line
    summary = msg[:-2] if msg else 'Everything is ok.'
    msg = summary + '\n\n' + lib.base.get_table(
        table_values,
        ['key', 'value'],
        header=['Key', 'Value'],
    )

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


# Script entry point: run the check only when the file is executed directly.
if __name__ == '__main__':
    try:
        main()
    except Exception:   # pylint: disable=W0703
        # Deliberately broad: any error escaping main() is handed to lib.base.cu()
        # (presumably it reports the exception and exits with UNKNOWN - confirm in lib.base).
        lib.base.cu()
