#!/usr/bin/env python3
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details.
"""

import argparse  # pylint: disable=C0413
import datetime  # pylint: disable=C0413
import json  # pylint: disable=C0413
import os  # pylint: disable=C0413
import sys  # pylint: disable=C0413

import lib.args  # pylint: disable=C0413
import lib.base  # pylint: disable=C0413
import lib.db_sqlite  # pylint: disable=C0413
import lib.human  # pylint: disable=C0413
import lib.shell  # pylint: disable=C0413
import lib.lftest  # pylint: disable=C0413
import lib.time  # pylint: disable=C0413
from lib.globals import (STATE_OK, STATE_UNKNOWN)


# Plugin metadata; both values are printed by the `--version` option.
__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2025042101'

# Long help text, handed to argparse as the program description (`--help`).
DESCRIPTION = """\
Checks the health and status of Kubernetes Pods by running `kubectl get pods` and parsing the
results.

Prints a table listing namespace, pod name, readiness, status, restart count, pod age, and IP
address. Adds performance data for each pod status (Running, Pending, Failed, Succeeded, Unknown).

By default, only shows pods from the current namespace. Use `--all-namespaces` to check across all
namespaces. The plugin also stores a temporary local SQLite database during runtime
(no persistent history). Results can therefore be filtered with a custom SQL `--query`
(e.g., by namespace, pod name, or status). See the README for more details.

Pending and Failed pods can trigger a WARNING or CRITICAL state (configurable via `--severity`),
while Unknown pods result in an UNKNOWN state.

Intended for use with Nagios/Icinga to detect Kubernetes pod issues like stuck, failing, or
unreachable pods.
"""


# Defaults for the command line options defined in parse_args().
# Only query the namespace of the current context unless `--all-namespaces` is given.
DEFAULT_ALL_NAMESPACES = False
# kubeconfig file passed to kubectl via `--kubeconfig`.
DEFAULT_KUBECONFIG = '/var/spool/icinga2/.kubeconfig'
# Used as the SQL WHERE expression; `1` is always true, so no pod is filtered out.
DEFAULT_QUERY = '1'
# Severity applied to Pending and Failed pods; one of 'warn' or 'crit'.
DEFAULT_SEVERITY = 'crit'


def parse_args() -> argparse.Namespace:
    """Define the plugin's command line interface and parse `sys.argv`.

    Returns:
        argparse.Namespace: The parsed options, stored under upper-case
        attribute names: `ALWAYS_OK`, `ALL_NAMESPACES`, `KUBECONFIG`, `QUERY`,
        `SEVERITY` and `TEST`.

    Raises:
        SystemExit: Raised by argparse itself on invalid arguments as well as
        after handling `--help` or `--version`.
    """
    parser = argparse.ArgumentParser(description=DESCRIPTION)

    parser.add_argument(
        '-V', '--version',
        action='version',
        version=f'%(prog)s: v{__version__} by {__author__}'
    )

    parser.add_argument(
        '--always-ok',
        help='Always returns OK.',
        dest='ALWAYS_OK',
        action='store_true',
        default=False,
    )

    parser.add_argument(
        '--all-namespaces',
        help='If present, list the requested object(s) across all namespaces. Namespace in current '
             'context is ignored even if specified with `--namespace`. '
             'Default: %(default)s',
        dest='ALL_NAMESPACES',
        action='store_true',
        default=DEFAULT_ALL_NAMESPACES,
    )

    parser.add_argument(
        '--kubeconfig',
        help='Path to the kubeconfig file. '
             'Default: %(default)s',
        dest='KUBECONFIG',
        default=DEFAULT_KUBECONFIG,
    )

    # The value is inserted verbatim after `WHERE` in the SELECT statement that
    # filters the pods, so the help text has to call it the WHERE part.
    # `%%` is an escaped percent sign for argparse's help formatting.
    parser.add_argument(
        '--query',
        help='Provide the SQL `WHERE` statement part to narrow down results. '
             ' Example: '
             "`namespace = 'mynamespace' and name like 'prod-%%' and status != 'running'`. "
             'Have a look at the README for a list of available columns. '
             'Default: %(default)s',
        dest='QUERY',
        default=DEFAULT_QUERY,
    )

    parser.add_argument(
        '--severity',
        help='Severity for alerting. '
             'Default: %(default)s',
        dest='SEVERITY',
        default=DEFAULT_SEVERITY,
        choices=['warn', 'crit'],
    )

    parser.add_argument(
        '--test',
        help='For unit tests. Needs "path-to-stdout-file,path-to-stderr-file,expected-retc".',
        dest='TEST',
        type=lib.args.csv,
    )

    return parser.parse_args()


def main() -> None:
    """Run the check: fetch the pods via kubectl, evaluate them and report.

    Steps:

    1. Parse the command line (any argparse exit becomes STATE_UNKNOWN).
    2. Run `kubectl get pods --output=json`, or load canned data if `--test`
       was given.
    3. Store one row per pod in a freshly created local SQLite table.
    4. Select the rows matching `--query` and derive the overall state:
       Pending/Failed pods raise the configured `--severity`, pods in phase
       Unknown (or in a missing/unrecognised phase) raise STATE_UNKNOWN.
    5. Print a message, a table of the selected pods and one perfdata item
       per pod phase via `lib.base.oao()`.
    """

    # parse the command line, exit with UNKNOWN if it fails
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # build the kubectl command
    # expanduser() only expands a leading `~` / `~user`; a plain str.replace()
    # would also rewrite tildes that are a regular part of the path.
    cmd = [
        'kubectl',
        f'--kubeconfig={os.path.expanduser(args.KUBECONFIG)}',
        'get', 'pods',
    ]
    if args.ALL_NAMESPACES:
        # only add the flag when requested, so no empty token ends up in the command
        cmd.append('--all-namespaces')
    cmd += [
        '--output=json',
        '--request-timeout=8',
    ]
    cmd = ' '.join(cmd)

    # returns (in JSON):
    # NAMESPACE  NAME                           READY  STATUS    RESTARTS  AGE  IP
    # fleet      fleet-agent-f7dc57db7-bks9q    1/1    Running   0         29d  192.0.2.36
    # system     cluster-agent-74c77868c-7wnbh  1/1    Running   0         29d  192.0.2.39

    # fetch data
    if args.TEST is None:
        stdout, stderr, _ = lib.base.coe(
            lib.shell.shell_exec(cmd, timeout=8),
        )
        if stderr:
            lib.base.cu(stderr)
    else:
        # do not call the command, put in test data
        stdout, stderr, _ = lib.lftest.test(args.TEST)

    # init some vars
    msg = ''
    state = STATE_OK
    perfdata = ''

    # create the db table
    definition = '''
        namespace TEXT DEFAULT NULL,
        name TEXT DEFAULT NULL,
        ready TEXT DEFAULT NULL,
        status TEXT DEFAULT NULL,
        restarts INT DEFAULT NULL,
        age INT DEFAULT NULL,
        ip TEXT DEFAULT NULL
    '''
    conn = lib.base.coe(lib.db_sqlite.connect(
        filename='linuxfabrik-monitoring-plugins-kubectl-get-pods.db',
    ))
    lib.base.coe(lib.db_sqlite.create_table(
        conn,
        definition,
        table='kubectl_get_pods',
        drop_table_first=True,  # we don't need historical data
    ))

    # parse the kubectl output and insert one row per pod
    now = lib.time.now()
    try:
        result = json.loads(stdout)
    except (TypeError, ValueError) as e:
        # json.JSONDecodeError is a subclass of ValueError
        lib.base.cu(f'Invalid JSON returned by kubectl: {e}')
    for item in result['items']:
        pod_status = item['status']

        # pod age, derived from the pod's start time (stays 0 if not started yet)
        age = pod_status.get('startTime', 0)
        if age:
            age = lib.human.seconds2human(
                now - lib.time.timestr2epoch(
                    age,
                    pattern='%Y-%m-%dT%H:%M:%SZ',
                    tzinfo=datetime.timezone.utc,
                ),
            )

        data = {
            'namespace': item['metadata'].get('namespace', ''),
            'name': item['metadata'].get('name', ''),
            'ready': '',
            'status': f'{pod_status.get("phase", "")}',
            'restarts': 0,
            'age': age,
            'ip': pod_status.get('podIP'),
        }
        if 'containerStatuses' in pod_status:
            containers = pod_status['containerStatuses'] or []
            ready = sum(1 for container in containers if container.get('ready'))
            data['ready'] = f'{ready}/{len(containers)}'
            # Sum the restarts of all containers, intended to match the RESTARTS
            # column of `kubectl get pods` (TODO confirm against kubectl); the first
            # container alone would hide restarting sidecars, and `[0]` fails on an
            # empty list.
            data['restarts'] = sum(
                container.get('restartCount', 0) for container in containers
            )

        lib.base.coe(lib.db_sqlite.insert(conn, data, table='kubectl_get_pods'))

    # store the inserted rows in the local sqlite database
    lib.base.coe(lib.db_sqlite.commit(conn))

    # fetch desired objects only, and set sqlite3 to be case insensitive when string comparing
    # NOTE: args.QUERY is raw SQL from the command line and is interpolated
    # verbatim - that is the documented purpose of `--query`. It must therefore
    # only ever come from a trusted source.
    sql = f'''
        SELECT *
        FROM kubectl_get_pods
        WHERE {args.QUERY}
        COLLATE NOCASE
    '''
    result = lib.base.coe(lib.db_sqlite.select(conn, sql))
    lib.db_sqlite.close(conn)

    # get state
    # `kubectl get pods` statuses are: Running, Succeeded, Pending, Failed, Unknown
    pod_states = {
        'Failed': 0,
        'Pending': 0,
        'Running': 0,
        'Succeeded': 0,
        'Unknown': 0,
    }
    severity = lib.base.str2state(args.SEVERITY)
    for item in result:
        phase = item['status']
        if phase in ('Pending', 'Failed'):
            pod_states[phase] += 1
            state = lib.base.get_worst(state, severity)
            item['status'] += lib.base.state2str(severity, prefix=' ')
        elif phase in ('Running', 'Succeeded'):
            pod_states[phase] += 1
        else:
            # 'Unknown', but also a missing or unexpected phase: we cannot tell
            # what the pod is doing, so report it as Unknown instead of failing
            # with a KeyError on the counter lookup.
            pod_states['Unknown'] += 1
            state = lib.base.get_worst(state, STATE_UNKNOWN)
            item['status'] += lib.base.state2str(STATE_UNKNOWN, prefix=' ')

    # build the message
    if state == STATE_OK:
        msg += 'Everything is ok.\n'
    elif state == STATE_UNKNOWN:
        msg += ('State of a pod could not be obtained, usually due to an error in communicating '
                'with the host where the pod is supposed to be running.\n')
    else:
        if pod_states['Failed']:
            msg += ('Failed Pods: At least one container in the pod has terminated in failure. '
                    'A container is considered to have failed if it exited with a non-zero '
                    'status.\n')
        if pod_states['Pending']:
            msg += ('Pending Pods: Have been accepted by the Kubernetes system, but one or more '
                    'of the container images have not been created. This could be because the '
                    'system is pulling the image from a remote registry, there is insufficient '
                    'capacity in the cluster, or some other scheduling issue.\n')

    # append the table of selected pods; the status column goes last because it
    # may carry the state marker appended above
    if result:
        keys = [
            'namespace',
            'name',
            'ready',
            'restarts',
            'age',
            'ip',
            'status',
        ]
        headers = [
            'NAMESPACE',
            'NAME',
            'RDY',
            'RSTRT',
            'AGE',
            'IP',
            'STATUS',
        ]
        msg += '\n' + lib.base.get_table(result, keys, header=headers)

    # one perfdata item per pod phase, labelled with the lower-cased phase name
    for key, value in pod_states.items():
        perfdata += lib.base.get_perfdata(
            key.lower(),
            value,
            _min=0,
        )

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


# Script entry point: only run the check when executed directly, not on import.
if __name__ == '__main__':
    try:
        main()
    except Exception:   # pylint: disable=W0703
        # Last-resort handler for any exception main() did not deal with itself.
        # NOTE(review): lib.base.cu() presumably reports the active exception and
        # exits with UNKNOWN - confirm against lib.base.
        lib.base.cu()
