#!/usr/bin/env python3
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details."""

import argparse
import datetime
import json
import os
import sys

import lib.args
import lib.base
import lib.db_sqlite
import lib.human
import lib.lftest
import lib.shell
import lib.time
from lib.globals import STATE_OK, STATE_UNKNOWN

# plugin metadata; both values are printed by the `--version` switch
__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2025100601'

# text handed to argparse as the program description
DESCRIPTION = """Checks the health and status of Kubernetes pods by running "kubectl get pods" and
parsing the results. Lists namespace, pod name, readiness, status, restart count, age,
and IP address. Alerts on pending and failed pods with configurable severity. Results
can be filtered with a custom SQL query. Supports all namespaces or a single one."""


# defaults for the command line parameters defined in parse_args()
DEFAULT_ALL_NAMESPACES = False  # do not pass `--all-namespaces` to kubectl
DEFAULT_KUBECONFIG = '/var/spool/icinga2/.kubeconfig'
DEFAULT_QUERY = '1'  # used verbatim as the SQL WHERE clause, so `1` selects every row
DEFAULT_SEVERITY = 'crit'  # state applied to Pending and Failed pods


def parse_args():
    """Build the argparse parser for this check and return the parsed options.

    Arguments the parser does not know are tolerated (``parse_known_args``) and
    discarded; only the namespace with the recognised options is returned.
    """
    cli = argparse.ArgumentParser(description=DESCRIPTION)

    all_namespaces_help = (
        'List pods across all namespaces. '
        'Namespace in current context is ignored even if specified with `--namespace`. '
        'Default: %(default)s'
    )
    query_help = (
        'SQL WHERE clause to narrow down results. '
        'Have a look at the README for a list of available columns. '
        'Example: '
        "`namespace = 'mynamespace' and name like 'prod-%%' and status != 'running'`. "
        'Default: %(default)s'
    )

    # one (flags, keyword arguments) pair per option, in the order they are registered
    option_specs = [
        (
            ('-V', '--version'),
            {
                'action': 'version',
                'version': f'%(prog)s: v{__version__} by {__author__}',
            },
        ),
        (
            ('--always-ok',),
            {
                'help': lib.args.help('--always-ok'),
                'dest': 'ALWAYS_OK',
                'action': 'store_true',
                'default': False,
            },
        ),
        (
            ('--all-namespaces',),
            {
                'help': all_namespaces_help,
                'dest': 'ALL_NAMESPACES',
                'action': 'store_true',
                'default': DEFAULT_ALL_NAMESPACES,
            },
        ),
        (
            ('--kubeconfig',),
            {
                'help': 'Path to the kubeconfig file. Default: %(default)s',
                'dest': 'KUBECONFIG',
                'default': DEFAULT_KUBECONFIG,
            },
        ),
        (
            ('--query',),
            {
                'help': query_help,
                'dest': 'QUERY',
                'default': DEFAULT_QUERY,
            },
        ),
        (
            ('--severity',),
            {
                'help': lib.args.help('--severity') + ' Default: %(default)s',
                'dest': 'SEVERITY',
                'default': DEFAULT_SEVERITY,
                'choices': ['warn', 'crit'],
            },
        ),
        (
            ('--test',),
            {
                'help': lib.args.help('--test'),
                'dest': 'TEST',
                'type': lib.args.csv,
            },
        ),
    ]
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)

    known, _unknown = cli.parse_known_args()
    return known


def main():
    """Run the check: fetch pods via kubectl, filter them with SQLite and report.

    Workflow:

    1. Parse the command line (any argparse exit is turned into STATE_UNKNOWN).
    2. Run `kubectl get pods --output=json` (or load test data when `--test` is given).
    3. Store one row per pod in a freshly created SQLite table.
    4. Select the rows matching the admin-provided `--query` WHERE clause.
    5. Derive the state: Pending/Failed pods raise the configured severity, pods in
       phase Unknown (or with a phase that is not recognised) raise STATE_UNKNOWN.
    6. Print message, table and perfdata via `lib.base.oao()`.

    The function does not return a value; the result is handed to `lib.base.oao()`.
    """

    # parse the command line
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # build the kubectl command; `--all-namespaces` is only added when requested, so the
    # command line never contains an empty argument. expanduser() resolves a leading
    # `~` or `~user` only and leaves tildes elsewhere in the path untouched.
    cmd = [
        'kubectl',
        f'--kubeconfig={os.path.expanduser(args.KUBECONFIG)}',
        'get',
        'pods',
    ]
    if args.ALL_NAMESPACES:
        cmd.append('--all-namespaces')
    cmd += [
        '--output=json',
        '--request-timeout=8',
    ]
    cmd = ' '.join(cmd)

    # returns (in JSON):
    # NAMESPACE  NAME                           READY  STATUS    RESTARTS  AGE  IP
    # fleet      fleet-agent-f7dc57db7-bks9q    1/1    Running   0         29d  192.0.2.36
    # system     cluster-agent-74c77868c-7wnbh  1/1    Running   0         29d  192.0.2.39

    # fetch data
    if args.TEST is None:
        stdout, stderr, _ = lib.base.coe(
            lib.shell.shell_exec(cmd, timeout=8),
        )
        if stderr:
            lib.base.cu(stderr)
    else:
        # do not call the command, put in test data
        stdout, stderr, _ = lib.lftest.test(args.TEST)

    # init some vars
    msg = ''
    state = STATE_OK
    perfdata = ''

    # create the db table
    definition = """
        namespace TEXT DEFAULT NULL,
        name TEXT DEFAULT NULL,
        ready TEXT DEFAULT NULL,
        status TEXT DEFAULT NULL,
        restarts INT DEFAULT NULL,
        age INT DEFAULT NULL,
        ip TEXT DEFAULT NULL
    """
    conn = lib.base.coe(
        lib.db_sqlite.connect(
            filename='linuxfabrik-monitoring-plugins-kubectl-get-pods.db',
        )
    )
    lib.base.coe(
        lib.db_sqlite.create_table(
            conn,
            definition,
            table='kubectl_get_pods',
            drop_table_first=True,  # we don't need historical data
        )
    )

    # parse the kubectl output; json.loads() raises ValueError (JSONDecodeError) on
    # malformed input and TypeError if stdout is not a str/bytes object
    now = lib.time.now()
    try:
        result = json.loads(stdout)
    except (TypeError, ValueError) as e:
        lib.base.cu(f'Invalid JSON returned by kubectl: {e}')

    # a missing `items` key is deliberately not papered over: the resulting exception
    # is handled by the caller of main() instead of silently reporting zero pods
    for item in result['items']:
        metadata = item.get('metadata', {})
        pod_status = item.get('status', {})
        # may be absent (e.g. pod not scheduled yet) or an empty list
        container_statuses = pod_status.get('containerStatuses') or []

        # `age` stays 0 when the pod has no startTime, otherwise it becomes the
        # human-readable time span since the pod was started
        age = pod_status.get('startTime', 0)
        if age:
            age = lib.human.seconds2human(
                now
                - lib.time.timestr2epoch(
                    age,
                    pattern='%Y-%m-%dT%H:%M:%SZ',
                    tzinfo=datetime.timezone.utc,
                ),
            )

        data = {
            'namespace': metadata.get('namespace', ''),
            'name': metadata.get('name', ''),
            'ready': '',
            'status': f'{pod_status.get("phase", "")}',
            'restarts': 0,
            'age': age,
            'ip': pod_status.get('podIP'),
        }
        if container_statuses:
            ready = sum(1 for container in container_statuses if container.get('ready'))
            data['ready'] = f'{ready}/{len(container_statuses)}'
            # total over all containers, so restarts of sidecar containers are not missed
            data['restarts'] = sum(
                container.get('restartCount', 0) for container in container_statuses
            )

        lib.base.coe(lib.db_sqlite.insert(conn, data, table='kubectl_get_pods'))

    # store the rows in the local sqlite database
    lib.base.coe(lib.db_sqlite.commit(conn))

    # fetch desired objects only, and set sqlite3 to be case insensitive when string comparing
    # QUERY is by design an admin-provided SQL WHERE clause (documented feature)
    sql = f"""
        SELECT *
        FROM kubectl_get_pods
        WHERE {args.QUERY}
        COLLATE NOCASE
    """  # nosec B608
    result = lib.base.coe(lib.db_sqlite.select(conn, sql))
    lib.db_sqlite.close(conn)

    # get state
    # `kubectl get pods` statuses are: Running, Succeeded, Pending, Failed, Unknown
    pod_states = {
        'Failed': 0,
        'Pending': 0,
        'Running': 0,
        'Succeeded': 0,
        'Unknown': 0,
    }
    severity = lib.base.str2state(args.SEVERITY)
    for item in result:
        phase = item['status'] or ''
        if phase in ('Pending', 'Failed'):
            pod_states[phase] += 1
            state = lib.base.get_worst(state, severity)
            item['status'] = phase + lib.base.state2str(severity, prefix=' ')
        elif phase in ('Running', 'Succeeded'):
            pod_states[phase] += 1
        else:
            # phase `Unknown`, or a missing/unexpected phase: instead of failing with a
            # KeyError, count the pod as Unknown and flag it accordingly
            pod_states['Unknown'] += 1
            state = lib.base.get_worst(state, STATE_UNKNOWN)
            item['status'] = phase + lib.base.state2str(STATE_UNKNOWN, prefix=' ')

    # build the message from the counters, so that every kind of problem found is
    # explained, not only the one that determines the overall state
    if state == STATE_OK:
        msg += 'Everything is ok.\n'
    else:
        if pod_states['Failed']:
            msg += (
                'Failed Pods: At least one container in the pod has terminated in failure. '
                'A container is considered to have failed if it exited with a non-zero '
                'status.\n'
            )
        if pod_states['Pending']:
            msg += (
                'Pending Pods: Have been accepted by the Kubernetes system, but one or more '
                'of the container images have not been created. This could be because the '
                'system is pulling the image from a remote registry, there is insufficient '
                'capacity in the cluster, or some other scheduling issue.\n'
            )
        if pod_states['Unknown']:
            msg += (
                'State of a pod could not be obtained, usually due to an error in '
                'communicating with the host where the pod is supposed to be running.\n'
            )
    if result:
        keys = [
            'namespace',
            'name',
            'ready',
            'restarts',
            'age',
            'ip',
            'status',
        ]
        headers = [
            'NAMESPACE',
            'NAME',
            'RDY',
            'RSTRT',
            'AGE',
            'IP',
            'STATUS',
        ]
        msg += '\n' + lib.base.get_table(result, keys, header=headers)

    # one perfdata item per pod phase (lower-cased phase name, number of pods)
    for key, value in pod_states.items():
        perfdata += lib.base.get_perfdata(
            key.lower(),
            value,
            _min=0,
        )

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


if __name__ == '__main__':
    try:
        main()
    except Exception:
        # last-resort handler: any exception main() did not deal with itself is handed
        # to lib.base.cu() (presumably reports it and exits with UNKNOWN - see lib.base)
        lib.base.cu()
