#!/usr/bin/env python3
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details.
"""

import argparse  # pylint: disable=C0413
import json  # pylint: disable=C0413
import re  # pylint: disable=C0413
import sys  # pylint: disable=C0413

import lib.args  # pylint: disable=C0413
import lib.base  # pylint: disable=C0413
import lib.human  # pylint: disable=C0413
import lib.lftest  # pylint: disable=C0413
import lib.shell  # pylint: disable=C0413
import lib.time  # pylint: disable=C0413
import lib.txt  # pylint: disable=C0413
from lib.globals import (STATE_CRIT, STATE_OK,  # pylint: disable=C0413
                          STATE_UNKNOWN, STATE_WARN)

__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2025021501'

# Shown by argparse as the description in the `--help` output.
DESCRIPTION = """Query the systemd journal and alert on any events found.
                 For help on any of the journalctl-specific parameters, see `man journalctl`."""

# Defaults of the command line parameters: filters that are not set by default ...
DEFAULT_FACILITY = None
DEFAULT_IDENTIFIER = None
DEFAULT_UNIT = None
DEFAULT_USER_UNIT = None

# ... and parameters that always carry a value.
DEFAULT_PRIORITY = 'emerg..err'
DEFAULT_SERVERITY = 'warn'
DEFAULT_SINCE = '-8h'

# Names of the journald priorities. The position in the list is the numeric
# priority (0 = emerg ... 7 = debug), so never reorder or sort these names.
JOURNALD_PRIOS = 'emerg alert crit err warning notice info debug'.split()


def parse_args():
    """Define all command line options of this check and return the namespace
    that argparse produces for the current command line.
    """
    parser = argparse.ArgumentParser(description=DESCRIPTION)
    add = parser.add_argument

    add(
        '-V', '--version',
        action='version',
        version='%(prog)s: v{ver} by {author}'.format(ver=__version__, author=__author__),
    )

    add(
        '--always-ok',
        dest='ALWAYS_OK',
        action='store_true',
        default=False,
        help='Always returns OK.',
    )

    add(
        '--facility',
        dest='FACILITY',
        default=DEFAULT_FACILITY,
        help='journalctl: Filter output by syslog facility. '
             'Takes a comma-separated list of numbers or facility names. '
             'Default: %(default)s',
    )

    add(
        '--identifier',
        dest='IDENTIFIER',
        default=DEFAULT_IDENTIFIER,
        help='journalctl: Show messages for the specified syslog identifier. '
             'Default: %(default)s',
    )

    # the two ignore options are repeatable and collect their values in a list
    add(
        '--ignore-pattern',
        dest='IGNORE_PATTERN',
        action='append',
        default=[],
        help='Any line containing this case-sensitive pattern on the MESSAGE field '
             'will be ignored (repeating). So, unlike `journalctl`, you can '
             'easily use strings to ignore certain messages.',
    )

    add(
        '--ignore-regex',
        dest='IGNORE_REGEX',
        action='append',
        default=[],
        help='Any line matching this Python regex on the MESSAGE field '
             'will be ignored (repeating). So, unlike `journalctl`, you can '
             'easily use a regex to ignore certain messages. '
             "Example: '(?i)linuxfabrik' for a case-insensitive search "
             'for "linuxfabrik".',
    )

    add(
        '--priority',
        dest='PRIORITY',
        default=DEFAULT_PRIORITY,
        help='journalctl: Filter output by message priorities or priority ranges. '
             'Default: %(default)s',
    )

    add(
        '--severity',
        dest='SEVERITY',
        choices=['warn', 'crit'],
        default=DEFAULT_SERVERITY,
        help='Severity for alerts if journalctl returns results. '
             'One of "warn" or "crit". '
             'Default: %(default)s',
    )

    add(
        '--since',
        dest='SINCE',
        default=DEFAULT_SINCE,
        help='journalctl: Start showing entries on or newer than the specified date. '
             'Default: >= %(default)s',
    )

    add(
        '--test',
        dest='TEST',
        type=lib.args.csv,
        help='For unit tests. '
             'Needs "path-to-stdout-file,path-to-stderr-file,expected-retc".',
    )

    # both unit options are repeatable; they stay `None` if not given at all
    add(
        '--unit',
        dest='UNIT',
        action='append',
        default=DEFAULT_UNIT,
        help='journalctl: Show messages for the specified systemd unit UNIT|PATTERN. '
             'This parameter can be specified multiple times. '
             'Default: %(default)s',
    )

    add(
        '--user-unit',
        dest='USER_UNIT',
        action='append',
        default=DEFAULT_USER_UNIT,
        help='journalctl: Show messages for the specified user session unit. '
             'This parameter can be specified multiple times. '
             'Default: %(default)s',
    )

    return parser.parse_args()


def _build_journalctl_cmd(args):
    """Assemble the journalctl command line (as one string) from the parsed arguments.

    If neither `--unit` nor `--user-unit` was given, a predefined set of basic system
    units is used as filter.
    """
    parts = [
        'journalctl',
        # '--boot',  # logs for the current boot will be shown
        '--reverse',
        '--quiet',
        '--output=json',
        '--priority={}'.format(args.PRIORITY),
        '--since={}'.format(args.SINCE),
    ]
    if args.FACILITY:
        parts.append('--facility={}'.format(args.FACILITY))
    if args.IDENTIFIER:
        parts.append('--identifier={}'.format(args.IDENTIFIER))
    # unfortunately only for newer journalctl commands:
    # '--output-fields=UNIT,_SYSTEMD_UNIT,_SYSTEMD_SLICE,PRIORITY,MESSAGE'

    if args.UNIT is None and args.USER_UNIT is None:
        # Pre-define a standard set on basic system services we want to warn about,
        # found on fresh rhel 7+, ubuntu 16+ and debian 9+ systems altogether, if no unit
        # is provided. And yes, if called without any --unit parameter(s), we therefore ignore
        # errors on any specific application services like httpd etc. To check for application
        # services, call this check separately using --unit=httpd, for example.
        # Attention: '*' is the only wildcard that works.
        default_units = (
            'accounts-daemon.service',
            'acpid.service',
            'apparmor.service',
            'apport.service',
            'auditd.service',
            'cron.service',
            'crond.service',
            'dbus.service',
            'dracut-*.service',
            'haveged.service',
            'ifplugd.service',
            'ifup@*.service',
            'init.scope',
            'irqbalance.service',
            'iscsid.service',
            'lvm2-*.service',
            'lxcfs.service',
            'mdadm.service',
            'network.service',
            'NetworkManager*.service',
            'open-iscsi.service',
            'polkit.service',
            'polkitd.service',
            'qemu-guest-agent.service',
            'rsyslog.service',
            'session-*.scope',
            'snapd*.service',
            'ssh.service',
            'sshd*.service',
            'sssd.service',
            'sysstat.service',
            'systemd-*.service',
            'user@*.service',
        )
        parts.extend('--unit="{}"'.format(unit) for unit in default_units)
    if args.UNIT is not None:
        parts.extend('--unit="{}"'.format(unit) for unit in args.UNIT)
    if args.USER_UNIT is not None:
        parts.extend('--user-unit="{}"'.format(unit) for unit in args.USER_UNIT)
    return ' '.join(parts).strip()


def _message_to_str(message):
    """Return the MESSAGE field of a journald JSON event as text, or None if it is null.

    journalctl's JSON output does not always deliver a plain string: according to the
    systemd journal JSON format, oversized fields are `null`, fields containing
    non-printable or non-UTF-8 bytes are arrays of byte values, and fields that occur
    more than once are arrays of values.
    """
    if message is None or isinstance(message, str):
        return message
    if isinstance(message, list):
        if message and all(isinstance(byte, int) for byte in message):
            try:
                return bytes(message).decode('utf-8', errors='replace')
            except ValueError:
                # numbers outside 0..255 are no raw bytes, so just join them below
                pass
        texts = [_message_to_str(part) for part in message]
        return ' '.join(text for text in texts if text)
    return str(message)


def _unit_of(event):
    """Return the name of the unit an event belongs to, without the `.service` suffix.

    Uses the first of UNIT, _SYSTEMD_UNIT and _SYSTEMD_SLICE that holds a string, and
    'unknown' if none of them does.
    """
    for key in ('UNIT', '_SYSTEMD_UNIT', '_SYSTEMD_SLICE'):
        value = event.get(key)
        if isinstance(value, str):
            return value.replace('.service', '')
    return 'unknown'


def main():
    """The main function. This is where the action is.

    Runs journalctl (or loads test data if `--test` is given), drops all events whose
    MESSAGE matches one of the ignore patterns/regexes, and reports the remaining events
    with the state given by `--severity`, or OK if there are none.
    """

    # parse the command line, exit with UNKNOWN if it fails
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # fetch data
    if args.TEST is None:
        cmd = _build_journalctl_cmd(args)
        stdout, stderr, retc = lib.base.coe(lib.shell.shell_exec(cmd)) # pylint: disable=W0612
        if stderr:
            lib.base.cu(stderr)
    else:
        # do not call the command, put in test data
        cmd = 'no-real-command-used'
        stdout, stderr, retc = lib.lftest.test(args.TEST)

    # init some vars
    table_data = []

    # analyze data: journalctl prints one JSON object per line
    if stdout:
        ignore_regexes = [re.compile(pattern) for pattern in args.IGNORE_REGEX]
        for line in stdout.splitlines():
            try:
                event = json.loads(line)
            except ValueError:
                # json.JSONDecodeError is a ValueError.
                # NOTE: this relies on lib.base.cu() terminating the plugin.
                lib.base.cu('Unable to interpret journald event: {}'.format(line))

            message = _message_to_str(event.get('MESSAGE'))
            if message is None:
                continue
            if any(pattern in message for pattern in args.IGNORE_PATTERN) \
            or any(regex.search(message) for regex in ignore_regexes):
                continue
            # shorten message if necessary
            if len(message) > 80:
                message = message[0:77] + '...'
            event['MESSAGE'] = message

            event['unit'] = _unit_of(event)
            event['priority'] = JOURNALD_PRIOS[int(event['PRIORITY'])]
            # __REALTIME_TIMESTAMP is divided by 1,000,000 before it is passed on as epoch
            event['timestamp'] = lib.time.epoch2iso(
                int(event['__REALTIME_TIMESTAMP']) / 1000000
            )

            table_data.append(event)

    cnt = len(table_data)
    shortened = cnt > 10
    if shortened:
        # keep the table short: only the first and the last five events
        table_data = table_data[0:5] + table_data[-5:]

    # build the message
    if table_data:
        # found something, so nothing good
        state = lib.base.str2state(args.SEVERITY)
        msg = '{} {}. Latest event at {} from {}, level {}: `{}`{}'.format(
            cnt,
            lib.txt.pluralize('event', cnt),
            table_data[0]['timestamp'],
            table_data[0]['unit'],
            table_data[0]['priority'],
            table_data[0]['MESSAGE'],
            lib.base.state2str(state, prefix=' '),
        )
        if shortened:
            msg += '\nAttention: Table below is truncated, showing the 5 newest and ' \
                   'the 5 oldest messages.'
        msg += '\n\n' + lib.base.get_table(
            table_data,
            [
                'timestamp',
                'unit',
                'priority',
                'MESSAGE',
            ],
            header=[
                'Timestamp',
                'Unit',
                'Prio',
                'Message',
            ],
        )
        # The hint talks about our default unit filters, and those are only applied if
        # neither --unit nor --user-unit was given.
        if args.UNIT is None and args.USER_UNIT is None:
            msg += '\nUse `journalctl --reverse --priority={} --since={}` as a starting point ' \
                   'for debugging. Be aware of the fact that you may see even more messages ' \
                   'then, as we use a lot of unit filters to get only messages from basic ' \
                   'system services.'.format(
                args.PRIORITY,
                args.SINCE,
            )
    else:
        msg = 'Everything is ok.'
        state = STATE_OK

    # show the command without the options that only matter for machine-readable output
    msg += '\nThe full command used was:\n`{}`'.format(
        cmd.replace(' --quiet', '').replace(' --output=json', '').replace('\\', ''),
    )
    perfdata = lib.base.get_perfdata('journald-query', cnt, None, None, None, 0, None)

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


if __name__ == '__main__':
    # Run the check only when this file is executed as a script. Every exception that
    # escapes main() is handed over to lib.base.cu(), which is called without a message
    # here (presumably it reports the current traceback and exits with UNKNOWN - verify
    # in lib.base).
    try:
        main()
    except Exception:   # pylint: disable=W0703
        lib.base.cu()
