#!/usr/bin/env python3
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details."""

import argparse
import os
import os.path
import sys

import lib.args
import lib.base
import lib.disk
import lib.txt
from lib.globals import STATE_OK, STATE_UNKNOWN, STATE_WARN

try:
    import yaml
except ImportError:
    print('Python module "yaml" is not installed.')
    sys.exit(STATE_UNKNOWN)


# Plugin metadata; both values are interpolated into the `--version` output
# built in `parse_args()`.
__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2026050402'

# Passed to `argparse.ArgumentParser` as the program description.
DESCRIPTION = """Scans the system for approximately 170 known rootkits by checking for their
characteristic files, directories, and kernel symbols. Each finding includes the year
the rootkit was first publicly disclosed when known. New rootkit definitions can be
added by dropping YAML files into the assets folder.
Alerts when rootkit indicators are found."""

# Default value of the `--severity` option.
# NOTE(review): the name is misspelled ("SERVERITY" instead of "SEVERITY"). It is
# left unchanged here because `parse_args()` refers to it by this exact name;
# rename both places together.
DEFAULT_SERVERITY = 'crit'


def parse_args():
    """Build the command line interface and return the parsed options.

    Supported options are `-V/--version` and `--severity` (stored as
    `SEVERITY`, either "warn" or "crit"). Parsing is done with
    `parse_known_args()`, so arguments this plugin does not define are
    discarded instead of causing an error.
    """
    cli = argparse.ArgumentParser(description=DESCRIPTION)

    version_banner = f'%(prog)s: v{__version__} by {__author__}'
    cli.add_argument('-V', '--version', action='version', version=version_banner)

    severity_help = (
        'Severity for alerts when rootkit indicators are found. '
        'One of "warn" or "crit". '
        'Default: %(default)s'
    )
    cli.add_argument(
        '--severity',
        dest='SEVERITY',
        choices=['warn', 'crit'],
        default=DEFAULT_SERVERITY,
        help=severity_help,
    )

    # only the recognised options are of interest; the leftovers are dropped
    known, _unknown = cli.parse_known_args()
    return known


def load_kernel_symbols():
    """Return the set of kernel symbol names known to the running kernel.

    The symbol table is taken from `/proc/kallsyms`, falling back to the
    legacy `/proc/ksyms` when the former does not exist. The third
    whitespace-separated column of every line is collected; lines with fewer
    than three columns are skipped. If neither file exists, the set is empty.

    Returning a set means callers match whole symbol names. A plain substring
    search over the file content would also fire on partial matches, e.g.
    `is_invisible_helper` for the `is_invisible` rootkit signature.
    """
    raw = ''
    # first existing source wins; the legacy file is only probed as a fallback
    for candidate in ('/proc/kallsyms', '/proc/ksyms'):
        if lib.disk.file_exists(candidate, allow_empty=True):
            raw = lib.base.coe(lib.disk.read_file(candidate))
            break

    columns = (row.split() for row in raw.splitlines())
    return {cols[2] for cols in columns if len(cols) >= 3}


def main():
    """Scan the system for known rootkit indicators and report the result.

    Every `*.yml` rootkit definition in the `assets` folder next to this file
    is loaded, and the files, directories and kernel symbols it lists are
    looked up on the local system. Message, state and perfdata are finally
    handed to `lib.base.oao()`.

    Definitions carrying a `cl` value below 100 only produce "possible"
    findings, which always alert with WARN. All other definitions produce
    confirmed findings, which alert with the state chosen via `--severity`.

    A definition that cannot be parsed or is structurally invalid is listed
    under "Scanfile Errors" and does not abort the scan of the remaining
    definitions.
    """

    # parse the command line
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # init some vars
    msg = ''
    errors = []  # lists any yaml parsing errors
    state = STATE_OK
    perfdata = ''
    rkdef_path = os.path.join(os.path.dirname(__file__), 'assets')
    rkscanned = 0  # number of rootkit signatures loaded (used as perfdata _max)
    rkfound = []  # detail lines per indicator hit (one line per file/dir/ksym match)
    rkpossible = []  # detail lines per possible-only indicator hit (cl < 100)
    rkfound_count = 0  # number of distinct rootkits with at least one confirmed hit
    rkpossible_count = 0  # number of distinct rootkits with at least one possible hit

    # get a list of rootkit definition filenames
    rootkits = lib.disk.walk_directory(
        rkdef_path,
        exclude_pattern=r'',
        include_pattern=r'.*\.yml$',
        relative=False,
    )

    # get the set of exact kernel symbol names
    ksyms = load_kernel_symbols()

    # analyze system
    for rootkit in rootkits:
        # Hits are collected per definition and only merged into the global
        # lists after the try block. That way the detail lines and the
        # per-rootkit counters can never disagree, even if the definition
        # turns out to be broken halfway through.
        hits = []
        confirmed = True
        try:
            # load the rootkit definition file
            rk = yaml.safe_load(lib.base.coe(lib.disk.read_file(rootkit)))
            rkscanned += 1

            # yaml.safe_load() returns None for an empty file and a scalar or
            # list for malformed content; none of those can be a definition
            if not isinstance(rk, dict):
                errors.append(
                    f'* {os.path.basename(rootkit)}: Invalid definition:'
                    ' not a YAML mapping'
                )
                continue

            # build a name prefix that includes the disclosure date (year, year-month
            # or year-month-day) when the signature provides it, so admins immediately
            # see whether the indicator is from the 2003 rkhunter era or from a recent
            # threat report
            label = (
                f'{rk["name"]} ({rk["discovered"]})'
                if rk.get('discovered')
                else rk['name']
            )
            confirmed = not ('cl' in rk and rk['cl'] < 100)

            # see if any of the known files exists
            for item in rk['files']:
                if lib.disk.file_exists(item, allow_empty=True):
                    hits.append(f'* {label}: {item} (File)')

            # see if any of the directories exist. We use os.path.isdir directly
            # because lib.disk.file_exists is os.path.isfile under the hood and
            # would silently return False for every directory indicator.
            for item in rk['dirs']:
                if os.path.isdir(item):
                    hits.append(f'* {label}: {item} (Dir)')

            # scan kernel symbols for signs of rootkits or other malicious software
            if ksyms:
                for item in rk['ksyms']:
                    if item in ksyms:
                        hits.append(f'* {label}: {item} (Kernel Symbol)')

        except KeyError as e:
            # missing a yaml attribute like 'files' or 'dirs'
            errors.append(f'* {os.path.basename(rootkit)}: Key Error {e}')
        except TypeError as e:
            # an attribute has the wrong type, e.g. `files:` without a value
            # (None is not iterable) or a non-numeric `cl`
            errors.append(f'* {os.path.basename(rootkit)}: Invalid definition: {e}')
        except yaml.YAMLError as e:
            # got a yaml file that is broken in any way
            errors.append(f'* {os.path.basename(rootkit)}: YAML error: {e}')

        # count this rootkit once if any of its indicators triggered, so the
        # perfdata value reflects distinct rootkits found and not the total
        # number of indicator hits (one rootkit can trigger many indicators).
        # This also runs for a definition that failed after its first hits,
        # so indicators that were already found are reported and counted.
        if hits:
            if confirmed:
                rkfound.extend(hits)
                rkfound_count += 1
            else:
                rkpossible.extend(hits)
                rkpossible_count += 1

    # build the message
    if rkscanned == 0:
        lib.base.cu(f'No rootkit definition files found in `{rkdef_path}`.')
    if not rkfound and not rkpossible:
        msg += (
            f'Everything is ok. Scanned for {rkscanned}'
            f' {lib.txt.pluralize("rootkit", rkscanned)}.'
        )
    else:
        if rkpossible:
            state = lib.base.get_worst(state, STATE_WARN)
        if rkfound:
            state = lib.base.get_worst(state, lib.base.str2state(args.SEVERITY))
        msg += (
            f'Found {rkfound_count}'
            f' {lib.txt.pluralize("rootkit", rkfound_count)}.'
            f' {rkpossible_count} possible'
            f' {lib.txt.pluralize("rootkit", rkpossible_count)}'
            f' found. {lib.base.state2str(state)}'
        )
        if rkfound:
            msg += '\n\nRootkits:\n' + '\n'.join(rkfound)
        if rkpossible:
            msg += '\n\nPossible Rootkits:\n' + '\n'.join(rkpossible)
    if errors:
        msg += '\n\nScanfile Errors:\n' + '\n'.join(errors)

    # rootkit_items / rootkit_possible carry the count of distinct rootkits with
    # at least one indicator match. _max is the total number of signatures loaded,
    # so admins immediately see the upper bound and can graph signature-database
    # growth via the perfdata max field.
    perfdata += lib.base.get_perfdata(
        'rootkit_items',
        rkfound_count,
        _min=0,
        _max=rkscanned,
    )
    perfdata += lib.base.get_perfdata(
        'rootkit_possible',
        rkpossible_count,
        _min=0,
        _max=rkscanned,
    )

    # over and out
    lib.base.oao(msg, state, perfdata)


# Run the check only when this file is executed as a script, not on import.
if __name__ == '__main__':
    try:
        main()
    except Exception:
        # Last-resort handler: any exception that escaped main() is passed on
        # to lib.base.cu(). NOTE(review): presumably that prints the error
        # details and exits with UNKNOWN - confirm against lib.base.
        lib.base.cu()
