#!/usr/bin/env python3
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author:  Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
#          https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.md

"""See the check's README for more details.
"""

import argparse  # pylint: disable=C0413
import sys  # pylint: disable=C0413
import time  # pylint: disable=C0413

import lib.args  # pylint: disable=C0413
import lib.base  # pylint: disable=C0413
import lib.shell  # pylint: disable=C0413
import lib.lftest  # pylint: disable=C0413
import lib.txt # pylint: disable=C0413
import lib.time  # pylint: disable=C0413
import lib.url  # pylint: disable=C0413
from lib.globals import (STATE_CRIT, STATE_OK,  # pylint: disable=C0413
                          STATE_UNKNOWN, STATE_WARN)


# Author and version strings; both are printed by the `--version` option.
__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2025071601'

# Passed to argparse as the program description.
DESCRIPTION = """Analyses URLs to detect malware and other breaches using VirusTotal."""

# Default values for the command line options defined in parse_args().
DEFAULT_INSECURE = False  # default for `--insecure`
DEFAULT_NO_PROXY = False  # default for `--no-proxy`
DEFAULT_SEVERITY = 'warn'  # due to the many false positives
DEFAULT_TIMEOUT  = 8  # default for `--timeout`, in seconds


def parse_args():
    """Build the command line interface of this check and parse the arguments.

    ### Returns
    - **argparse.Namespace**: The parsed options. Attribute names are the upper-case
      `dest` values declared below (`ALWAYS_OK`, `INSECURE`, `NO_PROXY`, `SEVERITY`,
      `TEST`, `TIMEOUT`, `TOKEN`, `URL`).

    ### Notes
    - `--token` and `--url` are mandatory.
    - The options are registered in the same order in which they show up in `--help`.
    """
    cli = argparse.ArgumentParser(description=DESCRIPTION)
    add = cli.add_argument

    add(
        '-V', '--version',
        version=f'%(prog)s: v{__version__} by {__author__}',
        action='version',
    )

    # boolean switches
    add(
        '--always-ok',
        dest='ALWAYS_OK',
        action='store_true',
        default=False,
        help='Always returns OK.',
    )
    add(
        '--insecure',
        dest='INSECURE',
        action='store_true',
        default=DEFAULT_INSECURE,
        help='This option explicitly allows to perform "insecure" SSL connections. Default: %(default)s',
    )
    add(
        '--no-proxy',
        dest='NO_PROXY',
        action='store_true',
        default=DEFAULT_NO_PROXY,
        help='Do not use a proxy. Default: %(default)s',
    )

    # state to report when a vendor flags the URL as malicious
    add(
        '--severity',
        dest='SEVERITY',
        choices=['warn', 'crit'],
        default=DEFAULT_SEVERITY,
        help='Severity for alerting. Default: %(default)s',
    )

    # unit test hook: the value is split by lib.args.csv
    add(
        '--test',
        dest='TEST',
        type=lib.args.csv,
        help='For unit tests. Needs "path-to-stdout-file,path-to-stderr-file,expected-retc".',
    )

    add(
        '--timeout',
        dest='TIMEOUT',
        type=int,
        default=DEFAULT_TIMEOUT,
        help='Network timeout in seconds. Default: %(default)s (seconds)',
    )

    # mandatory options
    add(
        '--token',
        dest='TOKEN',
        required=True,
        help='VirusTotal API token',
    )
    add(
        '--url',
        dest='URL',
        required=True,
        help='URL to scan. ',
    )

    return cli.parse_args()


def vt_urls(token, url, insecure=False, no_proxy=False, timeout=8):
    """https://docs.virustotal.com/reference/scan-url
    Hand a URL over to VirusTotal for scanning.

    The URL is sent as form data (`url=<url>`) to the VirusTotal v3 `/urls` endpoint by
    means of `lib.url.fetch_json()`. According to the VirusTotal documentation linked
    above, the JSON answer describes the newly created analysis (`data.id`,
    `data.links.self`), which can then be fetched with `vt_analyses()`.

    ### Parameters
    - **token** (`str`): VirusTotal API key, sent in the `x-apikey` header.
    - **url** (`str`): The URL that should be analysed (e.g. `"http://example.com"`).
    - **insecure** (`bool`, optional): Passed through to `lib.url.fetch_json()`.
      Defaults to `False`.
    - **no_proxy** (`bool`, optional): Passed through to `lib.url.fetch_json()`.
      Defaults to `False`.
    - **timeout** (`int` or `float`, optional): Passed through to `lib.url.fetch_json()`.
      Defaults to `8`.

    ### Returns
    - **tuple** `(success, result)`:
      - `(True, <parsed JSON>)` if the fetch succeeded and returned a non-empty result.
      - `(False, <error message str>)` if the fetch failed or the result was empty.
        The content of `data.id` is not validated here.

    ### Example
    ```python
    >>> success, result = vt_urls(API_KEY, 'http://example.com')
    >>> if success:
    ...     report_url = result['data']['links']['self']
    ... else:
    ...     print(result)
    ```
    """
    fetched, payload = lib.url.fetch_json(
        'https://www.virustotal.com/api/v3/urls',
        header={
            'accept': 'application/json',
            'content-type': 'application/x-www-form-urlencoded',
            'x-apikey': token,
        },
        data={'url': url},
        insecure=insecure,
        no_proxy=no_proxy,
        timeout=timeout,
    )

    # an empty payload counts as a failure as well
    if fetched and payload:
        return True, payload
    return False, f'Error fetching {url}: {payload}'


def vt_analyses(token, url, insecure=False, no_proxy=False, timeout=8):
    """https://docs.virustotal.com/reference/analysis
    Fetch an analysis report from VirusTotal.

    Queries the given VirusTotal Analysis endpoint (for example
    `https://www.virustotal.com/api/v3/analyses/ANALYSIS_ID`) through
    `lib.url.fetch_json()` and hands back the parsed JSON document.

    ### Parameters
    - **token** (`str`): VirusTotal API key, sent in the `x-apikey` header.
    - **url** (`str`): Full URL of the Analysis endpoint to query, typically the
      `data.links.self` value obtained from `vt_urls()`.
    - **insecure** (`bool`, optional): Passed through to `lib.url.fetch_json()`.
      Defaults to `False`.
    - **no_proxy** (`bool`, optional): Passed through to `lib.url.fetch_json()`.
      Defaults to `False`.
    - **timeout** (`int` or `float`, optional): Passed through to `lib.url.fetch_json()`.
      Defaults to `8`.

    ### Returns
    - **tuple** `(success, result)`:
      - `(True, <parsed JSON>)` if the fetch succeeded and returned a non-empty result.
      - `(False, <error message str>)` if the fetch failed or the result was empty.

    ### Notes
    - The caller in this file reads `data.attributes.status`, `data.attributes.stats`
      and `data.attributes.results` from the returned report.

    ### Example
    ```python
    >>> success, submitted = vt_urls(API_KEY, 'http://example.com')
    >>> if success:
    ...     success, report = vt_analyses(API_KEY, submitted['data']['links']['self'])
    ```
    """
    fetched, payload = lib.url.fetch_json(
        url,
        header={
            'accept': 'application/json',
            'content-type': 'application/x-www-form-urlencoded',
            'x-apikey': token,
        },
        insecure=insecure,
        no_proxy=no_proxy,
        timeout=timeout,
    )

    # an empty payload counts as a failure as well
    if fetched and payload:
        return True, payload
    return False, f'Error fetching {url}: {payload}'


def main():
    """The main function. This is where the action happens.

    Workflow:
    1. Parse the command line (exit with UNKNOWN if that fails).
    2. Submit the URL to VirusTotal, wait 60 seconds, then fetch the analysis report.
       With `--test`, the report is read from the given test data instead.
    3. Stop via `lib.base.cu()` if the analysis is still `queued` or `in-progress`.
    4. Raise the state to WARN or CRIT (depending on `--severity`) if at least one
       engine reports the category `malicious`, list every engine whose category is
       neither `harmless` nor `undetected`, and emit the stats as perfdata.
    """

    # parse the command line, exit with UNKNOWN if it fails
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # fetch data
    if args.TEST is not None:
        # do not call the API, put in test data
        import json
        stdout, _, _ = lib.lftest.test(args.TEST)
        result = json.loads(stdout)
    else:
        net_opts = {
            'insecure': args.INSECURE,
            'no_proxy': args.NO_PROXY,
            'timeout': args.TIMEOUT,
        }
        success, result = vt_urls(args.TOKEN, args.URL, **net_opts)
        if not success:
            lib.base.cu(result)
        # fixed pause between submitting the URL and asking for its report
        # (presumably to give VirusTotal time to finish the scan)
        time.sleep(60)
        try:
            success, result = vt_analyses(
                args.TOKEN,
                result['data']['links']['self'],
                **net_opts,
            )
            if not success:
                lib.base.cu(result)
        except KeyError:
            lib.base.cu(f'Unexpected result from VirusTotal: {result}')

    attributes = result['data']['attributes']
    status = attributes['status']
    if status == 'queued':
        lib.base.cu(
            f'{args.URL} is waiting to be analysed, '
            'the analysis object has empty results and stats.'
        )
    if status == 'in-progress':
        lib.base.cu(
            f'{args.URL} is being analysed, '
            'the analysis object has partial analysis results and stats.'
        )

    # init some vars
    state = STATE_OK
    perfdata = ''
    table_data = []

    # analyze data
    # https://docs.virustotal.com/reference/analyses-object
    stats = attributes['stats']
    harmless = stats['harmless']
    malicious = stats['malicious']
    suspicious = stats['suspicious']
    timeout = stats['timeout']
    undetected = stats['undetected']
    vendors = harmless + malicious + suspicious + timeout + undetected

    # list details about the scan engines if there is something interesting;
    # only the category "malicious" changes the state
    alert_state = STATE_CRIT if args.SEVERITY == 'crit' else STATE_WARN
    for engine in attributes['results'].values():
        category = engine['category']
        if category in ('harmless', 'undetected'):
            continue
        if category == 'malicious':
            state = alert_state
            engine['category'] = category + lib.base.state2str(state, prefix=' ')
        table_data.append(engine)

    # build the message
    vendor_word = lib.txt.pluralize("vendor", vendors)
    if malicious:
        msg = f'{malicious}/{vendors} security {vendor_word} flagged {args.URL} as malicious.'
    else:
        msg = f'Everything is ok. Checked {args.URL} against {vendors} {vendor_word}.'
    if table_data:
        table = lib.base.get_table(
            table_data,
            ['engine_name', 'result', 'method', 'category'],
            header=['Engine', 'Result', 'Method', 'Category'],
            sort_by_key='engine_name',
        )
        msg += '\n\n' + table

    # one perfdata entry per counter, in a fixed order
    counters = (
        ('harmless', harmless),
        ('malicious', malicious),
        ('suspicious', suspicious),
        ('timeout', timeout),
        ('undetected', undetected),
        ('vendors', vendors),
    )
    for label, value in counters:
        perfdata += lib.base.get_perfdata(
            label,
            value,
            uom=None,
            warn=None,
            crit=None,
            _min=0,
            _max=None,
        )

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


if __name__ == '__main__':
    # Run the check only when this file is executed as a script, not when it is imported.
    try:
        main()
    except Exception:   # pylint: disable=W0703
        # Catch-all for anything main() did not handle itself; the error is handed to
        # lib.base.cu() (presumably prints the error details and exits with UNKNOWN -
        # confirm against lib.base).
        lib.base.cu()
