#!/usr/bin/env python3

# Syncs all the relevant docs/metadata from the agent to the integrations repo.

import copy
import json
import yaml

from collections import OrderedDict
from jinja2 import Environment, FileSystemLoader
from nltk.tokenize import sent_tokenize

# Fail with an actionable message (instead of a bare traceback) when the
# third-party nltk package is missing.
# NOTE(review): the file-level `from nltk.tokenize import sent_tokenize` import
# runs before this guard, so a missing nltk currently fails there first;
# consider moving that import below this guard.
try:
    import nltk
except ImportError:
    # `sys` is not imported at file level, so bring it into scope here;
    # otherwise this handler itself would die with a NameError.
    import sys

    print("You must install the nltk library with 'pip3 install nltk==3.4.3'", file=sys.stderr)
    sys.exit(1)

# Download the "punkt" tokenizer data (presumably what `sent_tokenize` needs at
# runtime -- confirm against the NLTK docs). This runs at import time.
nltk.download("punkt")

from integration_doc_helper import AGENT_ROOT, INTEGRATIONS_DOC_TEMPLATE_DIR, INTEGRATIONS_REPO
from integration_doc_helper import fixup_relative_monitor_paths, sync_agent_info

# Key is a tuple of the form (<monitor/package/path>, <monitorType>); value is
# the integration directory in the integrations repo where the generated docs
# and metrics for that monitor should go (not including the `docs/` path
# suffix). Several monitors may map to the same directory (e.g. both logstash
# monitors), in which case their output is combined under that directory.
# Monitors that do not appear here are not synced.
MONITORS_TO_INTEGRATIONS_DIR = {
    ("internal/monitors/collectd/activemq", "collectd/activemq"): "activemq",
    ("internal/monitors/collectd/apache", "collectd/apache"): "apache",
    ("internal/monitors/elasticsearch", "elasticsearch"): "elasticsearch",
    ("internal/monitors/collectd/cpu", "collectd/cpu"): "collectd-cpu",
    ("internal/monitors/collectd/cassandra", "collectd/cassandra"): "cassandra",
    ("internal/monitors/collectd/consul", "collectd/consul"): "consul",
    ("internal/monitors/collectd/couchbase", "collectd/couchbase"): "couchbase",
    ("internal/monitors/collectd/etcd", "collectd/etcd"): "etcd",
    ("internal/monitors/docker", "docker-container-stats"): "docker",
    ("internal/monitors/gitlab", "gitlab"): "gitlab",
    ("internal/monitors/collectd/hadoop", "collectd/hadoop"): "hadoop",
    ("internal/monitors/collectd/haproxy", "collectd/haproxy"): "haproxy",
    ("internal/monitors/collectd/genericjmx", "collectd/genericjmx"): "java",
    ("internal/monitors/collectd/jenkins", "collectd/jenkins"): "jenkins",
    ("internal/monitors/collectd/kafka", "collectd/kafka"): "kafka",
    ("internal/monitors/collectd/kong", "collectd/kong"): "kong",
    ("internal/monitors/collectd/marathon", "collectd/marathon"): "marathon",
    ("internal/monitors/collectd/memcached", "collectd/memcached"): "memcached",
    ("internal/monitors/collectd/mongodb", "collectd/mongodb"): "mongodb",
    ("internal/monitors/collectd/mysql", "collectd/mysql"): "mysql",
    ("internal/monitors/collectd/nginx", "collectd/nginx"): "nginx",
    ("internal/monitors/collectd/openstack", "collectd/openstack"): "openstack",
    ("internal/monitors/postgresql", "postgresql"): "postgresql",
    ("internal/monitors/collectd/rabbitmq", "collectd/rabbitmq"): "rabbitmq",
    ("internal/monitors/collectd/redis", "collectd/redis"): "redis",
    ("internal/monitors/collectd/solr", "collectd/solr"): "solr",
    ("internal/monitors/statsd", "statsd"): "statsd",
    ("internal/monitors/collectd/spark", "collectd/spark"): "spark",
    ("internal/monitors/collectd/zookeeper", "collectd/zookeeper"): "zookeeper",
    ("internal/monitors/logstash/logstash", "logstash"): "logstash",
    ("internal/monitors/logstash/tcp", "logstash-tcp"): "logstash",
    ("internal/monitors/internalmetrics", "internal-metrics"): "signalfx-agent",
}


def load_monitor_docs_from_self_describe_json():
    """
    Reads `selfdescribe.json` from the agent root and returns the value
    stored under its top-level "Monitors" key.
    """
    selfdescribe_path = AGENT_ROOT / "selfdescribe.json"
    with open(selfdescribe_path, "r", encoding="utf-8") as json_file:
        parsed = json.load(json_file)

    # The rest of the script is driven entirely by the monitor list
    assert "Monitors" in parsed
    return parsed["Monitors"]


def surround_by_tick(a_list):
    """
    Returns a new list in which every element of a_list is
    formatted as a string wrapped in backticks
    """
    ticked = []
    for item in a_list:
        ticked.append("`%s`" % item)
    return ticked


def has_atleast_one_monitor_with_section(section):
    """
    Tells whether any monitor has content for the given section.

    `section` maps monitor type to that monitor's content for the
    section; the result is True as soon as one value is truthy and
    False when every value is empty (or there are no monitors).
    """
    return any(section.values())


def send_all_metrics_from_all_monitors(send_metrics_field):
    """
    Tells whether every monitor of an integration sends all of its
    metrics by default.

    `send_metrics_field` maps monitor type to its send-all flag; the
    result is False as soon as one flag is falsy and True otherwise
    (including when there are no monitors).
    """
    return all(send_metrics_field.values())


def append_metric_name(metric_name, metric_info):
    """
    Returns a deep copy of metric_info with the metric name stored
    under "name" and a falsy "group" normalized to an empty string.
    The metric_info passed in is left untouched.
    """
    enriched = copy.deepcopy(metric_info)

    # Falsy groups (e.g. None) collapse to ""; truthy ones are kept
    enriched["group"] = enriched["group"] or ""
    enriched["name"] = metric_name
    return enriched


def process_metrics_from_self_describe(monitor_type, metrics):
    """
    Converts the metrics mapping of one monitor from selfdescribe json
    into a single-entry dict keyed by monitor_type, for easy
    consumption by the Jinja2 template.

    The value is a list of metric dicts (each carrying its own name),
    or None when the monitor has no metrics.
    """
    processed = None
    if metrics:
        processed = [append_metric_name(name, info) for name, info in metrics.items()]
    return {monitor_type: processed}


# Integer kind names as they appear in selfdescribe.json (presumably Go
# reflect kinds -- confirm). Exact names are matched rather than an "int"
# prefix so that a kind such as "interface" is not reported as an integer.
_INTEGER_KINDS = frozenset(
    [
        "int",
        "int8",
        "int16",
        "int32",
        "int64",
        "uint",
        "uint8",
        "uint16",
        "uint32",
        "uint64",
        "uintptr",
    ]
)


def process_config_field(field):
    """
    Returns a copy of a selfdescribe config field normalized for the
    docs template. The field passed in is not modified.

    - "type": integer kinds become "integer", "slice" becomes "list"
      and "struct" becomes "object"; anything else is kept as is.
    - "elementKind": integer kinds become "integer" and "struct"
      becomes "object"; anything else is kept as is.
    - "default": booleans become "true"/"false", integers become their
      decimal string and lists become their string form without single
      quotes; anything else is kept as is.

    Missing or None "type", "elementKind" and "default" entries are
    tolerated and left untouched.
    """
    # Work on a copy so the caller's selfdescribe data is never mutated
    processed_field = copy.deepcopy(field)

    field_type = processed_field.get("type") or ""
    if field_type in _INTEGER_KINDS:
        processed_field["type"] = "integer"
    elif field_type == "slice":
        processed_field["type"] = "list"
    elif field_type == "struct":
        processed_field["type"] = "object"

    # The element kind describes the members of a collection, so it is
    # normalized in place and must not overwrite the field's own type
    element_kind = processed_field.get("elementKind") or ""
    if element_kind in _INTEGER_KINDS:
        processed_field["elementKind"] = "integer"
    elif element_kind == "struct":
        processed_field["elementKind"] = "object"

    default = processed_field.get("default")
    # bool is checked before int because bool is a subclass of int
    if isinstance(default, bool):
        processed_field["default"] = str(default).lower()
    elif isinstance(default, int):
        processed_field["default"] = str(default)
    elif isinstance(default, list):
        processed_field["default"] = str(default).replace("'", "")

    return processed_field


def process_config_fields_helper(config, yamlname="Config"):
    """
    Flattens the (possibly nested) config fields of a monitor into
    tables, to simplify config table construction in the template.

    The result is an ordered mapping from table name to the list of
    processed fields in that table. Fields found directly in `config`
    go into the table named by `yamlname` ("Config" by default,
    following the convention in selfdescribe.json); a field that has
    an element kind and an element struct additionally contributes the
    tables of that struct, keyed by the field's yamlName. The mapping
    is empty when the `yamlname` table ends up with no fields.
    """
    tables = OrderedDict()
    tables[yamlname] = []

    for entry in config.get("fields", []):
        # The field itself is always listed in the current table
        tables[yamlname].append(process_config_field(entry))

        # Nested structs are expanded into tables of their own
        nested_struct = entry.get("elementStruct", None)
        if entry.get("elementKind", None) and nested_struct is not None:
            nested_tables = process_config_fields_helper(nested_struct, entry.get("yamlName"))
            tables.update(nested_tables)

    # No config options at all: hand back an empty mapping
    if not tables.get(yamlname):
        tables.clear()

    return tables


def process_config_fields(config):
    """
    Processes the config of a monitor into tables of fields, using the
    helper's default name for the top-level table.
    """
    config_tables = process_config_fields_helper(config)
    return config_tables


def monitor_docs_per_integrations_repo(monitor_docs):
    """
    Returns a dict which is a processed version of selfdescribe for
    easy consumption by the Jinja2 template.

    Monitors are grouped by the integration directory they map to in
    MONITORS_TO_INTEGRATIONS_DIR; monitors without a mapping are
    skipped. Each integration directory maps to a dict holding the
    list of its monitor types ("monitor_types") plus, keyed by monitor
    type, the "send_all_metrics" flag, "description", "configuration",
    "metrics", "dimensions" and "properties" of each monitor.
    """
    grouped = {}

    for doc in monitor_docs:
        lookup_key = (doc["config"]["package"], doc["monitorType"])
        target_dir = MONITORS_TO_INTEGRATIONS_DIR.get(lookup_key, None)

        # Not an integration that gets synced
        if not target_dir:
            continue

        if target_dir not in grouped:
            grouped[target_dir] = {
                "monitor_types": [],
                "send_all_metrics": OrderedDict(),
                "description": OrderedDict(),
                "configuration": OrderedDict(),
                "metrics": OrderedDict(),
                "dimensions": OrderedDict(),
                "properties": OrderedDict(),
            }

        entry = grouped[target_dir]
        monitor_type = doc["monitorType"]

        entry["monitor_types"].append(monitor_type)
        entry["send_all_metrics"][monitor_type] = doc["sendAll"]
        entry["description"][monitor_type] = doc["doc"]
        entry["configuration"][monitor_type] = process_config_fields(doc["config"])
        entry["metrics"].update(process_metrics_from_self_describe(monitor_type, doc["metrics"]))
        entry["dimensions"][monitor_type] = doc["dimensions"]
        entry["properties"][monitor_type] = doc["properties"]

    return grouped


def get_jinja_env():
    """
    Builds the Jinja2 environment that loads templates from the
    integrations doc template directory, with block trimming enabled
    and this script's custom filters registered.
    """
    template_loader = FileSystemLoader(str(INTEGRATIONS_DOC_TEMPLATE_DIR))
    env = Environment(loader=template_loader, trim_blocks=True)

    # Custom filters, registered under the names of their functions
    env.filters.update(
        {
            "surround_by_tick": surround_by_tick,
            "has_atleast_one_monitor_with_section": has_atleast_one_monitor_with_section,
            "send_all_metrics_from_all_monitors": send_all_metrics_from_all_monitors,
        }
    )

    return env


def get_template():
    """
    Returns the "integrations_doc.md.jinja2" template, loaded through a
    freshly built Jinja2 environment.
    """
    return get_jinja_env().get_template("integrations_doc.md.jinja2")


def sync_docs(integration_dirs):
    """
    Renders the doc template for each integration and writes the result
    to SMART_AGENT_MONITOR.md in that integration's directory of the
    integrations repo.

    `integration_dirs` maps integration directory name to the monitor
    info passed to the template as `integration`. The "signalfx-agent"
    directory is skipped.
    """
    skipped_dirs = {"signalfx-agent"}
    doc_template = get_template()

    for integrations_dir, monitors_info in integration_dirs.items():
        if integrations_dir in skipped_dirs:
            continue

        rendered = doc_template.render(integration=monitors_info)

        print(f"Syncing docs to {integrations_dir} directory")
        destination = INTEGRATIONS_REPO / integrations_dir / "SMART_AGENT_MONITOR.md"
        destination.write_text(fixup_relative_monitor_paths(rendered), encoding="utf-8")


def generate_metric_yaml(monitor, metrics, send_all):
    """
    Returns the YAML text describing the given metrics of a monitor.

    Each metric becomes a top-level YAML entry keyed by metric name
    with its title, brief (first sentence of the description without
    the trailing period), description, custom flag, monitor and metric
    type. Entries are emitted in metric-name order; the text starts
    with a blank line and every entry is followed by one.
    """
    descriptors = {}
    for metric in metrics:
        assert metric["name"]

        metric_name = metric["name"]
        is_custom = send_all or not metric.get("default", False)
        sentences = sent_tokenize(metric.get("description", ""))
        if sentences:
            brief = sentences[0].rstrip(".").strip()
        else:
            brief = ""

        descriptors[metric_name] = {
            "title": metric_name,
            "brief": brief,
            "description": metric.get("description", "").strip(),
            "custom": is_custom,
            "monitor": monitor,
            "metric_type": metric["type"],
        }

    # Each descriptor is dumped on its own and followed by a blank line
    chunks = [yaml.dump({metric_name: descriptors[metric_name]}) + "\n" for metric_name in sorted(descriptors)]
    return "\n" + "".join(chunks)


def sync_metrics(integration_dirs):
    """
    Writes a metrics.yaml file into the directory of each integration
    that has at least one monitor with metrics.

    `integration_dirs` maps integration directory name to monitor info;
    the "metrics" and "send_all_metrics" entries of that info (both
    keyed by monitor type) are used. Integrations without any metrics
    are left alone.
    """
    for integrations_dir, monitors_info in integration_dirs.items():
        fragments = []
        for monitor, monitor_metrics in monitors_info.get("metrics", {}).items():
            if monitor_metrics:
                send_all = monitors_info.get("send_all_metrics", {}).get(monitor, False)
                fragments.append(generate_metric_yaml(monitor, monitor_metrics, send_all))

        body = "".join(fragments)
        if not body:
            continue

        print(f"Syncing metrics to {integrations_dir} directory")

        # The generated file starts with a do-not-edit banner
        banner = "# This file was generated in the Smart Agent repo and copied here, DO NOT EDIT HERE.\n"
        out_path = INTEGRATIONS_REPO / integrations_dir / "metrics.yaml"
        out_path.write_text(banner + body, encoding="utf-8")


def run():
    """
    Entry point: loads the monitor docs from selfdescribe.json, groups
    them per integration directory, then syncs the docs, the metrics
    and finally the agent info.
    """
    docs_by_integration = monitor_docs_per_integrations_repo(load_monitor_docs_from_self_describe_json())

    sync_docs(docs_by_integration)
    sync_metrics(docs_by_integration)

    sync_agent_info()


if __name__ == "__main__":
    run()