#!/usr/bin/env python3
"""Syncs all the relevant docs/metadata from the agent to the integrations repo."""

import copy
import json
import sys
from collections import OrderedDict

import yaml
from jinja2 import Environment, FileSystemLoader

# Both nltk imports live inside the guard so that a missing nltk installation
# produces the install hint below instead of a bare traceback.
try:
    import nltk
    from nltk.tokenize import sent_tokenize
except ImportError:
    print("You must install the nltk library with 'pip3 install nltk==3.4.3'", file=sys.stderr)
    sys.exit(1)

nltk.download("punkt")

from integration_doc_helper import AGENT_ROOT, INTEGRATIONS_DOC_TEMPLATE_DIR, INTEGRATIONS_REPO
from integration_doc_helper import fixup_relative_monitor_paths, sync_agent_info

# Key is of the form (monitor package path, monitor type), value is the
# integration directory in the integrations repo where metrics should go (not
# including the `docs/` path suffix).
MONITORS_TO_INTEGRATIONS_DIR = {
    ("internal/monitors/collectd/activemq", "collectd/activemq"): "activemq",
    ("internal/monitors/collectd/apache", "collectd/apache"): "apache",
    ("internal/monitors/elasticsearch", "elasticsearch"): "elasticsearch",
    ("internal/monitors/collectd/cpu", "collectd/cpu"): "collectd-cpu",
    ("internal/monitors/collectd/cassandra", "collectd/cassandra"): "cassandra",
    ("internal/monitors/collectd/consul", "collectd/consul"): "consul",
    ("internal/monitors/collectd/couchbase", "collectd/couchbase"): "couchbase",
    ("internal/monitors/collectd/etcd", "collectd/etcd"): "etcd",
    ("internal/monitors/docker", "docker-container-stats"): "docker",
    ("internal/monitors/gitlab", "gitlab"): "gitlab",
    ("internal/monitors/collectd/hadoop", "collectd/hadoop"): "hadoop",
    ("internal/monitors/collectd/haproxy", "collectd/haproxy"): "haproxy",
    ("internal/monitors/collectd/genericjmx", "collectd/genericjmx"): "java",
    ("internal/monitors/collectd/jenkins", "collectd/jenkins"): "jenkins",
    ("internal/monitors/collectd/kafka", "collectd/kafka"): "kafka",
    ("internal/monitors/collectd/kong", "collectd/kong"): "kong",
    ("internal/monitors/collectd/marathon", "collectd/marathon"): "marathon",
    ("internal/monitors/collectd/memcached", "collectd/memcached"): "memcached",
    ("internal/monitors/collectd/mongodb", "collectd/mongodb"): "mongodb",
    ("internal/monitors/collectd/mysql", "collectd/mysql"): "mysql",
    ("internal/monitors/collectd/nginx", "collectd/nginx"): "nginx",
    ("internal/monitors/collectd/openstack", "collectd/openstack"): "openstack",
    ("internal/monitors/postgresql", "postgresql"): "postgresql",
    ("internal/monitors/collectd/rabbitmq", "collectd/rabbitmq"): "rabbitmq",
    ("internal/monitors/collectd/redis", "collectd/redis"): "redis",
    ("internal/monitors/collectd/solr", "collectd/solr"): "solr",
    ("internal/monitors/statsd", "statsd"): "statsd",
    ("internal/monitors/collectd/spark", "collectd/spark"): "spark",
    ("internal/monitors/collectd/zookeeper", "collectd/zookeeper"): "zookeeper",
    ("internal/monitors/logstash/logstash", "logstash"): "logstash",
    ("internal/monitors/logstash/tcp", "logstash-tcp"): "logstash",
    ("internal/monitors/internalmetrics", "internal-metrics"): "signalfx-agent",
}

# Banner written as the first line of every generated metrics.yaml.
_METRICS_YAML_HEADER = "# This file was generated in the Smart Agent repo and copied here, DO NOT EDIT HERE.\n"


def load_monitor_docs_from_self_describe_json():
    """
    Loads `selfdescribe.json` from the agent root and returns the value of its
    top-level "Monitors" key
    """
    with open(AGENT_ROOT / "selfdescribe.json", "r", encoding="utf-8") as self_describe_json:
        self_describe = json.load(self_describe_json)
    assert "Monitors" in self_describe, "selfdescribe.json has no 'Monitors' key"
    return self_describe["Monitors"]


def surround_by_tick(a_list):
    """
    Returns a new list with each element rendered as a string wrapped in backticks
    """
    return [f"`{an_element}`" for an_element in a_list]


def has_atleast_one_monitor_with_section(section):
    """
    Returns true if there's at least one monitor with the section in the doc
    """
    return any(section.values())


def send_all_metrics_from_all_monitors(send_metrics_field):
    """
    Returns true if all monitors of an integration sends all metrics by default
    """
    return all(send_metrics_field.values())


def append_metric_name(metric_name, metric_info):
    """
    Returns a deep copy of metric_info with a "name" key set to metric_name.
    A missing or falsy "group" is normalized to the empty string. The input
    dict is left untouched.
    """
    out = copy.deepcopy(metric_info)
    if not out.get("group"):
        out["group"] = ""
    out["name"] = metric_name
    return out


def process_metrics_from_self_describe(monitor_type, metrics):
    """
    Takes in a list of metrics from selfdescribe json and processes it for easy
    consumption by the Jinja2 template

    Returns a single-entry dict keyed by monitor_type; the value is None when
    there are no metrics, otherwise a list of per-metric dicts that each carry
    their own "name".
    """
    if not metrics:
        return {monitor_type: None}

    return {monitor_type: [append_metric_name(name, info) for name, info in metrics.items()]}


def process_config_field(field):
    """
    Returns a processed deep copy of a single config field: Go-style type names
    are mapped to doc-friendly ones and bool/int/list defaults are stringified.

    The copy is taken first so the caller's dict is never mutated.
    """
    processed_field = copy.deepcopy(field)

    # `or ""` guards against a missing key or an explicit null in the JSON.
    field_type = processed_field.get("type") or ""
    if field_type.startswith(("int", "uint")):
        processed_field["type"] = "integer"
    elif field_type == "slice":
        processed_field["type"] = "list"
    elif field_type == "struct":
        processed_field["type"] = "object"

    element_kind = processed_field.get("elementKind") or ""
    if element_kind.startswith(("int", "uint")):
        # NOTE(review): this overwrites "type" (so a list of ints ends up as
        # "integer") rather than setting "elementKind". Preserved as-is because
        # the template may rely on it -- confirm whether this is intended.
        processed_field["type"] = "integer"
    elif element_kind == "struct":
        processed_field["elementKind"] = "object"

    default = processed_field.get("default")
    if isinstance(default, bool):
        # bool must be tested before int because bool is a subclass of int
        processed_field["default"] = str(default).lower()
    elif isinstance(default, int):
        processed_field["default"] = str(default)
    elif isinstance(default, list):
        processed_field["default"] = str(default).replace("'", "")

    return processed_field


def process_config_fields_helper(config, yamlname="Config"):
    """
    Processes config fields for a monitor. Specifically, this method enumerates
    nested config fields to simplify config table construction in the template
    """
    # key represents a table (nested field) in the config and
    # the value consists of relevant config fields that constitute
    # the table. Non-nested config fields are grouped into
    # "Config", following the convention in selfdescribe.json
    processed_config = OrderedDict({yamlname: []})

    # `or []` also covers an explicit null value for "fields"
    for field in config.get("fields") or []:
        # Add the current field to the table defined by yamlname
        processed_config[yamlname].append(process_config_field(field))

        # Recurse into nested config
        if field.get("elementKind") and field.get("elementStruct") is not None:
            processed_config.update(
                process_config_fields_helper(field.get("elementStruct"), field.get("yamlName")))

    # Return empty dictionary if there are no config options
    if not processed_config.get(yamlname):
        processed_config.clear()

    return processed_config


def process_config_fields(config):
    """
    Processes a monitor's config into tables of fields, with the non-nested
    fields grouped under "Config"
    """
    return process_config_fields_helper(config)


def monitor_docs_per_integrations_repo(monitor_docs):
    """
    Returns a dict which is a processed version of selfdescribe for easy
    consumption by Jinja2 template

    The result is keyed by integration directory. Monitors that have no entry
    in MONITORS_TO_INTEGRATIONS_DIR are skipped. Several monitors may share one
    integration directory, so every section is itself keyed by monitor type.
    """
    out = {}

    for monitor_doc in monitor_docs:
        monitor_type = monitor_doc["monitorType"]
        integrations_dir = MONITORS_TO_INTEGRATIONS_DIR.get(
            (monitor_doc["config"]["package"], monitor_type))

        if not integrations_dir:
            continue

        if integrations_dir not in out:
            out[integrations_dir] = {
                "monitor_types": [],
                "send_all_metrics": OrderedDict(),
                "description": OrderedDict(),
                "configuration": OrderedDict(),
                "metrics": OrderedDict(),
                "dimensions": OrderedDict(),
                "properties": OrderedDict(),
            }

        integration = out[integrations_dir]
        integration["monitor_types"].append(monitor_type)
        integration["send_all_metrics"][monitor_type] = monitor_doc["sendAll"]
        integration["description"][monitor_type] = monitor_doc["doc"]
        integration["configuration"][monitor_type] = process_config_fields(monitor_doc["config"])
        integration["metrics"].update(
            process_metrics_from_self_describe(monitor_type, monitor_doc["metrics"]))
        integration["dimensions"][monitor_type] = monitor_doc["dimensions"]
        integration["properties"][monitor_type] = monitor_doc["properties"]

    return out


def get_jinja_env():
    """
    Returns a Jinja2 environment that loads templates from
    INTEGRATIONS_DOC_TEMPLATE_DIR and has this module's custom filters registered
    """
    j2_env = Environment(loader=FileSystemLoader(str(INTEGRATIONS_DOC_TEMPLATE_DIR)), trim_blocks=True)

    # Add custom filters
    j2_env.filters["surround_by_tick"] = surround_by_tick
    j2_env.filters["has_atleast_one_monitor_with_section"] = has_atleast_one_monitor_with_section
    j2_env.filters["send_all_metrics_from_all_monitors"] = send_all_metrics_from_all_monitors

    return j2_env


def get_template():
    """
    Returns the `integrations_doc.md.jinja2` template
    """
    return get_jinja_env().get_template("integrations_doc.md.jinja2")


def sync_docs(integration_dirs):
    """
    Renders the doc template for each integration and writes it to
    SMART_AGENT_MONITOR.md in that integration's directory. The
    "signalfx-agent" directory is skipped.
    """
    integrations_dirs_to_skip = {"signalfx-agent"}
    template = get_template()

    for integrations_dir, monitors_info in integration_dirs.items():
        if integrations_dir in integrations_dirs_to_skip:
            continue

        out = template.render(integration=monitors_info)

        print(f"Syncing docs to {integrations_dir} directory")
        target_path = INTEGRATIONS_REPO / integrations_dir / "SMART_AGENT_MONITOR.md"
        target_path.write_text(fixup_relative_monitor_paths(out), encoding="utf-8")


def generate_metric_yaml(monitor, metrics, send_all):
    """
    Returns the YAML text describing the given metrics of a monitor, one
    descriptor per metric, sorted by metric name and separated by blank lines

    "brief" is the first sentence of the description with its trailing period
    removed.
    """
    processed_metrics = {}
    for metric in metrics:
        name = metric["name"]
        assert name, "metric is missing a name"

        # `or ""` tolerates an explicit null description
        description = metric.get("description") or ""
        desc_sentences = sent_tokenize(description)

        processed_metrics[name] = {
            "title": name,
            "brief": desc_sentences[0].rstrip(".").strip() if desc_sentences else "",
            "description": description.strip(),
            "custom": send_all or not metric.get("default", False),
            "monitor": monitor,
            "metric_type": metric["type"],
        }

    # Add a blank line in between each metric descriptor.
    descriptors = (yaml.dump({name: processed_metrics[name]}) + "\n" for name in sorted(processed_metrics))
    return "\n" + "".join(descriptors)


def sync_metrics(integration_dirs):
    """
    Writes a metrics.yaml into each integration directory that has at least one
    monitor with metrics. Integrations without any metrics are skipped.
    """
    for integrations_dir, monitors_info in integration_dirs.items():
        send_all_by_monitor = monitors_info.get("send_all_metrics", {})

        metric_yaml = "".join(
            generate_metric_yaml(monitor, monitor_metrics, send_all_by_monitor.get(monitor, False))
            for monitor, monitor_metrics in monitors_info.get("metrics", {}).items()
            if monitor_metrics
        )

        if metric_yaml == "":
            continue

        print(f"Syncing metrics to {integrations_dir} directory")
        out_path = INTEGRATIONS_REPO / integrations_dir / "metrics.yaml"
        out_path.write_text(_METRICS_YAML_HEADER + metric_yaml, encoding="utf-8")


def run():
    """
    Loads the monitor docs and syncs docs, metrics and agent info, in that order
    """
    monitor_docs = load_monitor_docs_from_self_describe_json()
    integration_dirs = monitor_docs_per_integrations_repo(monitor_docs)
    sync_docs(integration_dirs)
    sync_metrics(integration_dirs)
    sync_agent_info()


if __name__ == "__main__":
    run()