From 26b7b461a3d861aafb06290bc714967fa2304ec0 Mon Sep 17 00:00:00 2001
From: beatz174-bit
Date: Mon, 13 Apr 2026 16:47:26 +1000
Subject: [PATCH] docs: generate Prometheus-driven monitoring docs and Mermaid diagrams

---
 README.md                             |   6 +
 docs/architecture.md                  |  37 +++
 docs/diagrams/architecture.mmd        |  17 +
 docs/diagrams/monitoring-coverage.mmd |  90 ++++++
 docs/monitoring-coverage.md           |  77 +++++
 docs/network.md                       |  52 ++++
 scripts/render_prometheus_docs.py     | 427 ++++++++++++++++++++++++++
 7 files changed, 706 insertions(+)
 create mode 100644 docs/diagrams/architecture.mmd
 create mode 100644 docs/diagrams/monitoring-coverage.mmd
 create mode 100644 docs/monitoring-coverage.md
 create mode 100644 docs/network.md
 create mode 100755 scripts/render_prometheus_docs.py

diff --git a/README.md b/README.md
index ce16ef7..ed9a3d7 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,12 @@ flowchart TB
 For a request-flow/network view and architecture notes, see [docs/architecture.md](docs/architecture.md).
 
 ## Prometheus Runtime Inventory Export
+Regenerate derived docs/diagrams from inventory:
+
+```bash
+python3 scripts/render_prometheus_docs.py --inventory-file docs/runtime/prometheus-inventory.json
+```
+
 Use `scripts/export_prometheus_inventory.py` to snapshot Prometheus-observed runtime inventory into versionable artifacts for docs/diagram workflows.
 
diff --git a/docs/architecture.md b/docs/architecture.md
index ba357ef..82036da 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -113,3 +113,40 @@ Unknowns (left intentionally as placeholders):
 - **Inter-host VLAN/subnet layout beyond Docker bridges:** _unknown from repo contents._
 
 If you want, this section can be replaced with a concrete Proxmox topology once you add an inventory source (e.g., Terraform, Ansible inventory, or a diagram export).
+
+## Runtime visibility from Prometheus
+
+<!-- generated: runtime-visibility begin -->
+
+Prometheus inventory provides **observed runtime coverage** of scrape targets. It complements (but does not replace) declared architecture in Compose files and static docs.
+
+- Inventory timestamp: `2026-04-13T06:36:45Z`
+- Observed jobs: `8`
+- Observed instances: `19`
+- Observed services (label-derived): `1`
+
+### Observed monitoring view
+
+| job | targets | unhealthy |
+| --- | --- | --- |
+| container-updates | 2 | 0 |
+| kuma | 2 | 0 |
+| node | 7 | 0 |
+| pihole | 1 | 0 |
+| prometheus | 1 | 0 |
+| proxmox-storage | 2 | 0 |
+| telegraf | 2 | 0 |
+| traefik | 2 | 0 |
+
+### Data sources
+
+- `docs/runtime/prometheus-inventory.json` (normalized runtime export)
+- Prometheus scrape metadata (`targets` + label sets)
+- Existing repository architecture docs for declared topology
+
+### Notes from inventory
+
+- The `up` query indicates scrape success from the Prometheus perspective only.
+- Pair this runtime export with static repository architecture docs and deployment configs for complete diagrams.
+
+<!-- generated: runtime-visibility end -->
diff --git a/docs/diagrams/architecture.mmd b/docs/diagrams/architecture.mmd
new file mode 100644
index 0000000..a14a6dd
--- /dev/null
+++ b/docs/diagrams/architecture.mmd
@@ -0,0 +1,17 @@
+flowchart TB
+    Declared["Declared architecture\n(Compose + docs)"]
+    Runtime["Observed runtime\n(Prometheus inventory)"]
+    Declared --> Runtime
+
+    subgraph Monitoring["Prometheus observed jobs"]
+        job_container_updates["container-updates"]
+        job_kuma["kuma"]
+        job_node["node"]
+        job_pihole["pihole"]
+        job_prometheus["prometheus"]
+        job_proxmox_storage["proxmox-storage"]
+        job_telegraf["telegraf"]
+        job_traefik["traefik"]
+    end
+
+    Runtime --> Monitoring
diff --git a/docs/diagrams/monitoring-coverage.mmd b/docs/diagrams/monitoring-coverage.mmd
new file mode 100644
index 0000000..7d18e3f
--- /dev/null
+++ b/docs/diagrams/monitoring-coverage.mmd
@@ -0,0 +1,90 @@
+flowchart LR
+    Prom[Prometheus]
+
+    classDef scrape stroke-dasharray: 5 5;
+    subgraph host_docker_update_exporter["Host: docker-update-exporter"]
+        container_updates_docker_update_exporter_9105["container-updates\ndocker-update-exporter:9105"]
+    end
+    subgraph host_kuma_lan_ddnsgeek_com["Host: kuma.lan.ddnsgeek.com"]
+        kuma_kuma_lan_ddnsgeek_com["kuma\nkuma.lan.ddnsgeek.com"]
+    end
+    subgraph host_monitor_kuma["Host: monitor-kuma"]
+        kuma_monitor_kuma_3001["kuma\nmonitor-kuma:3001"]
+    end
+    subgraph host_nix_cache["Host: nix-cache"]
+        node_nix_cache_9100["node\nnix-cache:9100"]
+    end
+    subgraph host_node_exporter["Host: node-exporter"]
+        node_node_exporter_9100["node\nnode-exporter:9100"]
+    end
+    subgraph host_pbs_sweet_home["Host: pbs.sweet.home"]
+        node_pbs_sweet_home_9100["node\npbs.sweet.home:9100"]
+        proxmox_storage_pbs_sweet_home_9102["proxmox-storage\npbs.sweet.home:9102"]
+    end
+    subgraph host_pihole["Host: pihole"]
+        node_pihole_9100["node\npihole:9100"]
+    end
+    subgraph host_pihole_exporter["Host: pihole-exporter"]
+        pihole_pihole_exporter_9617["pihole\npihole-exporter:9617"]
+    end
+    subgraph host_prometheus["Host: prometheus"]
+        prometheus_prometheus_9090["prometheus\nprometheus:9090"]
+    end
+    subgraph host_pve_sweet_home["Host: pve.sweet.home"]
+        node_pve_sweet_home_9100["node\npve.sweet.home:9100"]
+        proxmox_storage_pve_sweet_home_9101["proxmox-storage\npve.sweet.home:9101"]
+    end
+    subgraph host_raspberrypi_tail13f623_ts_net["Host: raspberrypi.tail13f623.ts.net"]
+        container_updates_raspberrypi_tail13f623_ts_net_9105["container-updates\nraspberrypi.tail13f623.ts.net:9105"]
+        node_raspberrypi_tail13f623_ts_net_9100["node\nraspberrypi.tail13f623.ts.net:9100"]
+        telegraf_raspberrypi_tail13f623_ts_net_9273["telegraf\nraspberrypi.tail13f623.ts.net:9273"]
+        traefik_raspberrypi_tail13f623_ts_net_8080["traefik\nraspberrypi.tail13f623.ts.net:8080"]
+    end
+    subgraph host_server["Host: server"]
+        node_server_9100["node\nserver:9100"]
+    end
+    subgraph host_telegraf["Host: telegraf"]
+        telegraf_telegraf_9273["telegraf\ntelegraf:9273"]
+    end
+    subgraph host_traefik_lan_ddnsgeek_com["Host: traefik.lan.ddnsgeek.com"]
+        traefik_traefik_lan_ddnsgeek_com_8080["traefik\ntraefik.lan.ddnsgeek.com:8080"]
+    end
+
+    Prom -. scrape .-> container_updates_docker_update_exporter_9105
+    class container_updates_docker_update_exporter_9105 scrape;
+    Prom -. scrape .-> container_updates_raspberrypi_tail13f623_ts_net_9105
+    class container_updates_raspberrypi_tail13f623_ts_net_9105 scrape;
+    Prom -. scrape .-> kuma_kuma_lan_ddnsgeek_com
+    class kuma_kuma_lan_ddnsgeek_com scrape;
+    Prom -. scrape .-> kuma_monitor_kuma_3001
+    class kuma_monitor_kuma_3001 scrape;
+    Prom -. scrape .-> node_nix_cache_9100
+    class node_nix_cache_9100 scrape;
+    Prom -. scrape .-> node_node_exporter_9100
+    class node_node_exporter_9100 scrape;
+    Prom -. scrape .-> node_pbs_sweet_home_9100
+    class node_pbs_sweet_home_9100 scrape;
+    Prom -. scrape .-> node_pihole_9100
+    class node_pihole_9100 scrape;
+    Prom -. scrape .-> node_pve_sweet_home_9100
+    class node_pve_sweet_home_9100 scrape;
+    Prom -. scrape .-> node_raspberrypi_tail13f623_ts_net_9100
+    class node_raspberrypi_tail13f623_ts_net_9100 scrape;
+    Prom -. scrape .-> node_server_9100
+    class node_server_9100 scrape;
+    Prom -. scrape .-> pihole_pihole_exporter_9617
+    class pihole_pihole_exporter_9617 scrape;
+    Prom -. scrape .-> prometheus_prometheus_9090
+    class prometheus_prometheus_9090 scrape;
+    Prom -. scrape .-> proxmox_storage_pbs_sweet_home_9102
+    class proxmox_storage_pbs_sweet_home_9102 scrape;
+    Prom -. scrape .-> proxmox_storage_pve_sweet_home_9101
+    class proxmox_storage_pve_sweet_home_9101 scrape;
+    Prom -. scrape .-> telegraf_raspberrypi_tail13f623_ts_net_9273
+    class telegraf_raspberrypi_tail13f623_ts_net_9273 scrape;
+    Prom -. scrape .-> telegraf_telegraf_9273
+    class telegraf_telegraf_9273 scrape;
+    Prom -. scrape .-> traefik_raspberrypi_tail13f623_ts_net_8080
+    class traefik_raspberrypi_tail13f623_ts_net_8080 scrape;
+    Prom -. scrape .-> traefik_traefik_lan_ddnsgeek_com_8080
+    class traefik_traefik_lan_ddnsgeek_com_8080 scrape;
diff --git a/docs/monitoring-coverage.md b/docs/monitoring-coverage.md
new file mode 100644
index 0000000..1144008
--- /dev/null
+++ b/docs/monitoring-coverage.md
@@ -0,0 +1,77 @@
+# Monitoring Coverage
+
+## Overview
+
+This page is generated from Prometheus-observed runtime inventory. It supplements declared architecture docs and does not replace static source-of-truth configuration.
+ +- Inventory timestamp: `2026-04-13T06:36:45Z` +- Prometheus URL: `http://prometheus:9090` +- Active scrape targets observed: `19` +- Unhealthy scrape targets observed: `0` + +## Coverage by job + +| job | active targets | unhealthy targets | +| --- | --- | --- | +| container-updates | 2 | 0 | +| kuma | 2 | 0 | +| node | 7 | 0 | +| pihole | 1 | 0 | +| prometheus | 1 | 0 | +| proxmox-storage | 2 | 0 | +| telegraf | 2 | 0 | +| traefik | 2 | 0 | + +## Coverage by instance + +| instance | jobs | health | +| --- | --- | --- | +| docker-update-exporter:9105 | container-updates | 1/1 up | +| kuma.lan.ddnsgeek.com | kuma | 1/1 up | +| monitor-kuma:3001 | kuma | 1/1 up | +| nix-cache:9100 | node | 1/1 up | +| node-exporter:9100 | node | 1/1 up | +| pbs.sweet.home:9100 | node | 1/1 up | +| pbs.sweet.home:9102 | proxmox-storage | 1/1 up | +| pihole-exporter:9617 | pihole | 1/1 up | +| pihole:9100 | node | 1/1 up | +| prometheus:9090 | prometheus | 1/1 up | +| pve.sweet.home:9100 | node | 1/1 up | +| pve.sweet.home:9101 | proxmox-storage | 1/1 up | +| raspberrypi.tail13f623.ts.net:8080 | traefik | 1/1 up | +| raspberrypi.tail13f623.ts.net:9100 | node | 1/1 up | +| raspberrypi.tail13f623.ts.net:9105 | container-updates | 1/1 up | +| raspberrypi.tail13f623.ts.net:9273 | telegraf | 1/1 up | +| server:9100 | node | 1/1 up | +| telegraf:9273 | telegraf | 1/1 up | +| traefik.lan.ddnsgeek.com:8080 | traefik | 1/1 up | + +## Coverage by service + +| service | instances | health | +| --- | --- | --- | +| unknown | docker-update-exporter:9105, kuma.lan.ddnsgeek.com, monitor-kuma:3001, nix-cache:9100, node-exporter:9100, pbs.sweet.home:9100, pbs.sweet.home:9102, pihole-exporter:9617, pihole:9100, prometheus:9090, pve.sweet.home:9100, pve.sweet.home:9101, raspberrypi.tail13f623.ts.net:8080, raspberrypi.tail13f623.ts.net:9100, raspberrypi.tail13f623.ts.net:9105, raspberrypi.tail13f623.ts.net:9273, server:9100, telegraf:9273, traefik.lan.ddnsgeek.com:8080 | 19/19 up | + +## Unhealthy targets + +| job | instance | scrape URL | health | last error | +| --- | --- | --- | --- | --- | +| none | | | | | + +## Unknowns / missing metadata + +| label | targets missing | +| --- | --- | +| exposure | 19 | +| hostname | 19 | +| hypervisor | 19 | +| network | 19 | +| service | 19 | + +Unknown or missing metadata is treated as `unknown` in generated summaries to avoid over-claiming topology. + +## Regeneration instructions + +```bash +python3 scripts/render_prometheus_docs.py --inventory-file docs/runtime/prometheus-inventory.json +``` diff --git a/docs/network.md b/docs/network.md new file mode 100644 index 0000000..725642e --- /dev/null +++ b/docs/network.md @@ -0,0 +1,52 @@ +# Network and Exposure View (Prometheus Observed) + +## Overview + +This document is generated from Prometheus scrape metadata and endpoint URLs. It is an observed monitoring view and not a physical network map. + +- Inventory timestamp: `2026-04-13T06:36:45Z` +- Physical topology, VLAN mapping, and bridge membership remain unknown unless explicitly documented elsewhere. 
+ +## Observed scrape endpoints + +| job | instance | scrape URL | network label | exposure label | +| --- | --- | --- | --- | --- | +| container-updates | docker-update-exporter:9105 | http://docker-update-exporter:9105/metrics | unknown | unknown | +| container-updates | raspberrypi.tail13f623.ts.net:9105 | http://raspberrypi.tail13f623.ts.net:9105/metrics | unknown | unknown | +| kuma | kuma.lan.ddnsgeek.com | http://kuma.lan.ddnsgeek.com/metrics | unknown | unknown | +| kuma | monitor-kuma:3001 | http://monitor-kuma:3001/metrics | unknown | unknown | +| node | nix-cache:9100 | http://nix-cache:9100/metrics | unknown | unknown | +| node | node-exporter:9100 | http://node-exporter:9100/metrics | unknown | unknown | +| node | pbs.sweet.home:9100 | http://pbs.sweet.home:9100/metrics | unknown | unknown | +| node | pihole:9100 | http://pihole:9100/metrics | unknown | unknown | +| node | pve.sweet.home:9100 | http://pve.sweet.home:9100/metrics | unknown | unknown | +| node | raspberrypi.tail13f623.ts.net:9100 | http://raspberrypi.tail13f623.ts.net:9100/metrics | unknown | unknown | +| node | server:9100 | http://server:9100/metrics | unknown | unknown | +| pihole | pihole-exporter:9617 | http://pihole-exporter:9617/metrics | unknown | unknown | +| prometheus | prometheus:9090 | http://prometheus:9090/metrics | unknown | unknown | +| proxmox-storage | pbs.sweet.home:9102 | http://pbs.sweet.home:9102/metrics | unknown | unknown | +| proxmox-storage | pve.sweet.home:9101 | http://pve.sweet.home:9101/metrics | unknown | unknown | +| telegraf | raspberrypi.tail13f623.ts.net:9273 | http://raspberrypi.tail13f623.ts.net:9273/metrics | unknown | unknown | +| telegraf | telegraf:9273 | http://telegraf:9273/metrics | unknown | unknown | +| traefik | raspberrypi.tail13f623.ts.net:8080 | http://raspberrypi.tail13f623.ts.net:8080/metrics | unknown | unknown | +| traefik | traefik.lan.ddnsgeek.com:8080 | http://traefik.lan.ddnsgeek.com:8080/metrics | unknown | unknown | + +## Internal vs public indicators + +| exposure label | targets | +| --- | --- | +| unknown | 19 | + +All indicators above are label-derived. Missing labels are rendered as `unknown`. + +## Monitoring paths + +| metrics path | observed targets | +| --- | --- | +| /metrics | 19 | + +## Unknowns and limits + +- Prometheus can confirm scrape reachability but not ownership or placement boundaries. +- No VLAN, switch, or hypervisor placement is inferred unless present in inventory labels. +- Treat this as runtime evidence to pair with declared architecture docs. 
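For context, the renderer below consumes `docs/runtime/prometheus-inventory.json`. The following is a minimal sketch of the shape it expects, inferred only from the fields the script reads (`generated_at`, `prometheus_url`, `targets`, `unhealthy_targets`, `unknowns.missing_label_counts`, `notes`); the authoritative format is whatever `scripts/export_prometheus_inventory.py` emits, and the values shown here are illustrative:

```json
{
  "generated_at": "2026-04-13T06:36:45Z",
  "prometheus_url": "http://prometheus:9090",
  "targets": [
    {
      "job": "node",
      "instance": "server:9100",
      "health": "up",
      "scrape_url": "http://server:9100/metrics",
      "last_error": "",
      "labels": { "job": "node", "instance": "server:9100" },
      "discovered_labels": { "__address__": "server:9100" }
    }
  ],
  "unhealthy_targets": [],
  "unknowns": { "missing_label_counts": { "service": 19, "network": 19 } },
  "notes": [
    "The `up` query indicates scrape success from the Prometheus perspective only."
  ]
}
```

Targets missing a label (`service`, `network`, and so on) are rendered as `unknown` rather than guessed, which is why the committed tables above show `unknown` throughout.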
diff --git a/scripts/render_prometheus_docs.py b/scripts/render_prometheus_docs.py
new file mode 100755
index 0000000..18cfc2a
--- /dev/null
+++ b/scripts/render_prometheus_docs.py
@@ -0,0 +1,427 @@
+#!/usr/bin/env python3
+"""Render Prometheus inventory into documentation and Mermaid diagrams."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import re
+from collections import defaultdict
+from pathlib import Path
+from typing import Any
+from urllib.parse import urlparse
+
+GENERATED_BEGIN = "<!-- generated: runtime-visibility begin -->"  # marker text is arbitrary; it only
+GENERATED_END = "<!-- generated: runtime-visibility end -->"  # needs to stay stable and unique
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--inventory-file",
+        default="docs/runtime/prometheus-inventory.json",
+        help="Path to normalized Prometheus inventory JSON.",
+    )
+    parser.add_argument("--docs-dir", default="docs", help="Documentation directory.")
+    parser.add_argument("--diagrams-dir", default="docs/diagrams", help="Diagram output directory.")
+    parser.add_argument("--readme-file", default="README.md", help="README path for regeneration notes.")
+    parser.add_argument("--architecture-file", default="docs/architecture.md", help="Architecture markdown path.")
+    parser.add_argument("--network-file", default="docs/network.md", help="Network markdown path.")
+    parser.add_argument("--coverage-file", default="docs/monitoring-coverage.md", help="Coverage markdown path.")
+    parser.add_argument("--dry-run", action="store_true", help="Print changes instead of writing files.")
+    parser.add_argument("--verbose", action="store_true", help="Print detailed processing output.")
+    return parser.parse_args()
+
+
+def load_json(path: Path) -> dict[str, Any]:
+    with path.open("r", encoding="utf-8") as handle:
+        data = json.load(handle)
+    if not isinstance(data, dict):
+        raise ValueError(f"Inventory must be a JSON object: {path}")
+    return data
+
+
+def merged_labels(target: dict[str, Any]) -> dict[str, str]:
+    discovered = target.get("discovered_labels") or {}
+    labels = target.get("labels") or {}
+    merged = {k: str(v) for k, v in discovered.items()}
+    merged.update({k: str(v) for k, v in labels.items()})  # final labels win over discovered ones
+    return merged
+
+
+def normalize_targets(inventory: dict[str, Any]) -> list[dict[str, Any]]:
+    normalized: list[dict[str, Any]] = []
+    for target in inventory.get("targets") or []:
+        if not isinstance(target, dict):
+            continue
+        labels = merged_labels(target)
+        parsed = urlparse(str(target.get("scrape_url") or ""))
+        host = parsed.hostname or labels.get("hostname") or "unknown"
+        endpoint = parsed.path or "/metrics"
+        normalized.append(
+            {
+                "job": str(target.get("job") or labels.get("job") or ""),
+                "instance": str(target.get("instance") or labels.get("instance") or ""),
+                "health": str(target.get("health") or "unknown"),
+                "scrape_url": str(target.get("scrape_url") or ""),
+                "last_error": str(target.get("last_error") or ""),
+                "host": host,
+                "endpoint": endpoint,
+                "service": labels.get("service", "unknown"),
+                "role": labels.get("role", "unknown"),
+                "hypervisor": labels.get("hypervisor", "unknown"),
+                "network": labels.get("network", "unknown"),
+                "exposure": labels.get("exposure", "unknown"),
+            }
+        )
+    normalized.sort(key=lambda t: (t["job"], t["instance"], t["scrape_url"]))
+    return normalized
+
+
+def markdown_table(headers: list[str], rows: list[list[str]]) -> str:
+    line = "| " + " | ".join(headers) + " |"
+    sep = "| " + " | ".join(["---"] * len(headers)) + " |"
+    body = ["| " + " | ".join(row) + " |" for row in rows]
+    return "\n".join([line, sep, *body])
+
+
+def summarize_targets(targets: list[dict[str, Any]], unhealthy: list[dict[str, Any]]) -> dict[str, Any]:
+    by_job: dict[str, dict[str, int]] = defaultdict(lambda: {"active": 0, "unhealthy": 0})
+    by_instance: dict[str, dict[str, Any]] = defaultdict(lambda: {"jobs": set(), "unhealthy": 0, "total": 0})
+    by_service: dict[str, dict[str, Any]] = defaultdict(lambda: {"instances": set(), "unhealthy": 0, "total": 0})
+    by_exposure: dict[str, int] = defaultdict(int)
+
+    unhealthy_set = {(u["job"], u["instance"], u["scrape_url"]) for u in unhealthy}
+
+    for target in targets:
+        job = target["job"]
+        instance = target["instance"]
+        service = target["service"] if target["service"] != "" else "unknown"
+        by_job[job]["active"] += 1
+        key = (target["job"], target["instance"], target["scrape_url"])
+        if key in unhealthy_set or target["health"] != "up":
+            by_job[job]["unhealthy"] += 1
+            by_instance[instance]["unhealthy"] += 1
+            by_service[service]["unhealthy"] += 1
+
+        by_instance[instance]["jobs"].add(job)
+        by_instance[instance]["total"] += 1
+        by_service[service]["instances"].add(instance)
+        by_service[service]["total"] += 1
+        by_exposure[target["exposure"]] += 1
+
+    return {
+        "by_job": dict(sorted(by_job.items())),
+        "by_instance": {
+            key: {
+                "jobs": sorted(value["jobs"]),
+                "unhealthy": value["unhealthy"],
+                "total": value["total"],
+            }
+            for key, value in sorted(by_instance.items())
+        },
+        "by_service": {
+            key: {
+                "instances": sorted(value["instances"]),
+                "unhealthy": value["unhealthy"],
+                "total": value["total"],
+            }
+            for key, value in sorted(by_service.items())
+        },
+        "by_exposure": dict(sorted(by_exposure.items())),
+    }
+
+
+def render_monitoring_coverage(inventory: dict[str, Any], targets: list[dict[str, Any]]) -> str:
+    unhealthy = normalize_targets({"targets": inventory.get("unhealthy_targets") or []})
+    summaries = summarize_targets(targets, unhealthy)
+    missing = (inventory.get("unknowns") or {}).get("missing_label_counts") or {}
+
+    lines = [
+        "# Monitoring Coverage",
+        "",
+        "## Overview",
+        "",
+        "This page is generated from Prometheus-observed runtime inventory. It supplements declared architecture docs and does not replace static source-of-truth configuration.",
+        "",
+        f"- Inventory timestamp: `{inventory.get('generated_at', 'unknown')}`",
+        f"- Prometheus URL: `{inventory.get('prometheus_url', 'unknown')}`",
+        f"- Active scrape targets observed: `{len(targets)}`",
+        f"- Unhealthy scrape targets observed: `{len(unhealthy)}`",
+        "",
+        "## Coverage by job",
+        "",
+    ]
+
+    job_rows = [
+        [job, str(data["active"]), str(data["unhealthy"])]
+        for job, data in summaries["by_job"].items()
+    ] or [["none", "0", "0"]]
+    lines.append(markdown_table(["job", "active targets", "unhealthy targets"], job_rows))
+
+    lines.extend(["", "## Coverage by instance", ""])
+    instance_rows = [
+        [instance, ", ".join(data["jobs"]), f"{data['total'] - data['unhealthy']}/{data['total']} up"]
+        for instance, data in summaries["by_instance"].items()
+    ] or [["none", "", ""]]
+    lines.append(markdown_table(["instance", "jobs", "health"], instance_rows))
+
+    lines.extend(["", "## Coverage by service", ""])
+    service_rows = [
+        [service, ", ".join(data["instances"]), f"{data['total'] - data['unhealthy']}/{data['total']} up"]
+        for service, data in summaries["by_service"].items()
+    ] or [["unknown", "", ""]]
+    lines.append(markdown_table(["service", "instances", "health"], service_rows))
+
+    lines.extend(["", "## Unhealthy targets", ""])
+    unhealthy_rows = [
+        [u["job"], u["instance"], u["scrape_url"], u["health"], u["last_error"] or "none"]
+        for u in unhealthy
+    ] or [["none", "", "", "", ""]]
+    lines.append(markdown_table(["job", "instance", "scrape URL", "health", "last error"], unhealthy_rows))
+
+    lines.extend(["", "## Unknowns / missing metadata", ""])
+    missing_rows = [[k, str(v)] for k, v in sorted(missing.items())] or [["none", "0"]]
+    lines.append(markdown_table(["label", "targets missing"], missing_rows))
+    lines.extend(
+        [
+            "",
+            "Unknown or missing metadata is treated as `unknown` in generated summaries to avoid over-claiming topology.",
+            "",
+            "## Regeneration instructions",
+            "",
+            "```bash",
+            "python3 scripts/render_prometheus_docs.py --inventory-file docs/runtime/prometheus-inventory.json",
+            "```",
+            "",
+        ]
+    )
+    return "\n".join(lines)
+
+
+def render_network_doc(inventory: dict[str, Any], targets: list[dict[str, Any]]) -> str:
+    summaries = summarize_targets(targets, normalize_targets({"targets": inventory.get("unhealthy_targets") or []}))
+
+    endpoint_rows = [[t["job"], t["instance"], t["scrape_url"], t["network"], t["exposure"]] for t in targets]
+    endpoint_rows = endpoint_rows or [["none", "", "", "", ""]]
+
+    exposure_rows = [[exp, str(count)] for exp, count in summaries["by_exposure"].items()] or [["unknown", "0"]]
+
+    paths = sorted({t["endpoint"] for t in targets})
+    path_rows = [[path, str(sum(1 for t in targets if t["endpoint"] == path))] for path in paths] or [["/metrics", "0"]]
+
+    lines = [
+        "# Network and Exposure View (Prometheus Observed)",
+        "",
+        "## Overview",
+        "",
+        "This document is generated from Prometheus scrape metadata and endpoint URLs. It is an observed monitoring view and not a physical network map.",
+        "",
+        f"- Inventory timestamp: `{inventory.get('generated_at', 'unknown')}`",
+        "- Physical topology, VLAN mapping, and bridge membership remain unknown unless explicitly documented elsewhere.",
+        "",
+        "## Observed scrape endpoints",
+        "",
+        markdown_table(["job", "instance", "scrape URL", "network label", "exposure label"], endpoint_rows),
+        "",
+        "## Internal vs public indicators",
+        "",
+        markdown_table(["exposure label", "targets"], exposure_rows),
+        "",
+        "All indicators above are label-derived. Missing labels are rendered as `unknown`.",
+        "",
+        "## Monitoring paths",
+        "",
+        markdown_table(["metrics path", "observed targets"], path_rows),
+        "",
+        "## Unknowns and limits",
+        "",
+        "- Prometheus can confirm scrape reachability but not ownership or placement boundaries.",
+        "- No VLAN, switch, or hypervisor placement is inferred unless present in inventory labels.",
+        "- Treat this as runtime evidence to pair with declared architecture docs.",
+        "",
+    ]
+    return "\n".join(lines)
+
+
+def render_architecture_section(inventory: dict[str, Any], targets: list[dict[str, Any]]) -> str:
+    summaries = summarize_targets(targets, normalize_targets({"targets": inventory.get("unhealthy_targets") or []}))
+    notes = inventory.get("notes") or []
+
+    lines = [
+        "## Runtime visibility from Prometheus",
+        "",
+        GENERATED_BEGIN,
+        "",
+        "Prometheus inventory provides **observed runtime coverage** of scrape targets. It complements (but does not replace) declared architecture in Compose files and static docs.",
+        "",
+        f"- Inventory timestamp: `{inventory.get('generated_at', 'unknown')}`",
+        f"- Observed jobs: `{len(summaries['by_job'])}`",
+        f"- Observed instances: `{len(summaries['by_instance'])}`",
+        f"- Observed services (label-derived): `{len(summaries['by_service'])}`",
+        "",
+        "### Observed monitoring view",
+        "",
+        markdown_table(
+            ["job", "targets", "unhealthy"],
+            [[job, str(data["active"]), str(data["unhealthy"])] for job, data in summaries["by_job"].items()] or [["none", "0", "0"]],
+        ),
+        "",
+        "### Data sources",
+        "",
+        "- `docs/runtime/prometheus-inventory.json` (normalized runtime export)",
+        "- Prometheus scrape metadata (`targets` + label sets)",
+        "- Existing repository architecture docs for declared topology",
+    ]
+    if notes:
+        lines.extend(["", "### Notes from inventory", ""])
+        for note in notes:
+            lines.append(f"- {note}")
+    lines.extend(["", GENERATED_END, ""])
+    return "\n".join(lines)
+
+
+def upsert_generated_section(path: Path, section_markdown: str, dry_run: bool, verbose: bool) -> None:
+    existing = path.read_text(encoding="utf-8") if path.exists() else ""
+    section_body = section_markdown
+
+    if GENERATED_BEGIN in existing and GENERATED_END in existing:
+        pattern = re.compile(
+            rf"{re.escape(GENERATED_BEGIN)}.*?{re.escape(GENERATED_END)}",
+            re.DOTALL,
+        )
+        replacement = "\n".join(  # drop the heading line; it already sits above the marker block
+            line for line in section_body.splitlines() if line.strip() not in {"## Runtime visibility from Prometheus"}
+        )
+        updated = pattern.sub(replacement.strip(), existing)
+    else:
+        updated = existing.rstrip() + "\n\n" + section_body.strip() + "\n"
+
+    write_file(path, updated, dry_run=dry_run, verbose=verbose)
+
+
+def mermaid_safe_id(value: str) -> str:
+    safe = re.sub(r"[^a-zA-Z0-9_]", "_", value)
+    safe = re.sub(r"_+", "_", safe).strip("_")
+    return safe or "unknown"
+
+
+def render_monitoring_mermaid(targets: list[dict[str, Any]]) -> str:
+    by_host: dict[str, list[dict[str, Any]]] = defaultdict(list)
+    for target in targets:
+        by_host[target["host"]].append(target)
+
+    lines = [
+        "flowchart LR",
+        "    Prom[Prometheus]",
+        "",
+        "    classDef scrape stroke-dasharray: 5 5;",
+    ]
+
+    for host, host_targets in sorted(by_host.items()):
+        host_id = mermaid_safe_id(f"host_{host}")
+        lines.append(f'    subgraph {host_id}["Host: {host}"]')
+        for target in sorted(host_targets, key=lambda t: (t["job"], t["instance"])):
+            tid = mermaid_safe_id(f"{target['job']}_{target['instance']}")
+            label = f"{target['job']}\\n{target['instance']}"
+            lines.append(f'        {tid}["{label}"]')
+        lines.append("    end")
+
+    lines.append("")
+    for target in targets:
+        tid = mermaid_safe_id(f"{target['job']}_{target['instance']}")
+        lines.append(f"    Prom -. scrape .-> {tid}")
+        lines.append(f"    class {tid} scrape;")
+
+    return "\n".join(lines) + "\n"
+
+
+def render_architecture_mermaid(targets: list[dict[str, Any]]) -> str:
+    jobs = sorted({t["job"] for t in targets})
+    lines = [
+        "flowchart TB",
+        '    Declared["Declared architecture\\n(Compose + docs)"]',  # quoted: parentheses are not
+        '    Runtime["Observed runtime\\n(Prometheus inventory)"]',  # valid in unquoted Mermaid labels
+        "    Declared --> Runtime",
+        "",
+        '    subgraph Monitoring["Prometheus observed jobs"]',
+    ]
+    for job in jobs:
+        jid = mermaid_safe_id(f"job_{job}")
+        lines.append(f'        {jid}["{job}"]')
+    lines.extend(["    end", "", "    Runtime --> Monitoring", ""])
+    return "\n".join(lines)
+
+
+def write_file(path: Path, content: str, dry_run: bool, verbose: bool) -> None:
+    if dry_run:
+        print(f"[DRY RUN] Would write: {path}")
+        return
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(content, encoding="utf-8")
+    if verbose:
+        print(f"Wrote {path}")
+
+
+def update_readme(path: Path, dry_run: bool, verbose: bool) -> None:
+    if not path.exists():
+        return
+    existing = path.read_text(encoding="utf-8")
+    marker = "## Prometheus Runtime Inventory Export"
+    snippet = (
+        "\nRegenerate derived docs/diagrams from inventory:\n\n"
+        "```bash\n"
+        "python3 scripts/render_prometheus_docs.py --inventory-file docs/runtime/prometheus-inventory.json\n"
+        "```\n"
+    )
+    if marker in existing and "scripts/render_prometheus_docs.py" not in existing:
+        updated = existing.replace(marker, marker + snippet)
+        write_file(path, updated, dry_run=dry_run, verbose=verbose)
+
+
+def main() -> int:
+    args = parse_args()
+    inventory_path = Path(args.inventory_file)
+    docs_dir = Path(args.docs_dir)
+    diagrams_dir = Path(args.diagrams_dir)
+
+    inventory = load_json(inventory_path)
+    targets = normalize_targets(inventory)
+
+    coverage_path = Path(args.coverage_file)
+    network_path = Path(args.network_file)
+    architecture_path = Path(args.architecture_file)
+
+    coverage_md = render_monitoring_coverage(inventory, targets)
+    network_md = render_network_doc(inventory, targets)
+    architecture_section = render_architecture_section(inventory, targets)
+    monitoring_mmd = render_monitoring_mermaid(targets)
+    architecture_mmd = render_architecture_mermaid(targets)
+
+    def resolve_doc_path(path: Path) -> Path:
+        if path.is_absolute():
+            return path
+        if len(path.parts) == 1:  # bare filename: place it under --docs-dir
+            return docs_dir / path
+        return path
+
+    write_file(resolve_doc_path(coverage_path), coverage_md, args.dry_run, args.verbose)
+    write_file(resolve_doc_path(network_path), network_md, args.dry_run, args.verbose)
+    upsert_generated_section(
+        resolve_doc_path(architecture_path),
+        architecture_section,
+        args.dry_run,
+        args.verbose,
+    )
+
+    write_file(diagrams_dir / "monitoring-coverage.mmd", monitoring_mmd, args.dry_run, args.verbose)
+    write_file(diagrams_dir / "architecture.mmd", architecture_mmd, args.dry_run, args.verbose)
+    update_readme(Path(args.readme_file), args.dry_run, args.verbose)
+
+    if args.verbose:
+        print(f"Processed {len(targets)} targets from {inventory_path}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
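
A typical regeneration flow, assuming the inventory export already exists. The `--inventory-file`, `--dry-run`, and `--verbose` flags are provided by the script above; the final step is optional, is not part of this patch, and assumes `npx` and mermaid-cli are available:

```bash
# Preview which files would be (re)written, without touching anything
python3 scripts/render_prometheus_docs.py \
  --inventory-file docs/runtime/prometheus-inventory.json --dry-run --verbose

# Write docs, diagrams, and the README/architecture sections
python3 scripts/render_prometheus_docs.py \
  --inventory-file docs/runtime/prometheus-inventory.json --verbose

# Optional: render a Mermaid source to SVG for static viewing
npx -y @mermaid-js/mermaid-cli -i docs/diagrams/monitoring-coverage.mmd \
  -o docs/diagrams/monitoring-coverage.svg
```

Because the architecture section is rewritten in place between its begin/end marker comments, re-running the script is idempotent: hand-written content outside the markers is left untouched.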