From cd855028d64dd0b1733f99ae92e11efd6606b0d2 Mon Sep 17 00:00:00 2001 From: beatz174-bit Date: Sun, 5 Apr 2026 16:42:30 +1000 Subject: [PATCH] Fix docker exporter digest checks for multi-arch images --- monitoring/docker-exporter/exporter.py | 122 ++++++++++++++----------- 1 file changed, 68 insertions(+), 54 deletions(-) diff --git a/monitoring/docker-exporter/exporter.py b/monitoring/docker-exporter/exporter.py index 5746a72..0dcf6f9 100644 --- a/monitoring/docker-exporter/exporter.py +++ b/monitoring/docker-exporter/exporter.py @@ -6,7 +6,6 @@ import time import json import logging import docker -import requests import yaml from prometheus_client import Gauge, start_http_server @@ -21,7 +20,7 @@ logger = logging.getLogger("docker-update-exporter") # --- Config --- EXPORTER_PORT = 9105 CHECK_INTERVAL = 60 -CACHE_TTL = 6 * 3600 +CACHE_TTL = int(os.getenv("CACHE_TTL", "300")) SERVICES_UP_SCRIPT = os.getenv("SERVICES_UP_SCRIPT", "/compose/services-up.sh") CACHE_FILE = os.getenv("CACHE_FILE", "/data/remote_digest_cache.json") DRY_RUN = os.getenv("DRY_RUN", "false").lower() in ("1", "true", "yes") @@ -91,6 +90,12 @@ def save_cache(): logger.error(f"Failed to save cache: {e}") REMOTE_DIGEST_CACHE = load_cache() +now = time.time() +REMOTE_DIGEST_CACHE = { + image: (digest, ts) + for image, (digest, ts) in REMOTE_DIGEST_CACHE.items() + if now - ts < CACHE_TTL +} # --- Helpers --- def get_project_prefix_from_script(script_path): @@ -108,78 +113,71 @@ def get_project_prefix_from_script(script_path): return prefix def get_local_digest(image_name): + """ + Return the local digest for the specific image reference. + """ if client is None: return None + try: img = client.images.get(image_name) digests = img.attrs.get("RepoDigests", []) - if digests: - digest = digests[0].split("@")[1] - logger.debug(f"Local digest for {image_name}: {digest}") - return digest - logger.debug(f"No local digest found for {image_name}") - except Exception: - pass + + logger.debug(f"RepoDigests for {image_name}: {digests}") + + for entry in digests: + if "@" in entry: + digest = entry.split("@", 1)[1] + logger.debug(f"Local digest for {image_name}: {digest}") + return digest + + logger.debug(f"No RepoDigest found for {image_name}") + + except Exception as e: + logger.debug(f"Could not get local digest for {image_name}: {e}") + return None def get_remote_digest(image_name): + """ + Return the upstream digest for the exact platform-specific image that Docker + would pull on this host. This avoids false positives with multi-arch images + where the registry manifest-list digest differs from the pulled image digest. + """ now = time.time() - if image_name in REMOTE_DIGEST_CACHE: - digest, ts = REMOTE_DIGEST_CACHE[image_name] + + cached = REMOTE_DIGEST_CACHE.get(image_name) + if cached: + digest, ts = cached if now - ts < CACHE_TTL: + logger.debug(f"Using cached remote digest for {image_name}: {digest}") return digest + if client is None: + return None + try: - if "/" not in image_name: - registry = "docker.io" - repo = "library/" + image_name - else: - parts = image_name.split("/") - if "." in parts[0] or ":" in parts[0]: - registry = parts[0] - repo = "/".join(parts[1:]) - else: - registry = "docker.io" - repo = image_name + registry_data = client.images.get_registry_data(image_name) - if ":" in repo: - repo, tag = repo.rsplit(":", 1) - else: - tag = "latest" + digest = None - if registry in ["docker.io", "registry-1.docker.io"]: - token_res = requests.get( - "https://auth.docker.io/token", - params={"service": "registry.docker.io", "scope": f"repository:{repo}:pull"}, - timeout=10 + # docker SDK versions differ; try the common fields in order + if hasattr(registry_data, "id") and registry_data.id: + digest = registry_data.id + elif hasattr(registry_data, "attrs"): + digest = ( + registry_data.attrs.get("Descriptor", {}).get("digest") + or registry_data.attrs.get("digest") ) - token = token_res.json().get("token") - manifest_url = f"https://registry-1.docker.io/v2/{repo}/manifests/{tag}" - elif registry == "ghcr.io": - token_res = requests.get( - "https://ghcr.io/token", - params={"service": "ghcr.io", "scope": f"repository:{repo}:pull"}, - timeout=10 - ) - token = token_res.json().get("token") - manifest_url = f"https://ghcr.io/v2/{repo}/manifests/{tag}" - else: - logger.warning(f"Unsupported registry {registry} for {image_name}") - return None - if not token: - return None - - res = requests.get( - manifest_url, - headers={"Authorization": f"Bearer {token}", "Accept": "application/vnd.docker.distribution.manifest.v2+json"}, - timeout=10 - ) - if res.status_code == 200: - digest = res.headers.get("Docker-Content-Digest") + if digest: REMOTE_DIGEST_CACHE[image_name] = (digest, now) save_cache() + logger.debug(f"Remote digest for {image_name}: {digest}") return digest + + logger.warning(f"No remote digest found for {image_name}") + return None except Exception as e: logger.debug(f"Error fetching remote digest for {image_name}: {e}") @@ -431,6 +429,16 @@ def check_containers(): remote_target = compose_image or running remote_digest = get_remote_digest(remote_target) + logger.info( + "Digest comparison: container=%s service=%s running=%s target=%s local=%s remote=%s", + container.name, + svc, + running, + remote_target, + local_digest, + remote_digest, + ) + if local_digest and remote_digest and local_digest != remote_digest: update_flag = 1 @@ -480,6 +488,12 @@ if __name__ == "__main__": SERVICES_UP_SCRIPT = args.services_up_script CACHE_FILE = args.cache_file REMOTE_DIGEST_CACHE = load_cache() + now = time.time() + REMOTE_DIGEST_CACHE = { + image: (digest, ts) + for image, (digest, ts) in REMOTE_DIGEST_CACHE.items() + if now - ts < CACHE_TTL + } if DRY_RUN or args.dry_run: dump_service_image_mapping()