#!/usr/bin/env python3 import os import re import time import json import logging import docker import requests import yaml from prometheus_client import Gauge, start_http_server # --- Logging --- LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper() logging.basicConfig( level=getattr(logging, LOG_LEVEL, logging.INFO), format="%(asctime)s [%(levelname)s] %(message)s" ) logger = logging.getLogger("docker-update-exporter") # --- Config --- EXPORTER_PORT = 9105 CHECK_INTERVAL = 60 CACHE_TTL = 6 * 3600 SERVICES_UP_SCRIPT = "/compose/services-up.sh" CACHE_FILE = "/data/remote_digest_cache.json" client = docker.from_env() # --- Metrics --- CONTAINER_UPDATE = Gauge( "docker_container_update_available", "1 if container image is out of date (compose drift or registry), 0 otherwise", ["container", "compose_image", "running_image", "com_docker_compose_project"] ) LAST_CHECK = Gauge( "docker_image_update_last_check_timestamp", "Last time the update check ran (unix timestamp)" ) # --- Persistent Cache --- def load_cache(): if not os.path.exists(CACHE_FILE): logger.info(f"Cache file does not exist yet: {CACHE_FILE}") return {} try: with open(CACHE_FILE, "r") as f: cache = json.load(f) logger.info(f"Loaded {len(cache)} cached remote digests") logger.debug(f"Cache contents: {cache}") return cache except Exception as e: logger.error(f"Failed to load cache from {CACHE_FILE}: {e}") return {} def save_cache(): try: os.makedirs(os.path.dirname(CACHE_FILE), exist_ok=True) with open(CACHE_FILE, "w") as f: json.dump(REMOTE_DIGEST_CACHE, f) logger.debug( f"Saved {len(REMOTE_DIGEST_CACHE)} entries to cache file {CACHE_FILE}" ) except Exception as e: logger.error(f"Failed to save cache to {CACHE_FILE}: {e}") REMOTE_DIGEST_CACHE = load_cache() # --- Helpers --- def get_project_prefix_from_script(script_path): project_prefix = "core-" # fallback if not os.path.exists(script_path): logger.warning( f"services-up script not found at {script_path}, using fallback project prefix {project_prefix}" ) return project_prefix try: with open(script_path, "r") as f: for line in f: line = line.strip() m = re.match(r'PROJECT\s*=\s*["\']?([^"\']+)["\']?', line) if m: project_prefix = m.group(1) + "-" logger.debug( f"Detected compose project prefix from script: {project_prefix}" ) break except Exception as e: logger.error(f"Failed reading project prefix from {script_path}: {e}") return project_prefix def get_local_digest(image_name): try: img = client.images.get(image_name) digests = img.attrs.get("RepoDigests", []) logger.debug(f"Local RepoDigests for {image_name}: {digests}") if digests: digest = digests[0].split("@")[1] logger.debug(f"Local digest for {image_name}: {digest}") return digest logger.info(f"No local digest found for image {image_name}") except Exception as e: logger.warning(f"Failed to retrieve local digest for {image_name}: {e}") return None def get_remote_digest(image_name): now = time.time() original = image_name # Cache hit if original in REMOTE_DIGEST_CACHE: digest, ts = REMOTE_DIGEST_CACHE[original] age = now - ts if age < CACHE_TTL: logger.debug( f"Using cached remote digest for {original} " f"(age={int(age)}s, ttl={CACHE_TTL}s): {digest}" ) return digest logger.info( f"Cache entry expired for {original} " f"(age={int(age)}s > ttl={CACHE_TTL}s)" ) try: if "/" not in image_name: registry = "docker.io" repo = "library/" + image_name else: parts = image_name.split("/") if "." in parts[0] or ":" in parts[0]: registry = parts[0] repo = "/".join(parts[1:]) else: registry = "docker.io" repo = image_name if ":" in repo: repo, tag = repo.rsplit(":", 1) else: tag = "latest" logger.debug( f"Resolving remote digest for {original}: " f"registry={registry}, repo={repo}, tag={tag}" ) token = None manifest_url = None if registry in ["docker.io", "registry-1.docker.io"]: logger.debug(f"Requesting Docker Hub token for {repo}") token_res = requests.get( "https://auth.docker.io/token", params={ "service": "registry.docker.io", "scope": f"repository:{repo}:pull" }, timeout=10 ) logger.debug( f"Docker Hub token response for {repo}: " f"status={token_res.status_code}" ) token = token_res.json().get("token") manifest_url = ( f"https://registry-1.docker.io/v2/{repo}/manifests/{tag}" ) elif registry == "ghcr.io": logger.debug(f"Requesting GHCR token for {repo}") token_res = requests.get( "https://ghcr.io/token", params={ "service": "ghcr.io", "scope": f"repository:{repo}:pull" }, timeout=10 ) logger.debug( f"GHCR token response for {repo}: " f"status={token_res.status_code}" ) token = token_res.json().get("token") manifest_url = f"https://ghcr.io/v2/{repo}/manifests/{tag}" else: logger.warning( f"Unsupported registry '{registry}' for image {original}" ) return None if not token: logger.warning( f"No authentication token returned for {original}" ) return None logger.debug(f"Requesting manifest for {original}: {manifest_url}") res = requests.get( manifest_url, headers={ "Authorization": f"Bearer {token}", "Accept": "application/vnd.docker.distribution.manifest.v2+json" }, timeout=10 ) logger.debug( f"Manifest response for {original}: " f"status={res.status_code}" ) if res.status_code == 200: digest = res.headers.get("Docker-Content-Digest") logger.info( f"Fetched remote digest for {original}: {digest}" ) REMOTE_DIGEST_CACHE[original] = (digest, now) save_cache() logger.debug( f"Cached remote digest for {original}: {digest}" ) return digest if res.status_code == 429: logger.warning( f"Registry rate limit hit while fetching {original}" ) elif res.status_code in [401, 403]: logger.warning( f"Authentication failed while fetching {original}: " f"status={res.status_code}" ) else: logger.warning( f"Unexpected manifest response for {original}: " f"status={res.status_code}, body={res.text[:250]}" ) except Exception as e: logger.error(f"Failed to fetch remote digest for {original}: {e}") return None def get_compose_files_from_script(script_path): files = [] if not os.path.exists(script_path): logger.error(f"services-up script not found: {script_path}") return files base_dir = os.path.dirname(script_path) try: with open(script_path, "r") as f: content = f.read() match = re.search(r'FILES\s*=\s*\((.*?)\)', content, re.DOTALL) if not match: logger.warning( f"No FILES=(...) block found in {script_path}" ) return files lines = match.group(1).splitlines() for line in lines: line = line.strip() if line.startswith("-f"): rel_path = line[2:].strip() if rel_path: full_path = os.path.normpath( os.path.join(base_dir, rel_path) ) logger.debug( f"Resolved compose file: {rel_path} -> {full_path}" ) files.append(full_path) logger.info(f"Found {len(files)} compose files") except Exception as e: logger.error(f"Failed parsing compose files from {script_path}: {e}") return files def parse_compose_files(compose_files): service_to_image = {} for f in compose_files: if not os.path.exists(f): logger.warning(f"Compose file missing: {f}") continue try: with open(f, "r") as stream: data = yaml.safe_load(stream) or {} services = data.get("services", {}) logger.debug( f"Parsing {len(services)} services from compose file {f}" ) for service_name, service_def in services.items(): image = service_def.get("image") is_built = False if not image: is_built = True build_ctx = service_def.get("build") logger.debug( f"Service {service_name} is build-based, build config={build_ctx}" ) if isinstance(build_ctx, dict): context_path = build_ctx.get("context", ".") dockerfile_path = os.path.join( context_path, build_ctx.get("dockerfile", "Dockerfile") ) elif isinstance(build_ctx, str): context_path = build_ctx dockerfile_path = os.path.join( context_path, "Dockerfile" ) else: dockerfile_path = None if dockerfile_path and os.path.exists(dockerfile_path): try: with open(dockerfile_path, "r") as df: for line in df: line = line.strip() if ( line.upper().startswith("LABEL") and "image=" in line ): m = re.search( r'image=["\']?([^"\']+)["\']?', line ) if m: image = m.group(1) logger.debug( f"Found upstream image label for {service_name}: {image}" ) break except Exception as e: logger.warning( f"Failed reading Dockerfile {dockerfile_path}: {e}" ) if not image: image = f"{service_name}:latest" logger.info( f"No image label found for build service {service_name}, " f"defaulting to {image}" ) service_to_image[service_name] = (image, is_built) except Exception as e: logger.error(f"Failed parsing compose file {f}: {e}") logger.info(f"Mapped {len(service_to_image)} compose services to images") logger.debug(f"Service/image mapping: {service_to_image}") return service_to_image def check_containers(): logger.info("Starting container update check") CONTAINER_UPDATE.clear() project_prefix = get_project_prefix_from_script(SERVICES_UP_SCRIPT) compose_files = get_compose_files_from_script(SERVICES_UP_SCRIPT) service_to_image = parse_compose_files(compose_files) containers = client.containers.list() logger.info(f"Checking {len(containers)} running containers") for container in containers: project_label = container.labels.get("com.docker.compose.project") if not project_label: logger.debug( f"Skipping non-compose container {container.name}" ) continue service_label = container.labels.get("com.docker.compose.service") running_image = container.attrs["Config"]["Image"] logger.debug( f"Evaluating container={container.name}, " f"service={service_label}, project={project_label}, " f"running_image={running_image}" ) compose_image = None is_built = False if service_label and service_label in service_to_image: compose_image, is_built = service_to_image[service_label] if is_built: compose_image_name, _, _ = compose_image.partition(":") compose_image = f"{project_prefix}{compose_image_name}" update_flag = 0 if is_built: if running_image != compose_image: logger.info( f"Update detected for build-based container {container.name}: " f"running image {running_image} != expected {compose_image}" ) update_flag = 1 else: local_digest = get_local_digest(running_image) remote_digest = get_remote_digest( service_to_image[service_label][0] ) if local_digest and remote_digest and local_digest != remote_digest: logger.info( f"Remote image update available for {container.name}: " f"{local_digest} != {remote_digest}" ) update_flag = 1 else: if running_image != compose_image: logger.info( f"Compose drift detected for {container.name}: " f"running image {running_image} != compose image {compose_image}" ) update_flag = 1 else: local_digest = get_local_digest(running_image) remote_digest = get_remote_digest(running_image) if local_digest and remote_digest and local_digest != remote_digest: logger.info( f"Registry update available for {container.name}: " f"{local_digest} != {remote_digest}" ) update_flag = 1 CONTAINER_UPDATE.labels( container=container.name, compose_image=compose_image if compose_image else "unknown", running_image=running_image, com_docker_compose_project=project_label ).set(update_flag) logger.info( f"Container {container.name}: " f"running={running_image}, " f"compose={compose_image}, " f"update_available={update_flag}" ) LAST_CHECK.set(time.time()) logger.info("Container update check complete") if __name__ == "__main__": logger.info( f"Starting Docker update exporter on port {EXPORTER_PORT} " f"with LOG_LEVEL={LOG_LEVEL}" ) start_http_server(EXPORTER_PORT) while True: try: check_containers() except Exception as e: logger.exception(f"Unhandled error during update check: {e}") time.sleep(CHECK_INTERVAL)