Files
docker/monitoring/docker-exporter/exporter.py
T
2026-04-01 09:45:57 +10:00

411 lines
14 KiB
Python

#!/usr/bin/env python3
import argparse
import os
import re
import time
import json
import logging
import docker
import requests
import yaml
from prometheus_client import Gauge, start_http_server
# --- Logging ---
# The level name comes from the LOG_LEVEL environment variable (default
# "DEBUG"); a name the logging module does not define falls back to DEBUG.
LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG").upper()
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=getattr(logging, LOG_LEVEL, logging.DEBUG),
)
logger = logging.getLogger("docker-update-exporter")
# --- Config ---
# Port passed to start_http_server() for the metrics endpoint.
EXPORTER_PORT = 9105
# Seconds slept between two check cycles.
CHECK_INTERVAL = 60
# Seconds a cached remote digest is reused before it is fetched again.
CACHE_TTL = 6 * 60 * 60
# Paths can be overridden through the environment.
SERVICES_UP_SCRIPT = os.getenv("SERVICES_UP_SCRIPT", "/compose/services-up.sh")
CACHE_FILE = os.getenv("CACHE_FILE", "/data/remote_digest_cache.json")
# Dry-run is enabled by "1", "true" or "yes" (case-insensitive).
DRY_RUN = os.getenv("DRY_RUN", "false").lower() in {"1", "true", "yes"}
# Build the Docker client once at import time. When that fails, `client`
# stays None and the failure is only logged as a warning.
client = None
try:
    client = docker.from_env()
except Exception as exc:
    logger.warning(f"Docker client unavailable at startup: {exc}")
# --- Metrics ---
# One series per compose-managed container; the value is the update flag.
CONTAINER_UPDATE = Gauge(
    name="docker_container_update_available",
    documentation="1 if container image is out of date (compose drift or registry), 0 otherwise",
    labelnames=["container", "compose_image", "running_image", "com_docker_compose_project"],
)
# Unlabelled gauge holding a unix timestamp.
LAST_CHECK = Gauge(
    name="docker_image_update_last_check_timestamp",
    documentation="Last time the update check ran (unix timestamp)",
)
# --- Persistent Cache ---
def load_cache():
    """Load the persisted remote-digest cache from CACHE_FILE.

    Returns:
        dict: image name -> two-item ``[digest, timestamp]`` entry, the shape
        get_remote_digest() unpacks. An empty dict is returned when the file
        is missing, unreadable, not valid JSON, or not a JSON object.
        Entries that do not have the expected shape are dropped.
    """
    if not os.path.exists(CACHE_FILE):
        logger.info(f"Cache file does not exist yet: {CACHE_FILE}")
        return {}
    try:
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            raw = json.load(f)
    except Exception as e:
        # Deliberate best effort: a broken cache must never stop the exporter.
        logger.error(f"Failed to load cache: {e}")
        return {}

    # Valid JSON is not necessarily a mapping (e.g. "[]" or "null"); callers
    # index the cache by image name, so anything else is unusable.
    if not isinstance(raw, dict):
        logger.error(f"Failed to load cache: expected a JSON object, got {type(raw).__name__}")
        return {}

    cache = {}
    for image_name, entry in raw.items():
        well_formed = (
            isinstance(entry, (list, tuple))
            and len(entry) == 2
            and isinstance(entry[1], (int, float))
        )
        if well_formed:
            cache[image_name] = entry
        else:
            logger.debug(f"Dropping malformed cache entry for {image_name}")
    logger.info(f"Loaded {len(cache)} cached remote digests")
    return cache
def save_cache():
    """Write REMOTE_DIGEST_CACHE to CACHE_FILE as JSON.

    Best effort: any failure is logged at error level and swallowed so that a
    read-only or missing data directory never interrupts a check cycle.
    """
    try:
        cache_dir = os.path.dirname(CACHE_FILE)
        # A bare file name has an empty dirname, and os.makedirs("") raises
        # FileNotFoundError, which used to prevent the cache from being saved.
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)
        with open(CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump(REMOTE_DIGEST_CACHE, f)
        logger.debug(f"Saved {len(REMOTE_DIGEST_CACHE)} remote digests to cache")
    except Exception as e:
        logger.error(f"Failed to save cache: {e}")
# In-memory remote-digest cache, seeded from CACHE_FILE at import time.
# get_remote_digest() adds entries to it and save_cache() writes it back.
REMOTE_DIGEST_CACHE = load_cache()
# --- Helpers ---
def get_project_prefix_from_script(script_path):
    """Return the compose project prefix ("<project>-") declared in a script.

    The script is scanned for the first line that starts with ``PROJECT=`` and
    carries a non-empty value. Quoted and unquoted values are accepted; for an
    unquoted value only the first word is used, so the trailing newline or an
    inline ``# comment`` never ends up in the prefix.

    Args:
        script_path: Path to the services-up shell script.

    Returns:
        str: ``"<project>-"``, or ``"core-"`` when the script is missing,
        unreadable, or declares no project.
    """
    prefix = "core-"
    if not os.path.exists(script_path):
        return prefix
    try:
        with open(script_path) as f:
            for line in f:
                m = re.match(r'PROJECT\s*=\s*(["\']?)([^"\']+)', line)
                if not m:
                    continue
                quote, value = m.groups()
                # [^"']+ also matches the newline that ends the line.
                value = value.strip()
                if value and not quote:
                    value = value.split(None, 1)[0]
                if value:
                    return value + "-"
    except Exception as e:
        logger.warning(f"Failed reading project prefix: {e}")
    return prefix
def get_local_digest(image_name):
    """Return the repository digest recorded locally for ``image_name``.

    The digest is the part after ``@`` of the first ``RepoDigests`` entry of
    the image as reported by the Docker client.

    Args:
        image_name: Image name or reference understood by ``client.images.get``.

    Returns:
        str | None: The digest, or None when the Docker client is unavailable,
        the image cannot be inspected, or it carries no repository digest.
    """
    if client is None:
        return None
    try:
        img = client.images.get(image_name)
    except Exception as e:
        # Best effort, but leave a trace instead of failing silently.
        logger.debug(f"Could not inspect local image {image_name}: {e}")
        return None

    digests = img.attrs.get("RepoDigests") or []
    if digests:
        # Entries look like "<repo>@<digest>"; partition never raises.
        _, sep, digest = digests[0].partition("@")
        if sep and digest:
            logger.debug(f"Local digest for {image_name}: {digest}")
            return digest
    logger.debug(f"No local digest found for {image_name}")
    return None
# Registry host names that all refer to Docker Hub.
_DOCKER_HUB_HOSTS = ("docker.io", "registry-1.docker.io", "index.docker.io")

# Offer index / manifest-list media types as well as single-image manifests.
# With only the single-image type accepted, a multi-arch tag is answered with
# a platform-specific manifest whose digest differs from the index digest
# stored locally in RepoDigests, producing permanent false positives.
_MANIFEST_ACCEPT = ", ".join((
    "application/vnd.docker.distribution.manifest.list.v2+json",
    "application/vnd.oci.image.index.v1+json",
    "application/vnd.docker.distribution.manifest.v2+json",
    "application/vnd.oci.image.manifest.v1+json",
))


def _split_image_reference(image_name):
    """Split an image reference into (registry, repository, reference).

    ``reference`` is the digest when the name is pinned with ``@``, otherwise
    the tag (``latest`` when none is given). Single-component Docker Hub
    repositories get the ``library/`` namespace.
    """
    name, _, digest = image_name.partition("@")
    first, slash, rest = name.partition("/")
    # The first component is a registry host only if it looks like one.
    if slash and ("." in first or ":" in first or first == "localhost"):
        registry, repo = first, rest
    else:
        registry, repo = "docker.io", name

    tag = "latest"
    if ":" in repo.rsplit("/", 1)[-1]:
        repo, tag = repo.rsplit(":", 1)

    if registry in _DOCKER_HUB_HOSTS and "/" not in repo:
        repo = "library/" + repo
    return registry, repo, digest or tag


def get_remote_digest(image_name):
    """Return the registry-side digest for ``image_name``.

    Results are cached in REMOTE_DIGEST_CACHE for CACHE_TTL seconds and
    persisted with save_cache(). Only Docker Hub and ghcr.io are queried;
    both are accessed with an anonymous pull token.

    Args:
        image_name: Image reference, optionally with registry, tag or digest.

    Returns:
        str | None: Value of the ``Docker-Content-Digest`` response header, or
        None for unsupported registries, failed requests, or missing headers.
    """
    now = time.time()
    cached = REMOTE_DIGEST_CACHE.get(image_name) if isinstance(REMOTE_DIGEST_CACHE, dict) else None
    # Tolerate malformed entries from a hand-edited or older cache file.
    if isinstance(cached, (list, tuple)) and len(cached) == 2:
        digest, ts = cached
        if isinstance(ts, (int, float)) and now - ts < CACHE_TTL:
            return digest

    try:
        registry, repo, reference = _split_image_reference(image_name)
        if registry in _DOCKER_HUB_HOSTS:
            token_url = "https://auth.docker.io/token"
            service = "registry.docker.io"
            api_host = "registry-1.docker.io"
        elif registry == "ghcr.io":
            token_url = "https://ghcr.io/token"
            service = "ghcr.io"
            api_host = "ghcr.io"
        else:
            logger.warning(f"Unsupported registry {registry} for {image_name}")
            return None

        token_res = requests.get(
            token_url,
            params={"service": service, "scope": f"repository:{repo}:pull"},
            timeout=10,
        )
        token = token_res.json().get("token")
        if not token:
            return None

        res = requests.get(
            f"https://{api_host}/v2/{repo}/manifests/{reference}",
            headers={"Authorization": f"Bearer {token}", "Accept": _MANIFEST_ACCEPT},
            timeout=10,
        )
        if res.status_code != 200:
            logger.debug(f"Manifest request for {image_name} returned HTTP {res.status_code}")
            return None

        digest = res.headers.get("Docker-Content-Digest")
        # Do not cache a missing header: that would pin None for CACHE_TTL.
        if digest:
            REMOTE_DIGEST_CACHE[image_name] = (digest, now)
            save_cache()
        return digest
    except Exception as e:
        logger.debug(f"Error fetching remote digest for {image_name}: {e}")
    return None
# --- Dockerfile Image Extraction ---
def parse_dockerfile_for_image(dockerfile_path):
    """Extract the image a Dockerfile stands for.

    Precedence:
      1. The first ``LABEL`` instruction carrying ``image=<name>``.
      2. Otherwise the base image of the last ``FROM`` instruction.

    ``FROM`` flags such as ``--platform=...`` are skipped, and a ``FROM`` that
    names an earlier build stage (``FROM builder``) resolves to that stage's
    own base image instead of the alias. Comment lines are ignored.
    Backslash-continued instructions are not joined, so an ``image=`` label on
    a continuation line is not seen.

    Args:
        dockerfile_path: Path to the Dockerfile.

    Returns:
        str | None: The image name, or None when the file is missing,
        unreadable, or contains neither a matching LABEL nor a FROM.
    """
    if not os.path.exists(dockerfile_path):
        return None

    # Quoted values end at the closing quote, unquoted ones at whitespace, so
    # "LABEL image=foo other=bar" yields "foo" rather than the rest of the line.
    label_re = re.compile(r'image=(?:"([^"]*)"|\'([^\']*)\'|([^\s"\']+))')
    stage_bases = {}  # lower-cased stage alias -> resolved base image
    last_from = None
    try:
        with open(dockerfile_path) as df:
            for raw in df:
                line = raw.strip()
                if not line or line.startswith("#"):
                    continue
                words = line.split()
                instruction = words[0].upper()
                if instruction == "LABEL":
                    match = label_re.search(line)
                    image_name = next((g for g in match.groups() if g), None) if match else None
                    if image_name:
                        logger.debug(f"Found LABEL image={image_name} in {dockerfile_path}")
                        return image_name
                elif instruction == "FROM":
                    operands = [w for w in words[1:] if not w.startswith("--")]
                    if not operands:
                        continue
                    base = stage_bases.get(operands[0].lower(), operands[0])
                    if len(operands) >= 3 and operands[1].upper() == "AS":
                        stage_bases[operands[2].lower()] = base
                    last_from = base
    except Exception as e:
        logger.debug(f"Error reading Dockerfile {dockerfile_path}: {e}")
        return None

    if last_from:
        logger.debug(f"Found base FROM {last_from} in {dockerfile_path}")
    return last_from
def normalize_image_name(image_name):
    """Return ``image_name`` with an explicit tag, or None for empty input.

    Names pinned by digest (``@``) or already tagged are returned unchanged;
    anything else gets ``:latest`` appended.
    """
    if not image_name:
        return None
    # Only the last path component can hold a tag; a colon further left
    # belongs to a registry "host:port".
    already_pinned = "@" in image_name or ":" in image_name.rpartition("/")[2]
    return image_name if already_pinned else image_name + ":latest"
def expand_compose_path(path_value, project_root):
    """Expand PROJECT_ROOT and environment variables in a compose path.

    ``path_value`` is converted to ``str`` first. Both ``${PROJECT_ROOT}`` and
    ``$PROJECT_ROOT`` are replaced by ``project_root``; whatever variables
    remain are handed to ``os.path.expandvars``.
    """
    expanded = str(path_value)
    # Braced form first, then the bare form.
    for placeholder in ("${PROJECT_ROOT}", "$PROJECT_ROOT"):
        expanded = expanded.replace(placeholder, project_root)
    return os.path.expandvars(expanded)
# --- Compose parsing ---
def get_compose_files_from_script(script_path):
    """Return the compose files listed in the script's ``FILES=( ... )`` array.

    The array body is tokenized with shell rules, so entries may be spread
    over several lines or share one line, paths may be quoted, and ``#``
    comments are ignored. Recognized forms are ``-f PATH``, ``-fPATH``,
    ``--file PATH`` and ``--file=PATH``.

    Args:
        script_path: Path to the services-up shell script.

    Returns:
        list[str]: Normalized paths, resolved relative to the script's
        directory, in declaration order. Empty when the script is missing,
        has no ``FILES`` array, or cannot be parsed.
    """
    import shlex  # only this parser needs it

    files = []
    if not os.path.exists(script_path):
        return files
    base_dir = os.path.dirname(script_path)
    try:
        with open(script_path) as f:
            content = f.read()
        match = re.search(r'FILES\s*=\s*\((.*?)\)', content, re.DOTALL)
        if match:
            expect_path = False
            for token in shlex.split(match.group(1), comments=True):
                if expect_path:
                    path, expect_path = token, False
                elif token in ("-f", "--file"):
                    # The path is the next token.
                    expect_path = True
                    continue
                elif token.startswith("--file="):
                    path = token[len("--file="):]
                elif token.startswith("-f"):
                    path = token[2:]
                else:
                    continue
                if path:
                    files.append(os.path.normpath(os.path.join(base_dir, path)))
    except Exception as e:
        logger.warning(f"Failed parsing services-up.sh: {e}")
    return files
def parse_project_name_from_script(script_path):
    """Return the compose project name declared in a script.

    The script is scanned for the first line that starts with ``PROJECT=`` and
    carries a non-empty value. Quoted and unquoted values are accepted; for an
    unquoted value only the first word is used, so the trailing newline or an
    inline ``# comment`` is not part of the returned name.

    Args:
        script_path: Path to the services-up shell script.

    Returns:
        str: The project name, or ``"core"`` when the script is missing,
        unreadable, or declares no project.
    """
    project = "core"
    if not os.path.exists(script_path):
        return project
    try:
        with open(script_path) as f:
            for line in f:
                m = re.match(r'PROJECT\s*=\s*(["\']?)([^"\']+)', line)
                if not m:
                    continue
                quote, value = m.groups()
                # [^"']+ also matches the newline that ends the line.
                value = value.strip()
                if value and not quote:
                    value = value.split(None, 1)[0]
                if value:
                    project = value
                    break
    except Exception as e:
        logger.warning(f"Failed reading project name: {e}")
    return project
def resolve_local_build_image(service_name, project_name):
    """Find the tag of a locally built image for a compose service.

    Images are listed with a ``com.docker.compose.service`` label filter and
    then narrowed to those whose ``com.docker.compose.project`` label equals
    ``project_name``. The first usable tag found is returned in normalized
    form; None is returned when there is no Docker client, no match, or the
    lookup raises.
    """
    if client is None:
        return None
    service_filter = {"label": f"com.docker.compose.service={service_name}"}
    try:
        for candidate in client.images.list(filters=service_filter):
            config = candidate.attrs.get("Config", {})
            labels = config.get("Labels", {}) or {}
            if labels.get("com.docker.compose.project") != project_name:
                continue
            # Skip empty tags and dangling "<none>" entries.
            usable = next((t for t in candidate.tags if t and "<none>" not in t), None)
            if usable is None:
                continue
            logger.debug(f"Resolved local compose image for {service_name}: {usable}")
            return normalize_image_name(usable)
    except Exception as err:
        logger.debug(f"Could not inspect local build metadata for {service_name}: {err}")
    return None
def _resolve_dockerfile_path(build_ctx, compose_file, project_root):
    """Return the normalized Dockerfile path implied by a ``build`` entry."""
    if isinstance(build_ctx, dict):
        context = build_ctx.get("context", ".")
        dockerfile = build_ctx.get("dockerfile", "Dockerfile")
    else:
        # Short form: "build: ./dir"
        context, dockerfile = build_ctx, "Dockerfile"

    context_path = expand_compose_path(context, project_root)
    if not os.path.isabs(context_path):
        # Relative contexts are resolved against the compose file's directory.
        context_path = os.path.normpath(os.path.join(os.path.dirname(compose_file), context_path))
    dockerfile_expanded = expand_compose_path(dockerfile, project_root)
    return os.path.normpath(os.path.join(context_path, dockerfile_expanded))


def parse_compose_services(compose_files, project_name, project_root):
    """Map each compose service to the image it is expected to run.

    The image is resolved in this order: the service's ``image:`` value, a
    locally built image labelled for the service, the image derived from the
    service's Dockerfile, and finally ``<project>-<service>:latest``.

    Files are processed in order. A later file that re-declares a service
    without ``image`` or ``build`` keeps the image information from the
    earlier file and only replaces ``profiles`` when it sets them.

    Args:
        compose_files: Compose file paths, in the order they are layered.
        project_name: Compose project name.
        project_root: Value substituted for ``PROJECT_ROOT`` in build paths.

    Returns:
        dict: service name -> dict with keys ``image``, ``profiles``,
        ``build_context``, ``compose_file`` and ``dockerfile``.
    """
    svc_map = {}
    for f in compose_files:
        if not os.path.exists(f):
            logger.warning(f"Compose file from services-up.sh is missing: {f}")
            continue
        try:
            with open(f) as stream:
                data = yaml.safe_load(stream) or {}
            # "services:" with no body loads as None.
            services = data.get("services") or {}
            for svc_name, svc_def in services.items():
                if svc_def is None:
                    svc_def = {}
                if not isinstance(svc_def, dict):
                    logger.warning(f"Skipping service {svc_name} in {f}: definition is not a mapping")
                    continue

                image = normalize_image_name(svc_def.get("image"))
                build_ctx = svc_def.get("build")
                previous = svc_map.get(svc_name)
                if previous is not None and not image and not build_ctx:
                    # Override file that only tweaks settings: falling back to
                    # the default name here would discard the real image.
                    if "profiles" in svc_def:
                        previous["profiles"] = svc_def.get("profiles", [])
                    continue

                dockerfile_path = None
                local_built_image = None
                from_dockerfile = None
                if build_ctx:
                    dockerfile_path = _resolve_dockerfile_path(build_ctx, f, project_root)
                    # The lookups below only matter when "image:" is absent,
                    # so skip the Docker API call and file read otherwise.
                    if not image:
                        local_built_image = resolve_local_build_image(svc_name, project_name)
                        if not local_built_image:
                            from_dockerfile = normalize_image_name(
                                parse_dockerfile_for_image(dockerfile_path)
                            )

                resolved_image = (
                    image
                    or local_built_image
                    or from_dockerfile
                    or f"{project_name}-{svc_name}:latest"
                )
                svc_map[svc_name] = {
                    "image": resolved_image,
                    "profiles": svc_def.get("profiles", []),
                    "build_context": build_ctx,
                    "compose_file": f,
                    "dockerfile": dockerfile_path,
                }
        except Exception as e:
            logger.warning(f"Failed parsing {f}: {e}")
    logger.debug(f"Service image mapping: {svc_map}")
    return svc_map
# --- Main check ---
def check_containers():
    """Run one update-check cycle and publish the result as metrics.

    For every running container that carries a compose project label, the
    local digest of the running image is compared with the remote digest of
    the image the compose files expect (or of the running image when the
    service is not in the compose mapping). The flag is 1 only when both
    digests are known and differ.

    All lookups finish before the gauge is touched: CONTAINER_UPDATE is then
    cleared and repopulated in one go, and LAST_CHECK is set to the current
    time. If a lookup raises, the exception propagates and the previously
    published series stay in place.
    """
    if client is None:
        logger.error("Docker client is unavailable; skipping check cycle")
        return

    project_name = parse_project_name_from_script(SERVICES_UP_SCRIPT)
    project_root = os.path.dirname(SERVICES_UP_SCRIPT)
    compose_files = get_compose_files_from_script(SERVICES_UP_SCRIPT)
    svc_map = parse_compose_services(compose_files, project_name, project_root)

    results = []
    for container in client.containers.list():
        proj = container.labels.get("com.docker.compose.project")
        if not proj:
            continue
        svc = container.labels.get("com.docker.compose.service")
        running = container.attrs["Config"]["Image"]
        # NOTE(review): the lookup is by service name only, so a container of
        # another compose project with the same service name gets this
        # project's image - confirm whether that is intended.
        compose_image = svc_map[svc]["image"] if svc in svc_map else None

        local_digest = get_local_digest(running)
        remote_digest = get_remote_digest(compose_image or running)
        outdated = bool(local_digest and remote_digest and local_digest != remote_digest)
        results.append((
            {
                "container": container.name,
                "compose_image": compose_image or "unknown",
                "running_image": running,
                "com_docker_compose_project": proj,
            },
            1 if outdated else 0,
        ))

    # Publish only now, so the slow registry lookups above never leave the
    # exporter with an empty gauge.
    CONTAINER_UPDATE.clear()
    for labels, update_flag in results:
        CONTAINER_UPDATE.labels(**labels).set(update_flag)
    # This gauge was declared but never updated.
    LAST_CHECK.set(time.time())
def dump_service_image_mapping():
    """Log and return the service -> image mapping derived from the script.

    The mapping is built from the compose files referenced by
    SERVICES_UP_SCRIPT and logged at info level as indented JSON.

    Returns:
        dict: service name -> resolved image, in sorted service order.
    """
    script = SERVICES_UP_SCRIPT
    project = parse_project_name_from_script(script)
    root = os.path.dirname(script)
    files = get_compose_files_from_script(script)
    services = parse_compose_services(files, project, root)

    mapping = {}
    for name in sorted(services):
        mapping[name] = services[name]["image"]

    logger.info("Service to image mapping:")
    logger.info(json.dumps(mapping, indent=2, sort_keys=True))
    return mapping
# --- Runner ---
if __name__ == "__main__":
    # Command-line flags default to the values already resolved from the
    # environment at import time.
    cli = argparse.ArgumentParser(description="Docker image update exporter")
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Only print service->image mapping and exit",
    )
    for flag, default, summary in (
        ("--services-up-script", SERVICES_UP_SCRIPT, "Path to services-up script"),
        ("--cache-file", CACHE_FILE, "Path to digest cache file"),
        ("--log-level", LOG_LEVEL, "Logging level"),
    ):
        cli.add_argument(flag, default=default, help=f"{summary} (default: {default})")
    args = cli.parse_args()

    # Apply the requested level to the root logger and to this module's
    # logger; unknown level names fall back to DEBUG.
    effective_log_level = str(args.log_level).upper()
    level = getattr(logging, effective_log_level, logging.DEBUG)
    for target in (logging.getLogger(), logger):
        target.setLevel(level)

    # Rebind the module-level settings, then reload the cache from the
    # (possibly different) cache file.
    SERVICES_UP_SCRIPT = args.services_up_script
    CACHE_FILE = args.cache_file
    REMOTE_DIGEST_CACHE = load_cache()

    if DRY_RUN or args.dry_run:
        dump_service_image_mapping()
        raise SystemExit(0)

    start_http_server(EXPORTER_PORT)
    while True:
        try:
            check_containers()
        except Exception as exc:
            # Keep the exporter alive; the next cycle tries again.
            logger.exception(f"update check failed: {exc}")
        time.sleep(CHECK_INTERVAL)