"""Prometheus exporter for per-container Docker resource usage.

Every ``COLLECT_INTERVAL_SECONDS`` the exporter lists the containers returned
by the Docker client, reads one stats sample per container and publishes CPU,
memory and network metrics on an HTTP endpoint at ``LISTEN_PORT``.
"""

import contextlib
import logging
import time
from concurrent.futures import ThreadPoolExecutor

import docker
from prometheus_client import Counter, Gauge, start_http_server

logger = logging.getLogger(__name__)

LISTEN_PORT = 9338
COLLECT_INTERVAL_SECONDS = 15
MAX_WORKERS = 10

# Label names shared by every exported metric.
LABEL_NAMES = [
    "name",
    "container_label_coolify_projectName",
    "container_label_coolify_serviceName",
]

client = docker.from_env()

cpu_gauge = Gauge("container_cpu_percent", "CPU usage %", LABEL_NAMES)
mem_gauge = Gauge("container_mem_bytes", "Memory usage bytes", LABEL_NAMES)
net_rx_counter = Counter("container_net_rx_bytes", "Network RX bytes", LABEL_NAMES)
net_tx_counter = Counter("container_net_tx_bytes", "Network TX bytes", LABEL_NAMES)

# Last cumulative byte totals seen per container name; used to turn Docker's
# running totals into counter increments.
net_rx_prev = {}
net_tx_prev = {}

# Container name -> label values of the series currently exported for it, so
# the series can be removed once the container is gone or relabelled.
_series_labels = {}


def calc_cpu(stats):
    """Return the CPU usage percentage derived from one Docker stats sample.

    The value is ``(cpu_delta / system_delta) * online_cpus * 100`` where the
    deltas are taken between ``cpu_stats`` and ``precpu_stats``.

    Args:
        stats: Mapping as returned by ``Container.stats(stream=False)``.

    Returns:
        The computed percentage, or ``0.0`` when the sample lacks the fields
        needed for a delta or the deltas are not usable (no elapsed system
        time, or a usage counter that went backwards).
    """
    cpu = stats.get("cpu_stats") or {}
    pre = stats.get("precpu_stats") or {}
    try:
        cpu_delta = cpu["cpu_usage"]["total_usage"] - pre["cpu_usage"]["total_usage"]
        system_delta = cpu["system_cpu_usage"] - pre["system_cpu_usage"]
    except KeyError:
        # An incomplete sample must not abort collection of the container's
        # other metrics, so report no usage instead of raising.
        return 0.0
    if system_delta <= 0 or cpu_delta < 0:
        return 0.0
    # When online_cpus is absent or null, fall back to the number of per-CPU
    # usage entries, and finally to a single CPU.
    online_cpus = (
        cpu.get("online_cpus")
        or len(cpu["cpu_usage"].get("percpu_usage") or ())
        or 1
    )
    return (cpu_delta / system_delta) * online_cpus * 100


def _counter_delta(current, previous):
    """Return the increment between two readings of a cumulative total.

    A reading lower than the previous one is treated as a restart of the
    total from zero, so the whole current reading counts as new traffic.
    """
    return current - previous if current >= previous else current


def _drop_series(label_values):
    """Remove the series identified by *label_values* from every metric."""
    for metric in (cpu_gauge, mem_gauge, net_rx_counter, net_tx_counter):
        # Depending on the client version, removing a label set that was
        # never created may raise KeyError; that is not an error here.
        with contextlib.suppress(KeyError):
            metric.remove(*label_values)


def collect_one(c):
    """Read one stats sample for container *c* and update its metrics.

    Failures are logged and swallowed so that a single misbehaving container
    never interrupts collection for the others.

    Args:
        c: A container object exposing ``stats()``, ``name`` and ``labels``.
    """
    try:
        stats = c.stats(stream=False)
        name = c.name
        labels = c.labels
        project = labels.get(
            "coolify.projectName", labels.get("com.docker.compose.project", "")
        )
        service = labels.get(
            "coolify.service.subName", labels.get("com.docker.compose.service", "")
        )
        label_values = (name, project, service)

        # A container re-created under the same name may carry different
        # labels; retire the old series so it does not linger with stale
        # values.
        known = _series_labels.get(name)
        if known is not None and known != label_values:
            _drop_series(known)
        _series_labels[name] = label_values

        cpu_gauge.labels(*label_values).set(calc_cpu(stats))
        memory = stats.get("memory_stats") or {}
        mem_gauge.labels(*label_values).set(memory.get("usage", 0))

        interfaces = (stats.get("networks") or {}).values()
        rx = sum(iface["rx_bytes"] for iface in interfaces)
        tx = sum(iface["tx_bytes"] for iface in interfaces)

        # Fetching the children up front creates both series at 0 on the
        # first sighting, before any delta can be computed.
        rx_series = net_rx_counter.labels(*label_values)
        tx_series = net_tx_counter.labels(*label_values)
        if name in net_rx_prev:
            rx_series.inc(_counter_delta(rx, net_rx_prev[name]))
            tx_series.inc(_counter_delta(tx, net_tx_prev[name]))
        net_rx_prev[name] = rx
        net_tx_prev[name] = tx
    except Exception as exc:
        logger.warning(
            "Failed to collect stats for container %s: %s",
            getattr(c, "name", "<unknown>"),
            exc,
        )


def collect():
    """Run one collection cycle over all listed containers.

    Containers are sampled concurrently on up to ``MAX_WORKERS`` threads.
    After sampling, series and cached byte totals belonging to containers
    that are no longer listed are discarded.
    """
    containers = client.containers.list()
    # Leaving the ``with`` block waits for every submitted task to finish.
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        executor.map(collect_one, containers)

    live_names = {container.name for container in containers}
    for name in list(_series_labels):
        if name not in live_names:
            _drop_series(_series_labels.pop(name))
            net_rx_prev.pop(name, None)
            net_tx_prev.pop(name, None)


start_http_server(LISTEN_PORT)
while True:
    try:
        collect()
    except Exception:
        # A failed cycle (for example the container listing raising) should
        # not take the metrics endpoint down; log it and try again next time.
        logger.exception(
            "Collection cycle failed; retrying in %s seconds",
            COLLECT_INTERVAL_SECONDS,
        )
    time.sleep(COLLECT_INTERVAL_SECONDS)