# consentos/docker-compose.prod.yml

# Single-host production deployment.
#
# Differences from ``docker-compose.yml`` (dev):
#   - Ports bound to ``127.0.0.1`` only — expects a reverse proxy on
#     the host (e.g. Caddy) to terminate TLS and forward.
#   - A one-shot ``consentos-bootstrap`` init container owns all
#     database setup (alembic migrations, initial admin provisioning,
#     and known-cookie seeding); every long-running service that
#     touches the DB waits for it via
#     ``service_completed_successfully``.
#   - Per-service resource limits, healthchecks, and dependency
#     ordering so ``docker compose up -d`` gives a consistent start.
#   - The scanner gets its own scoped ``environment:`` block rather
#     than ``env_file: .env`` so unrelated variables (``PORT``,
#     ``HOST``, …) from the shared env can't rebind its settings.
#   - ``shm_size: 1gb`` on the scanner — Playwright/Chromium crashes
#     under the default 64 MB ``/dev/shm``.

services:
  # ── Init container: migrations + initial admin bootstrap ──────────
  consentos-bootstrap:
    # One-shot init job built from the API image context. It runs to
    # completion and is never restarted (``restart: "no"``).
    container_name: consentos-bootstrap
    build:
      context: apps/api
      dockerfile: Dockerfile
    env_file: .env
    working_dir: /app
    restart: "no"
    # The three steps are chained with ``&&`` so the first failure
    # stops the chain and the container exits non-zero. The folded
    # scalar below collapses to a single-line shell command.
    command:
      - sh
      - "-c"
      - >-
        python -m alembic upgrade head
        && python -m src.cli.bootstrap_admin
        && python -m src.cli.seed_known_cookies
    # Do not start until Postgres passes its healthcheck.
    depends_on:
      postgres:
        condition: service_healthy
    deploy:
      resources:
        limits:
          memory: 256M

  # ── API ──────────────────────────────────────────────────────────
  consentos-api:
    container_name: consentos-api
    build:
      context: apps/api
      dockerfile: Dockerfile
    env_file: .env
    restart: unless-stopped
    # Published on loopback only: host 127.0.0.1:11001 -> container 8000.
    ports:
      - "127.0.0.1:11001:8000"
    # Start only after the bootstrap job has exited successfully and
    # Redis reports healthy.
    depends_on:
      redis:
        condition: service_healthy
      consentos-bootstrap:
        condition: service_completed_successfully
    # Probes /health from inside the container.
    # NOTE(review): assumes ``curl`` is installed in the API image —
    # confirm against apps/api/Dockerfile.
    healthcheck:
      test:
        - CMD
        - curl
        - "-f"
        - "http://localhost:8000/health"
      start_period: 15s
      interval: 30s
      timeout: 5s
      retries: 3
    deploy:
      resources:
        limits:
          memory: 512M

  # ── Celery worker ────────────────────────────────────────────────
  consentos-worker:
    container_name: consentos-worker
    build:
      context: apps/api
      dockerfile: Dockerfile
    env_file: .env
    working_dir: /app
    restart: unless-stopped
    # Celery worker for ``src.celery_app``: info-level logging,
    # concurrency of 2.
    command:
      - celery
      - "-A"
      - src.celery_app
      - worker
      - "--loglevel=info"
      - "--concurrency=2"
    # Waits for the bootstrap job to finish successfully, and for both
    # the scanner and Redis to report healthy.
    depends_on:
      redis:
        condition: service_healthy
      consentos-scanner:
        condition: service_healthy
      consentos-bootstrap:
        condition: service_completed_successfully
    # Pings this container's own worker node. ``$$`` escapes Compose
    # interpolation so ``${HOSTNAME}`` is expanded by the shell inside
    # the container when the probe runs.
    healthcheck:
      test:
        - CMD-SHELL
        - "celery -A src.celery_app inspect ping -d celery@$${HOSTNAME} || exit 1"
      start_period: 30s
      interval: 30s
      timeout: 10s
      retries: 3
    deploy:
      resources:
        limits:
          memory: 512M

  # ── Celery beat ──────────────────────────────────────────────────
  consentos-beat:
    container_name: consentos-beat
    build:
      context: apps/api
      dockerfile: Dockerfile
    env_file: .env
    working_dir: /app
    restart: unless-stopped
    # Celery beat scheduler for ``src.celery_app`` at info log level.
    command:
      - celery
      - "-A"
      - src.celery_app
      - beat
      - "--loglevel=info"
    # Healthcheck is switched off on purpose: beat exposes neither an
    # HTTP endpoint nor an inspect target, so any probe would be fake
    # and leave the container stuck at "unhealthy". Liveness is judged
    # from the container's exit status instead.
    healthcheck:
      disable: true
    # Waits for the bootstrap job to finish successfully and for Redis
    # to report healthy.
    depends_on:
      redis:
        condition: service_healthy
      consentos-bootstrap:
        condition: service_completed_successfully
    deploy:
      resources:
        limits:
          memory: 256M

  # ── Scanner (Playwright / Chromium) ──────────────────────────────
  consentos-scanner:
    container_name: consentos-scanner
    build:
      context: apps/scanner
      dockerfile: Dockerfile
    restart: unless-stopped
    # Deliberately NOT ``env_file: .env``. Shared variables such as
    # PORT would leak in and move the scanner off its default 8001,
    # which is the port SCANNER_SERVICE_URL expects. Only the settings
    # listed here reach the container.
    environment:
      - "LOG_LEVEL=${LOG_LEVEL:-INFO}"
      - "CRAWLER_HEADLESS=true"
      - "CRAWLER_TIMEOUT_MS=30000"
      - "MAX_PAGES_PER_SCAN=50"
    # The 64 MB default /dev/shm is too small: Chromium crashes under
    # shared-memory pressure on pages with many iframes or heavy DOMs.
    # NOTE(review): /dev/shm usage presumably counts toward the 1G
    # memory limit below — confirm the two values are compatible.
    shm_size: "1gb"
    # Probes /health on the scanner's default port 8001.
    # NOTE(review): assumes ``curl`` is installed in the scanner image —
    # confirm against apps/scanner/Dockerfile.
    healthcheck:
      test:
        - CMD
        - curl
        - "-f"
        - "http://localhost:8001/health"
      start_period: 30s
      interval: 30s
      timeout: 10s
      retries: 3
    deploy:
      resources:
        limits:
          memory: 1G

  # ── Admin UI + banner CDN (single nginx image) ───────────────────
  consentos-admin:
    container_name: consentos-admin
    build:
      # The build context is the repository root (not apps/admin-ui)
      # so the Dockerfile can reach apps/banner/ as well as
      # apps/admin-ui/ and bake the banner output into the nginx
      # root — see apps/admin-ui/Dockerfile.
      context: .
      dockerfile: apps/admin-ui/Dockerfile
    restart: unless-stopped
    # Published on loopback only: host 127.0.0.1:11002 -> container 80.
    ports:
      - "127.0.0.1:11002:80"
    deploy:
      resources:
        limits:
          memory: 128M

  # ── Postgres ─────────────────────────────────────────────────────
  postgres:
    image: postgres:17-alpine
    container_name: consentos-postgres
    restart: unless-stopped
    # No ``ports:`` entry — the database is not published on the host.
    # Credentials and database name are interpolated by Compose from
    # the environment / project ``.env``.
    environment:
      POSTGRES_USER: "${POSTGRES_USER}"
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD}"
      POSTGRES_DB: "${POSTGRES_DB}"
    volumes:
      - pgdata:/var/lib/postgresql/data
    healthcheck:
      # ``$$`` escapes Compose interpolation so the variables are
      # expanded by the shell inside the container.
      #
      # -h 127.0.0.1: probe over TCP rather than the Unix socket. On
      #   first boot the official image runs its init scripts against
      #   a temporary server that accepts socket connections only; a
      #   socket probe can report healthy during that phase and
      #   release dependants before the real server is listening.
      # -d: name the database explicitly. Without it pg_isready
      #   defaults the database name to the user name, and the server
      #   logs a failed connection attempt whenever the two differ.
      test:
        - CMD-SHELL
        - 'pg_isready -h 127.0.0.1 -U "$$POSTGRES_USER" -d "$$POSTGRES_DB"'
      interval: 5s
      timeout: 5s
      # Failures during first-boot initialisation do not count against
      # ``retries``.
      start_period: 30s
      retries: 5
    deploy:
      resources:
        limits:
          memory: 512M

  # ── Redis ────────────────────────────────────────────────────────
  redis:
    image: redis:7-alpine
    container_name: consentos-redis
    restart: unless-stopped
    # List (exec) form so a password containing spaces or shell
    # metacharacters is passed as one argument instead of being
    # word-split. ``:?`` makes Compose fail with a clear message when
    # REDIS_PASSWORD is unset or empty, rather than starting Redis
    # with a mangled argument list.
    command:
      - redis-server
      - "--requirepass"
      - "${REDIS_PASSWORD:?REDIS_PASSWORD must be set}"
      - "--appendonly"
      - "yes"
    # redis-cli reads its password from REDISCLI_AUTH. The healthcheck
    # runs inside the container, where Compose-level variables are not
    # visible unless passed in here.
    environment:
      REDISCLI_AUTH: "${REDIS_PASSWORD}"
    volumes:
      - redisdata:/data
    healthcheck:
      # Require an actual PONG so an auth error or empty reply counts
      # as a failed probe.
      test:
        - CMD-SHELL
        - "redis-cli ping | grep -q PONG"
      interval: 2s
      timeout: 3s
      retries: 10
    deploy:
      resources:
        limits:
          memory: 128M

volumes:
  # Named volumes with default settings: ``pgdata`` is mounted by the
  # postgres service, ``redisdata`` by the redis service.
  pgdata: {}
  redisdata: {}