Files
consentos/docker-compose.prod.yml
James Cottrill e0f1dd43e8 fix(scanner): reliable cookie discovery, auto-categorisation, and scan scheduling UI (#7)
Scanner fixes:
- Remove conflicting ``path`` from consent pre-seed cookie (Playwright
  rejects cookies with both ``url`` and ``path``).
- Switch to ``networkidle`` + 5s + 2s delayed second-pass for reliable
  cookie capture.
- Check sitemap Content-Type to skip SPA HTML fallbacks.
- Propagate ``auto_category`` from scan results to the cookies table
  during sync (was silently dropped).
- Add ``_gcl_ls`` to the Open Cookie Database CSV.
- Classify ``_consentos_*`` cookies as necessary directly in the
  classification engine.
- Add ``seed_known_cookies`` to the bootstrap init container command.

Admin UI:
- Add scan schedule control to the Scans tab — preset options
  (disabled/daily/weekly/fortnightly/monthly) plus custom cron input.
  Saves ``scan_schedule_cron`` on the site config.
2026-04-18 20:14:32 +01:00

211 lines
6.8 KiB
YAML

# Single-host production deployment.
#
# Differences from ``docker-compose.yml`` (dev):
# - Ports bound to ``127.0.0.1`` only — expects a reverse proxy on
# the host (e.g. Caddy) to terminate TLS and forward.
# - A one-shot ``consentos-bootstrap`` init container owns all
# database setup (alembic + initial admin provisioning); every
# long-running service that touches the DB waits for it via
# ``service_completed_successfully``.
# - Per-service resource limits, healthchecks, and dependency
# ordering so ``docker compose up -d`` gives a consistent start.
# - The scanner gets its own scoped ``environment:`` block rather
# than ``env_file: .env`` so unrelated variables (``PORT``,
# ``HOST``, …) from the shared env can't rebind its settings.
# - ``shm_size: 1gb`` on the scanner — Playwright/Chromium crashes
# under the default 64 MB ``/dev/shm``.
services:
# ── Init container: migrations + initial admin bootstrap ──────────
  # One-shot job. The command chains three steps with ``&&`` so a failure
  # in any step stops the chain and the container exits non-zero:
  #   1. python -m alembic upgrade head
  #   2. python -m src.cli.bootstrap_admin
  #   3. python -m src.cli.seed_known_cookies
  # Services that gate on ``service_completed_successfully`` therefore
  # only start once all three steps have succeeded.
  consentos-bootstrap:
    container_name: consentos-bootstrap
    build:
      context: apps/api
      dockerfile: Dockerfile
    env_file: .env
    working_dir: /app
    # Quoted on purpose: a bare ``no`` is read as a boolean by YAML 1.1
    # parsers. The job must run once and stay exited.
    restart: "no"
    command: ["sh", "-c", "python -m alembic upgrade head && python -m src.cli.bootstrap_admin && python -m src.cli.seed_known_cookies"]
    # Wait until Postgres passes its healthcheck before running.
    depends_on:
      postgres:
        condition: service_healthy
    deploy:
      resources:
        limits:
          memory: 256M
# ── API ──────────────────────────────────────────────────────────
  # HTTP API. Container port 8000 is published on the loopback
  # interface only (host port 11001); the host's reverse proxy is
  # expected to terminate TLS and forward to it.
  consentos-api:
    container_name: consentos-api
    build:
      context: apps/api
      dockerfile: Dockerfile
    env_file: .env
    restart: unless-stopped
    ports:
      - "127.0.0.1:11001:8000"
    # Starts only after the bootstrap job has exited successfully and
    # Redis reports healthy.
    depends_on:
      consentos-bootstrap:
        condition: service_completed_successfully
      redis:
        condition: service_healthy
    # Probes /health from inside the container using curl.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:8000/health"
      start_period: 15s
      interval: 30s
      timeout: 5s
      retries: 3
    deploy:
      resources:
        limits:
          memory: 512M
# ── Celery worker ────────────────────────────────────────────────
  # Celery worker built from the same image as the API, running with a
  # concurrency of 2.
  consentos-worker:
    container_name: consentos-worker
    build:
      context: apps/api
      dockerfile: Dockerfile
    env_file: .env
    working_dir: /app
    restart: unless-stopped
    command: >
      celery -A src.celery_app worker
      --loglevel=info --concurrency=2
    # Starts only after the bootstrap job has succeeded and both the
    # scanner and Redis report healthy.
    depends_on:
      consentos-bootstrap:
        condition: service_completed_successfully
      consentos-scanner:
        condition: service_healthy
      redis:
        condition: service_healthy
    # Pings this worker's own node name. ``$$`` escapes Compose
    # interpolation so HOSTNAME is expanded by the shell inside the
    # container rather than on the host.
    healthcheck:
      test:
        - "CMD-SHELL"
        - "celery -A src.celery_app inspect ping -d celery@$${HOSTNAME} || exit 1"
      start_period: 30s
      interval: 30s
      timeout: 10s
      retries: 3
    deploy:
      resources:
        limits:
          memory: 512M
# ── Celery beat ──────────────────────────────────────────────────
  # Celery beat process, built from the same image as the API and
  # worker.
  consentos-beat:
    container_name: consentos-beat
    build:
      context: apps/api
      dockerfile: Dockerfile
    env_file: .env
    working_dir: /app
    restart: unless-stopped
    command: >
      celery -A src.celery_app beat
      --loglevel=info
    # Healthcheck deliberately switched off: beat exposes neither an
    # HTTP surface nor an inspect endpoint, so liveness is judged by the
    # container's exit status instead of a fake probe that would leave
    # it permanently "unhealthy".
    healthcheck:
      disable: true
    # Starts only after the bootstrap job has succeeded and Redis
    # reports healthy.
    depends_on:
      consentos-bootstrap:
        condition: service_completed_successfully
      redis:
        condition: service_healthy
    deploy:
      resources:
        limits:
          memory: 256M
# ── Scanner (Playwright / Chromium) ──────────────────────────────
  consentos-scanner:
    container_name: consentos-scanner
    build:
      context: apps/scanner
      dockerfile: Dockerfile
    restart: unless-stopped
    # Explicit, scoped environment. Do NOT switch this to
    # ``env_file: .env``: shared variables such as PORT would leak in
    # and move the scanner off its default 8001, which is the port
    # SCANNER_SERVICE_URL expects.
    environment:
      CRAWLER_HEADLESS: "true"
      CRAWLER_TIMEOUT_MS: "30000"
      MAX_PAGES_PER_SCAN: "50"
      LOG_LEVEL: ${LOG_LEVEL:-INFO}
    # The default 64 MB /dev/shm is too small: Chromium crashes under
    # shared-memory pressure on pages with many iframes or heavy DOM
    # trees.
    # NOTE(review): /dev/shm usage is normally charged to the
    # container's memory limit, so 1gb of shm next to a 1G limit may
    # deserve a second look — confirm.
    shm_size: "1gb"
    # Probes /health on the scanner's default port from inside the
    # container using curl.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:8001/health"
      start_period: 30s
      interval: 30s
      timeout: 10s
      retries: 3
    deploy:
      resources:
        limits:
          memory: 1G
# ── Admin UI + banner CDN (single nginx image) ───────────────────
  # Container port 80 is published on the loopback interface only
  # (host port 11002).
  consentos-admin:
    container_name: consentos-admin
    build:
      # The build context is the repository root, not apps/admin-ui/,
      # so the Dockerfile can reach apps/banner/ as well and bake the
      # banner output at the nginx root — see apps/admin-ui/Dockerfile.
      context: .
      dockerfile: apps/admin-ui/Dockerfile
    restart: unless-stopped
    ports:
      - "127.0.0.1:11002:80"
    deploy:
      resources:
        limits:
          memory: 128M
# ── Postgres ─────────────────────────────────────────────────────
  # Database server. Credentials and database name are interpolated
  # from the Compose environment (.env); data lives in the ``pgdata``
  # named volume. No host port is published.
  postgres:
    image: postgres:17-alpine
    container_name: consentos-postgres
    restart: unless-stopped
    environment:
      POSTGRES_USER: "${POSTGRES_USER}"
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD}"
      POSTGRES_DB: "${POSTGRES_DB}"
    volumes:
      - pgdata:/var/lib/postgresql/data
    # Probe the configured database explicitly. Without ``-d``,
    # pg_isready uses the user name as the database name; when
    # POSTGRES_DB differs, every probe makes the server log a failed
    # connection attempt. Falls back to the user name if POSTGRES_DB is
    # empty. ``$$`` escapes Compose interpolation so the variables are
    # expanded by the shell inside the container.
    healthcheck:
      test:
        - "CMD-SHELL"
        - 'pg_isready -U "$$POSTGRES_USER" -d "$${POSTGRES_DB:-$$POSTGRES_USER}"'
      interval: 5s
      timeout: 5s
      retries: 5
    deploy:
      resources:
        limits:
          memory: 512M
# ── Redis ────────────────────────────────────────────────────────
  # Password-protected Redis started with ``--appendonly yes``; the
  # ``redisdata`` named volume is mounted at /data. No host port is
  # published.
  redis:
    image: redis:7-alpine
    container_name: consentos-redis
    restart: unless-stopped
    # The healthcheck runs inside the container, so the password must
    # exist in the container's environment, not just in Compose
    # interpolation. redis-cli reads REDISCLI_AUTH itself, which keeps
    # the password off the probe's command line.
    environment:
      REDISCLI_AUTH: "${REDIS_PASSWORD:?REDIS_PASSWORD must be set}"
    # List form so a password containing whitespace stays one argument.
    # ``:?`` aborts ``docker compose`` with a clear error if the
    # password is unset or empty, instead of starting redis-server with
    # ``--requirepass`` swallowing the next flag.
    command:
      - "redis-server"
      - "--requirepass"
      - "${REDIS_PASSWORD:?REDIS_PASSWORD must be set}"
      - "--appendonly"
      - "yes"
    volumes:
      - redisdata:/data
    # Healthy only when an authenticated PING actually returns PONG.
    healthcheck:
      test:
        - "CMD-SHELL"
        - "redis-cli ping | grep -q PONG"
      interval: 2s
      timeout: 3s
      retries: 10
    deploy:
      resources:
        limits:
          memory: 128M
# Named volumes with default settings: ``pgdata`` is mounted by the
# postgres service and ``redisdata`` by the redis service.
volumes:
  pgdata: {}
  redisdata: {}