Scanner fixes: - Remove conflicting ``path`` from consent pre-seed cookie (Playwright rejects cookies with both ``url`` and ``path``). - Switch to ``networkidle`` + 5s + 2s delayed second-pass for reliable cookie capture. - Check sitemap Content-Type to skip SPA HTML fallbacks. - Propagate ``auto_category`` from scan results to the cookies table during sync (was silently dropped). - Add ``_gcl_ls`` to the Open Cookie Database CSV. - Classify ``_consentos_*`` cookies as necessary directly in the classification engine. - Add ``seed_known_cookies`` to the bootstrap init container command. Admin UI: - Add scan schedule control to the Scans tab — preset options (disabled/daily/weekly/fortnightly/monthly) plus custom cron input. Saves ``scan_schedule_cron`` on the site config.
211 lines
6.8 KiB
YAML
211 lines
6.8 KiB
YAML
# Single-host production deployment.
#
# Differences from ``docker-compose.yml`` (dev):
#   - Ports are bound to ``127.0.0.1`` only — a reverse proxy on the
#     host (e.g. Caddy) is expected to terminate TLS and forward.
#   - A one-shot ``consentos-bootstrap`` init container owns all
#     database setup (alembic, initial admin provisioning, and
#     known-cookie seeding); every long-running service that touches
#     the DB waits for it via ``service_completed_successfully``.
#   - Per-service resource limits, healthchecks, and dependency
#     ordering so ``docker compose up -d`` gives a consistent start.
#   - The scanner gets its own scoped ``environment:`` block rather
#     than ``env_file: .env`` so unrelated variables (``PORT``,
#     ``HOST``, …) from the shared env can't rebind its settings.
#   - ``shm_size: 1gb`` on the scanner — Playwright/Chromium crashes
#     under the default 64 MB ``/dev/shm``.
|
|
services:
|
|
# ── Init container: migrations + initial admin bootstrap ──────────
  # One-shot job. Runs three CLI steps in sequence (alembic upgrade,
  # bootstrap_admin, seed_known_cookies) and then exits; the `&&`
  # chain stops at the first failing step.
  consentos-bootstrap:
    container_name: consentos-bootstrap
    build:
      context: apps/api
      dockerfile: Dockerfile
    env_file: .env
    working_dir: /app
    # Must stay quoted: a bare `no` is a YAML 1.1 boolean, not the
    # restart-policy string.
    restart: "no"
    # Postgres has to be accepting connections before migrations run.
    depends_on:
      postgres:
        condition: service_healthy
    command:
      - sh
      - "-c"
      # Folded scalar: the three lines join with single spaces into
      # one shell command line.
      - >-
        python -m alembic upgrade head &&
        python -m src.cli.bootstrap_admin &&
        python -m src.cli.seed_known_cookies
    deploy:
      resources:
        limits:
          memory: 256M
|
|
# ── API ──────────────────────────────────────────────────────────
  # HTTP API. Published on the loopback interface only (host port
  # 11001 -> container port 8000).
  consentos-api:
    container_name: consentos-api
    build:
      context: apps/api
      dockerfile: Dockerfile
    env_file: .env
    restart: unless-stopped
    # Start only after the bootstrap job has exited successfully and
    # Redis reports healthy.
    depends_on:
      consentos-bootstrap:
        condition: service_completed_successfully
      redis:
        condition: service_healthy
    ports:
      - "127.0.0.1:11001:8000"
    # Probes /health on the container-local port with curl.
    healthcheck:
      test:
        - CMD
        - curl
        - "-f"
        - "http://localhost:8000/health"
      interval: 30s
      timeout: 5s
      start_period: 15s
      retries: 3
    deploy:
      resources:
        limits:
          memory: 512M
|
|
# ── Celery worker ────────────────────────────────────────────────
  # Celery worker built from the same image as the API.
  consentos-worker:
    container_name: consentos-worker
    build:
      context: apps/api
      dockerfile: Dockerfile
    env_file: .env
    working_dir: /app
    restart: unless-stopped
    # Waits for the bootstrap job to finish and for both the scanner
    # and Redis to be healthy.
    depends_on:
      consentos-bootstrap:
        condition: service_completed_successfully
      consentos-scanner:
        condition: service_healthy
      redis:
        condition: service_healthy
    command: >
      celery -A src.celery_app worker --loglevel=info
      --concurrency=2
    # `$$` is Compose's escape for a literal `$`, so ${HOSTNAME} is
    # expanded by the shell inside the container, not by Compose.
    healthcheck:
      test:
        - CMD-SHELL
        - "celery -A src.celery_app inspect ping -d celery@$${HOSTNAME} || exit 1"
      interval: 30s
      timeout: 10s
      start_period: 30s
      retries: 3
    deploy:
      resources:
        limits:
          memory: 512M
|
|
# ── Celery beat ──────────────────────────────────────────────────
  # Celery beat scheduler process, built from the API image.
  consentos-beat:
    container_name: consentos-beat
    build:
      context: apps/api
      dockerfile: Dockerfile
    env_file: .env
    working_dir: /app
    restart: unless-stopped
    depends_on:
      consentos-bootstrap:
        condition: service_completed_successfully
      redis:
        condition: service_healthy
    command: >
      celery -A src.celery_app beat --loglevel=info
    # Beat exposes neither an HTTP endpoint nor an inspect endpoint.
    # The healthcheck is disabled on purpose so the container is
    # judged by its exit status instead of sitting permanently
    # "unhealthy" behind a fake probe.
    healthcheck:
      disable: true
    deploy:
      resources:
        limits:
          memory: 256M
|
|
# ── Scanner (Playwright / Chromium) ──────────────────────────────
  consentos-scanner:
    container_name: consentos-scanner
    build:
      context: apps/scanner
      dockerfile: Dockerfile
    restart: unless-stopped
    # Deliberately NOT `env_file: .env`. Shared variables such as PORT
    # would leak in and move the scanner off its default port 8001,
    # which is the port SCANNER_SERVICE_URL expects.
    environment:
      LOG_LEVEL: "${LOG_LEVEL:-INFO}"
      CRAWLER_HEADLESS: "true"
      CRAWLER_TIMEOUT_MS: "30000"
      MAX_PAGES_PER_SCAN: "50"
    # The default /dev/shm is 64 MB, which is too small: Chromium
    # crashes on pages with many iframes or heavy DOM trees.
    shm_size: "1gb"
    healthcheck:
      test:
        - CMD
        - curl
        - "-f"
        - "http://localhost:8001/health"
      interval: 30s
      timeout: 10s
      start_period: 30s
      retries: 3
    deploy:
      resources:
        limits:
          memory: 1G
|
|
# ── Admin UI + banner CDN (single nginx image) ───────────────────
  # Published on the loopback interface only (host port 11002 ->
  # container port 80).
  consentos-admin:
    container_name: consentos-admin
    build:
      # The build context is the repository root so that the
      # Dockerfile can reach apps/banner/ as well as apps/admin-ui/
      # and bake the banner output into the nginx root — see
      # apps/admin-ui/Dockerfile.
      context: .
      dockerfile: apps/admin-ui/Dockerfile
    restart: unless-stopped
    ports:
      - "127.0.0.1:11002:80"
    deploy:
      resources:
        limits:
          memory: 128M
|
|
# ── Postgres ─────────────────────────────────────────────────────
  # Primary database. Credentials and database name are interpolated
  # by Compose from the project environment.
  postgres:
    image: postgres:17-alpine
    container_name: consentos-postgres
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB}
    volumes:
      - pgdata:/var/lib/postgresql/data
    # `$$` escapes to a literal `$`, so the variables are expanded by
    # the shell inside the container (where `environment:` above has
    # set them), not by Compose.
    #
    # `-d` is passed explicitly: without it pg_isready probes a
    # database named after the user, and when POSTGRES_DB differs the
    # server logs a failed connection attempt on every 5s probe. The
    # `:-` fallback keeps the probe valid when POSTGRES_DB is empty.
    healthcheck:
      test:
        - CMD-SHELL
        - 'pg_isready -U "$$POSTGRES_USER" -d "$${POSTGRES_DB:-$$POSTGRES_USER}"'
      interval: 5s
      timeout: 5s
      retries: 5
    restart: unless-stopped
    deploy:
      resources:
        limits:
          memory: 512M
|
|
# ── Redis ────────────────────────────────────────────────────────
  # Password-protected Redis with append-only persistence on the
  # `redisdata` volume.
  redis:
    image: redis:7-alpine
    container_name: consentos-redis
    # ${REDIS_PASSWORD} is interpolated by Compose when the file is
    # loaded; it is NOT automatically present in the container's
    # environment.
    command: redis-server --requirepass ${REDIS_PASSWORD} --appendonly yes
    # redis-cli reads its password from REDISCLI_AUTH. Exporting it
    # here gives the healthcheck credentials; the previous probe
    # expanded `$REDIS_PASSWORD` inside the container, where it was
    # never set, so it ran `redis-cli -a ping` — "ping" became the
    # password and no command was sent.
    environment:
      REDISCLI_AUTH: "${REDIS_PASSWORD}"
    volumes:
      - redisdata:/data
    # Require an actual PONG rather than relying on the redis-cli
    # exit status, so an auth error reply cannot pass as healthy.
    healthcheck:
      test: ["CMD-SHELL", "redis-cli ping | grep -q PONG"]
      interval: 2s
      timeout: 3s
      retries: 10
    restart: unless-stopped
    deploy:
      resources:
        limits:
          memory: 128M
|
|
# Named volumes with default driver settings; `{}` makes the empty
# configuration explicit instead of a bare null value.
volumes:
  pgdata: {}
  redisdata: {}
|