feat: initial public release

ConsentOS — a privacy-first cookie consent management platform.

Self-hosted, source-available alternative to OneTrust, Cookiebot, and
CookieYes. Full standards coverage (IAB TCF v2.2, GPP v1, Google
Consent Mode v2, GPC, Shopify Customer Privacy API), multi-tenant
architecture with role-based access, configuration cascade
(system → org → group → site → region), dark-pattern detection in
the scanner, and a tamper-evident consent record audit trail.

This is the initial public release. Prior development history is
retained internally.

See README.md for the feature list, architecture overview, and
quick-start instructions. Licensed under the Elastic Licence 2.0 —
self-host freely; do not resell as a managed service.
This commit is contained in:
James Cottrill
2026-04-13 14:20:15 +00:00
commit fbf26453f2
341 changed files with 62807 additions and 0 deletions

View File

@@ -0,0 +1,593 @@
"""Tests for scan scheduling, diff engine, and scan endpoints — CMP-24.
Covers:
- Scanner schemas (new additions)
- Scan service (job lifecycle, diff engine, cookie sync)
- Scanner router (trigger, list, detail, diff endpoints)
- Integration tests against live database
"""
import uuid
from datetime import UTC, datetime
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from httpx import ASGITransport, AsyncClient
from src.schemas.scanner import (
CookieDiffItem,
DiffStatus,
ScanDiffResponse,
ScanJobDetailResponse,
ScanResultResponse,
TriggerScanRequest,
)
# ── Schema tests ─────────────────────────────────────────────────────
class TestSchemas:
"""Validate scanner schema additions."""
def test_scan_result_response(self):
r = ScanResultResponse(
id=uuid.uuid4(),
scan_job_id=uuid.uuid4(),
page_url="https://example.com",
cookie_name="_ga",
cookie_domain=".example.com",
storage_type="cookie",
found_at=datetime.now(UTC),
created_at=datetime.now(UTC),
)
assert r.cookie_name == "_ga"
def test_scan_job_detail_response(self):
r = ScanJobDetailResponse(
id=uuid.uuid4(),
site_id=uuid.uuid4(),
status="completed",
trigger="manual",
pages_scanned=5,
pages_total=10,
cookies_found=3,
error_message=None,
started_at=datetime.now(UTC),
completed_at=datetime.now(UTC),
created_at=datetime.now(UTC),
updated_at=datetime.now(UTC),
results=[],
)
assert r.status == "completed"
assert r.results == []
def test_trigger_scan_request(self):
req = TriggerScanRequest(site_id=uuid.uuid4(), max_pages=100)
assert req.max_pages == 100
def test_trigger_scan_request_defaults(self):
req = TriggerScanRequest(site_id=uuid.uuid4())
assert req.max_pages == 50
def test_trigger_scan_max_pages_validation(self):
with pytest.raises(ValueError):
TriggerScanRequest(site_id=uuid.uuid4(), max_pages=0)
with pytest.raises(ValueError):
TriggerScanRequest(site_id=uuid.uuid4(), max_pages=501)
def test_diff_status_values(self):
assert DiffStatus.NEW == "new"
assert DiffStatus.REMOVED == "removed"
assert DiffStatus.CHANGED == "changed"
def test_cookie_diff_item(self):
item = CookieDiffItem(
name="_ga",
domain=".example.com",
storage_type="cookie",
diff_status=DiffStatus.NEW,
details="First scan",
)
assert item.diff_status == "new"
def test_scan_diff_response(self):
resp = ScanDiffResponse(
current_scan_id=uuid.uuid4(),
previous_scan_id=uuid.uuid4(),
new_cookies=[
CookieDiffItem(
name="_ga",
domain=".example.com",
storage_type="cookie",
diff_status=DiffStatus.NEW,
),
],
total_new=1,
)
assert resp.total_new == 1
assert len(resp.new_cookies) == 1
def test_scan_diff_response_no_previous(self):
resp = ScanDiffResponse(
current_scan_id=uuid.uuid4(),
previous_scan_id=None,
)
assert resp.previous_scan_id is None
assert resp.total_new == 0
# ── Diff engine unit tests ───────────────────────────────────────────
class TestDiffEngine:
"""Test the scan diff engine with mocked data."""
def _make_scan_result(
self,
name: str = "_ga",
domain: str = ".example.com",
storage_type: str = "cookie",
script_source: str | None = None,
auto_category: str | None = None,
attributes: dict | None = None,
):
"""Create a mock ScanResult."""
mock = MagicMock()
mock.cookie_name = name
mock.cookie_domain = domain
mock.storage_type = storage_type
mock.script_source = script_source
mock.auto_category = auto_category
mock.attributes = attributes
return mock
def test_result_key(self):
from src.services.scanner import _result_key
mock = self._make_scan_result("_ga", ".example.com", "cookie")
assert _result_key(mock) == ("_ga", ".example.com", "cookie")
def test_result_key_different_storage(self):
from src.services.scanner import _result_key
mock = self._make_scan_result("key", "example.com", "local_storage")
assert _result_key(mock) == ("key", "example.com", "local_storage")
# ── Scan service unit tests ──────────────────────────────────────────
class TestScanService:
"""Test scan service functions with mocked DB."""
@pytest.mark.asyncio
async def test_create_scan_job(self):
from src.services.scanner import create_scan_job
db = AsyncMock()
db.add = MagicMock()
db.flush = AsyncMock()
site_id = uuid.uuid4()
job = await create_scan_job(db, site_id=site_id, trigger="manual", max_pages=10)
assert job.site_id == site_id
assert job.status == "pending"
assert job.trigger == "manual"
assert job.pages_total == 10
db.add.assert_called_once()
@pytest.mark.asyncio
async def test_start_scan_job(self):
from src.services.scanner import start_scan_job
db = AsyncMock()
db.flush = AsyncMock()
job = MagicMock()
job.status = "pending"
job.started_at = None
result = await start_scan_job(db, job)
assert result.status == "running"
assert result.started_at is not None
@pytest.mark.asyncio
async def test_complete_scan_job_success(self):
from src.services.scanner import complete_scan_job
db = AsyncMock()
db.flush = AsyncMock()
job = MagicMock()
result = await complete_scan_job(db, job, pages_scanned=5, cookies_found=10)
assert result.status == "completed"
assert result.pages_scanned == 5
assert result.cookies_found == 10
assert result.completed_at is not None
@pytest.mark.asyncio
async def test_complete_scan_job_failure(self):
from src.services.scanner import complete_scan_job
db = AsyncMock()
db.flush = AsyncMock()
job = MagicMock()
result = await complete_scan_job(db, job, error_message="Connection failed")
assert result.status == "failed"
assert result.error_message == "Connection failed"
@pytest.mark.asyncio
async def test_add_scan_result(self):
from src.services.scanner import add_scan_result
db = AsyncMock()
db.add = MagicMock()
db.flush = AsyncMock()
scan_job_id = uuid.uuid4()
result = await add_scan_result(
db,
scan_job_id=scan_job_id,
page_url="https://example.com",
cookie_name="_ga",
cookie_domain=".example.com",
storage_type="cookie",
auto_category="analytics",
)
assert result.scan_job_id == scan_job_id
assert result.cookie_name == "_ga"
assert result.auto_category == "analytics"
db.add.assert_called_once()
# ── Router unit tests (mocked DB) ───────────────────────────────────
def _mock_auth_user():
"""Create a mock authenticated user."""
from src.schemas.auth import CurrentUser
return CurrentUser(
id=uuid.uuid4(),
organisation_id=uuid.uuid4(),
email="test@example.com",
role="owner",
)
async def _authed_client(app, db, user=None):
"""Create an authenticated test client with mocked DB."""
from src.db import get_db
from src.services.dependencies import get_current_user
if user is None:
user = _mock_auth_user()
async def _override_get_db():
yield db
app.dependency_overrides[get_db] = _override_get_db
app.dependency_overrides[get_current_user] = lambda: user
transport = ASGITransport(app=app)
return AsyncClient(transport=transport, base_url="http://test")
class TestTriggerScan:
"""Test POST /scanner/scans."""
@pytest.mark.asyncio
async def test_trigger_scan_success(self, app):
user = _mock_auth_user()
db = AsyncMock()
# Site exists and belongs to user's org
site_mock = MagicMock()
site_mock.organisation_id = user.organisation_id
site_id = uuid.uuid4()
job_id = uuid.uuid4()
now = datetime.now(UTC)
# Mock scan job returned by create_scan_job
mock_job = MagicMock()
mock_job.id = job_id
mock_job.site_id = site_id
mock_job.status = "pending"
mock_job.trigger = "manual"
mock_job.pages_scanned = 0
mock_job.pages_total = 25
mock_job.cookies_found = 0
mock_job.error_message = None
mock_job.started_at = None
mock_job.completed_at = None
mock_job.created_at = now
mock_job.updated_at = now
# First call: site lookup. Second call: running scan count.
call_count = 0
async def mock_execute(stmt):
nonlocal call_count
call_count += 1
result = MagicMock()
if call_count == 1:
# Site lookup
result.scalar_one_or_none.return_value = site_mock
elif call_count == 2:
# Active scan jobs query — none running
result.scalars.return_value.all.return_value = []
return result
db.execute = mock_execute
db.add = MagicMock()
db.flush = AsyncMock()
with (
patch(
"src.routers.scanner.create_scan_job",
new=AsyncMock(return_value=mock_job),
),
patch("src.tasks.scanner.run_scan", create=True),
):
async with await _authed_client(app, db, user) as client:
resp = await client.post(
"/api/v1/scanner/scans",
json={
"site_id": str(site_id),
"max_pages": 25,
},
)
assert resp.status_code == 201
@pytest.mark.asyncio
async def test_trigger_scan_site_not_found(self, app):
db = AsyncMock()
result = MagicMock()
result.scalar_one_or_none.return_value = None
db.execute = AsyncMock(return_value=result)
async with await _authed_client(app, db) as client:
resp = await client.post(
"/api/v1/scanner/scans",
json={
"site_id": str(uuid.uuid4()),
"max_pages": 50,
},
)
assert resp.status_code == 404
@pytest.mark.asyncio
async def test_trigger_scan_conflict(self, app):
user = _mock_auth_user()
db = AsyncMock()
# Build a non-stale active job so the router raises 409
active_job = MagicMock()
active_job.status = "running"
active_job.created_at = datetime.now(UTC)
active_job.started_at = datetime.now(UTC)
call_count = 0
async def mock_execute(stmt):
nonlocal call_count
call_count += 1
result = MagicMock()
if call_count == 1:
# Site lookup
site_mock = MagicMock()
site_mock.organisation_id = user.organisation_id
result.scalar_one_or_none.return_value = site_mock
elif call_count == 2:
# Active scan jobs query — return a non-stale job
result.scalars.return_value.all.return_value = [active_job]
return result
db.execute = mock_execute
async with await _authed_client(app, db, user) as client:
resp = await client.post(
"/api/v1/scanner/scans",
json={"site_id": str(uuid.uuid4())},
)
assert resp.status_code == 409
class TestListScans:
"""Test GET /scanner/scans/site/{site_id}."""
@pytest.mark.asyncio
async def test_list_scans_success(self, app):
user = _mock_auth_user()
db = AsyncMock()
call_count = 0
async def mock_execute(stmt):
nonlocal call_count
call_count += 1
result = MagicMock()
if call_count == 1:
# Site access check
site_mock = MagicMock()
site_mock.organisation_id = user.organisation_id
result.scalar_one_or_none.return_value = site_mock
else:
# Scan list
result.scalars.return_value.all.return_value = []
return result
db.execute = mock_execute
async with await _authed_client(app, db, user) as client:
resp = await client.get(f"/api/v1/scanner/scans/site/{uuid.uuid4()}")
assert resp.status_code == 200
assert resp.json() == []
class TestGetScan:
"""Test GET /scanner/scans/{scan_id}."""
@pytest.mark.asyncio
async def test_get_scan_not_found(self, app):
db = AsyncMock()
result = MagicMock()
result.scalar_one_or_none.return_value = None
db.execute = AsyncMock(return_value=result)
async with await _authed_client(app, db) as client:
resp = await client.get(f"/api/v1/scanner/scans/{uuid.uuid4()}")
assert resp.status_code == 404
class TestGetScanDiff:
"""Test GET /scanner/scans/{scan_id}/diff."""
@pytest.mark.asyncio
async def test_diff_scan_not_found(self, app):
db = AsyncMock()
result = MagicMock()
result.scalar_one_or_none.return_value = None
db.execute = AsyncMock(return_value=result)
async with await _authed_client(app, db) as client:
resp = await client.get(f"/api/v1/scanner/scans/{uuid.uuid4()}/diff")
assert resp.status_code == 404
# ── Integration tests ────────────────────────────────────────────────
try:
from tests.conftest import create_test_site, requires_db
except ImportError:
from conftest import create_test_site, requires_db
@requires_db
class TestScanIntegration:
"""Integration tests against a live database."""
async def test_trigger_scan(self, db_client, auth_headers):
site_id = await create_test_site(db_client, auth_headers, domain_prefix="scan-trigger")
resp = await db_client.post(
"/api/v1/scanner/scans",
json={"site_id": site_id, "max_pages": 10},
headers=auth_headers,
)
assert resp.status_code == 201
data = resp.json()
assert data["status"] == "pending"
assert data["trigger"] == "manual"
assert data["pages_total"] == 10
async def test_trigger_scan_conflict(self, db_client, auth_headers):
site_id = await create_test_site(db_client, auth_headers, domain_prefix="scan-conflict")
# First scan
resp1 = await db_client.post(
"/api/v1/scanner/scans",
json={"site_id": site_id},
headers=auth_headers,
)
assert resp1.status_code == 201
# Second scan — should conflict
resp2 = await db_client.post(
"/api/v1/scanner/scans",
json={"site_id": site_id},
headers=auth_headers,
)
assert resp2.status_code == 409
async def test_list_scans(self, db_client, auth_headers):
site_id = await create_test_site(db_client, auth_headers, domain_prefix="scan-list")
# Trigger a scan
await db_client.post(
"/api/v1/scanner/scans",
json={"site_id": site_id},
headers=auth_headers,
)
resp = await db_client.get(
f"/api/v1/scanner/scans/site/{site_id}",
headers=auth_headers,
)
assert resp.status_code == 200
scans = resp.json()
assert len(scans) >= 1
assert scans[0]["site_id"] == site_id
async def test_get_scan_detail(self, db_client, auth_headers):
site_id = await create_test_site(db_client, auth_headers, domain_prefix="scan-detail")
create_resp = await db_client.post(
"/api/v1/scanner/scans",
json={"site_id": site_id},
headers=auth_headers,
)
scan_id = create_resp.json()["id"]
resp = await db_client.get(
f"/api/v1/scanner/scans/{scan_id}",
headers=auth_headers,
)
assert resp.status_code == 200
data = resp.json()
assert data["id"] == scan_id
assert "results" in data
async def test_get_scan_diff(self, db_client, auth_headers):
site_id = await create_test_site(db_client, auth_headers, domain_prefix="scan-diff")
create_resp = await db_client.post(
"/api/v1/scanner/scans",
json={"site_id": site_id},
headers=auth_headers,
)
scan_id = create_resp.json()["id"]
resp = await db_client.get(
f"/api/v1/scanner/scans/{scan_id}/diff",
headers=auth_headers,
)
assert resp.status_code == 200
data = resp.json()
assert data["current_scan_id"] == scan_id
# No previous scan, so previous_scan_id should be null
assert data["previous_scan_id"] is None
async def test_scan_not_found(self, db_client, auth_headers):
resp = await db_client.get(
f"/api/v1/scanner/scans/{uuid.uuid4()}",
headers=auth_headers,
)
assert resp.status_code == 404
async def test_list_scans_pagination(self, db_client, auth_headers):
site_id = await create_test_site(db_client, auth_headers, domain_prefix="scan-page")
resp = await db_client.get(
f"/api/v1/scanner/scans/site/{site_id}?limit=5&offset=0",
headers=auth_headers,
)
assert resp.status_code == 200
async def test_trigger_scan_requires_auth(self, db_client):
resp = await db_client.post(
"/api/v1/scanner/scans",
json={"site_id": str(uuid.uuid4())},
)
assert resp.status_code in (401, 403)
async def test_list_scans_requires_auth(self, db_client):
resp = await db_client.get(f"/api/v1/scanner/scans/site/{uuid.uuid4()}")
assert resp.status_code in (401, 403)