feat: initial public release

ConsentOS — a privacy-first cookie consent management platform.

Self-hosted, source-available alternative to OneTrust, Cookiebot, and
CookieYes. Full standards coverage (IAB TCF v2.2, GPP v1, Google
Consent Mode v2, GPC, Shopify Customer Privacy API), multi-tenant
architecture with role-based access, configuration cascade
(system → org → group → site → region), dark-pattern detection in
the scanner, and a tamper-evident consent record audit trail.

This is the initial public release. Prior development history is
retained internally.

See README.md for the feature list, architecture overview, and
quick-start instructions. Licensed under the Elastic Licence 2.0 —
self-host freely; do not resell as a managed service.
This commit is contained in:
James Cottrill
2026-04-13 14:20:15 +00:00
commit fbf26453f2
341 changed files with 62807 additions and 0 deletions

View File

@@ -0,0 +1,869 @@
"""Tests for known cookies database and auto-categorisation engine — CMP-22.
Covers:
- Classification service logic (unit tests — pure functions)
- Pattern matching (exact, wildcard, regex)
- Priority ordering (allow-list → exact → regex → unmatched)
- Known cookie CRUD endpoints (unit tests with mocked DB)
- Classification endpoints (unit tests with mocked DB)
- Schema validation
- Integration tests against live database
"""
import uuid
from datetime import datetime
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from httpx import ASGITransport, AsyncClient
from src.schemas.cookie import (
ClassificationResultResponse,
ClassifySingleRequest,
ClassifySiteResponse,
KnownCookieCreate,
KnownCookieResponse,
KnownCookieUpdate,
)
from src.services.classification import (
ClassificationResult,
MatchSource,
_match_pattern,
_match_regex,
classify_cookie,
)
# ── Schema tests ─────────────────────────────────────────────────────
class TestSchemas:
    """Schema-level checks for the known-cookie and classification models."""

    def test_known_cookie_create(self):
        """``is_regex`` is False when the create payload omits it."""
        payload = {
            "name_pattern": "_ga",
            "domain_pattern": "*",
            "category_id": uuid.uuid4(),
            "vendor": "Google",
            "description": "GA cookie",
        }
        created = KnownCookieCreate(**payload)
        assert created.is_regex is False

    def test_known_cookie_create_regex(self):
        """An explicit ``is_regex=True`` is kept on the model."""
        created = KnownCookieCreate(
            name_pattern="_hj.*",
            domain_pattern=".*",
            category_id=uuid.uuid4(),
            is_regex=True,
        )
        assert created.is_regex is True

    def test_known_cookie_update_partial(self):
        """Dumping with ``exclude_unset`` yields only the supplied fields."""
        update = KnownCookieUpdate(vendor="Updated Vendor")
        sent_fields = update.model_dump(exclude_unset=True)
        assert "vendor" in sent_fields
        assert "category_id" not in sent_fields

    def test_known_cookie_response(self):
        """``vendor`` is None when the response is built without one."""
        response = KnownCookieResponse(
            id=uuid.uuid4(),
            name_pattern="_ga",
            domain_pattern="*",
            category_id=uuid.uuid4(),
            is_regex=False,
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )
        assert response.vendor is None

    def test_classification_result_response(self):
        """A matched classification result round-trips its ``matched`` flag."""
        fields = {
            "cookie_name": "_ga",
            "cookie_domain": ".example.com",
            "match_source": "known_exact",
            "matched": True,
        }
        result = ClassificationResultResponse(**fields)
        assert result.matched is True

    def test_classify_single_request(self):
        """A well-formed single-cookie request keeps the cookie name."""
        request = ClassifySingleRequest(
            cookie_name="_ga",
            cookie_domain=".example.com",
        )
        assert request.cookie_name == "_ga"

    def test_classify_single_request_validation(self):
        """An empty cookie name is rejected with a ``ValueError``."""
        with pytest.raises(ValueError):
            ClassifySingleRequest(cookie_name="", cookie_domain=".example.com")

    def test_classify_site_response(self):
        """The site-level summary exposes the matched count it was given."""
        summary = ClassifySiteResponse(
            site_id="abc",
            total=10,
            matched=7,
            unmatched=3,
            results=[],
        )
        assert summary.matched == 7

    def test_match_source_enum(self):
        """Each ``MatchSource`` member compares equal to its string value."""
        expected_values = (
            (MatchSource.ALLOW_LIST, "allow_list"),
            (MatchSource.KNOWN_EXACT, "known_exact"),
            (MatchSource.KNOWN_REGEX, "known_regex"),
            (MatchSource.UNMATCHED, "unmatched"),
        )
        for member, raw in expected_values:
            assert member == raw
# ── Pattern matching unit tests ──────────────────────────────────────
class TestPatternMatching:
    """Exercise the ``_match_pattern`` and ``_match_regex`` helpers."""

    def test_exact_match(self):
        """Identical pattern and value match."""
        assert _match_pattern("_ga", "_ga") is True

    def test_exact_match_case_insensitive(self):
        """Case differences on either side do not prevent a match."""
        for pattern, value in (("_GA", "_ga"), ("_ga", "_GA")):
            assert _match_pattern(pattern, value) is True

    def test_exact_no_match(self):
        """Different literal names do not match."""
        assert _match_pattern("_ga", "_gid") is False

    def test_wildcard_star(self):
        """A bare ``*`` matches any value."""
        for value in ("_ga", "anything"):
            assert _match_pattern("*", value) is True

    def test_wildcard_prefix(self):
        """A trailing ``*`` matches values sharing the literal prefix."""
        pattern = "_ga_*"
        assert _match_pattern(pattern, "_ga_ABC123") is True
        assert _match_pattern(pattern, "_ga_") is True
        assert _match_pattern(pattern, "_gid") is False

    def test_wildcard_suffix(self):
        """A leading ``*`` matches values sharing the literal suffix."""
        pattern = "*.google.com"
        assert _match_pattern(pattern, ".google.com") is True
        assert _match_pattern(pattern, "www.google.com") is True
        assert _match_pattern(pattern, ".facebook.com") is False

    def test_wildcard_middle(self):
        """A ``*`` in the middle requires both the prefix and the suffix."""
        pattern = "_ga*id"
        assert _match_pattern(pattern, "_ga_gid") is True  # * matches _g
        assert _match_pattern(pattern, "_gaid") is True
        assert _match_pattern(pattern, "_ga") is False  # must end in id

    def test_empty_values(self):
        """An empty pattern or an empty value never matches."""
        for pattern, value in (("", "_ga"), ("_ga", ""), ("", "")):
            assert _match_pattern(pattern, value) is False

    def test_regex_match(self):
        """A regex matches names it describes and rejects others."""
        assert _match_regex(r"_hj.*", "_hjSession_12345") is True
        assert _match_regex(r"_hj.*", "_ga") is False

    def test_regex_case_insensitive(self):
        """Regex matching ignores case."""
        assert _match_regex(r"_hj.*", "_HJSession") is True

    def test_regex_anchored(self):
        """The regex must match from the start of the value."""
        # re.match anchors at start by default
        assert _match_regex(r"_pk_id.*", "_pk_id.abc.123") is True
        assert _match_regex(r"_pk_id.*", "x_pk_id") is False

    def test_regex_invalid_pattern(self):
        """A malformed regex yields False rather than raising."""
        assert _match_regex(r"[invalid", "test") is False

    def test_regex_full_domain_match(self):
        """``.*`` matches a full cookie domain."""
        assert _match_regex(r".*", ".example.com") is True

    def test_wildcard_dynamic_id_suffix(self):
        """Cookies with dynamic IDs should match wildcard prefix patterns."""
        matching_cases = (
            ("_hjSessionUser_*", "_hjSessionUser_1150536"),
            ("_hjSession_*", "_hjSession_9876543"),
            ("ri--*", "ri--zC77O2yRxuIvW5fjRAq0RdzNYaF-x"),
            ("intercom-id-*", "intercom-id-abc123def"),
            ("amp_*", "amp_ff29a3"),
            ("mp_*", "mp_abc123_mixpanel"),
        )
        for pattern, cookie_name in matching_cases:
            assert _match_pattern(pattern, cookie_name) is True

    def test_wildcard_does_not_overmatch(self):
        """Wildcard patterns should not match unrelated cookies."""
        non_matching_cases = (
            ("_hjSessionUser_*", "_hjSession_123"),
            ("ri--*", "ri-single-dash"),
            ("intercom-id-*", "intercom-session-xyz"),
        )
        for pattern, cookie_name in non_matching_cases:
            assert _match_pattern(pattern, cookie_name) is False
# ── Classification engine unit tests ─────────────────────────────────
def _make_category(slug: str, cat_id: uuid.UUID | None = None):
"""Create a mock CookieCategory."""
cat = MagicMock()
cat.id = cat_id or uuid.uuid4()
cat.slug = slug
return cat
def _make_known(
name_pattern: str,
domain_pattern: str,
category_id: uuid.UUID,
vendor: str | None = None,
description: str | None = None,
is_regex: bool = False,
):
"""Create a mock KnownCookie."""
known = MagicMock()
known.name_pattern = name_pattern
known.domain_pattern = domain_pattern
known.category_id = category_id
known.vendor = vendor
known.description = description
known.is_regex = is_regex
return known
def _make_allow_entry(
name_pattern: str,
domain_pattern: str,
category_id: uuid.UUID,
description: str | None = None,
):
"""Create a mock CookieAllowListEntry."""
entry = MagicMock()
entry.name_pattern = name_pattern
entry.domain_pattern = domain_pattern
entry.category_id = category_id
entry.description = description
return entry
class TestClassifyCookie:
    """Unit tests for ``classify_cookie`` using mocked model objects.

    As used throughout this class, the positional arguments after the cookie
    name and domain are: allow-list entries, exact/wildcard known cookies,
    regex known cookies, and the category lookup map.
    """

    def setup_method(self):
        """Create three mock categories and index them by ID."""
        self.analytics_cat = _make_category("analytics")
        self.marketing_cat = _make_category("marketing")
        self.necessary_cat = _make_category("necessary")
        self.category_map = {
            category.id: category
            for category in (
                self.analytics_cat,
                self.marketing_cat,
                self.necessary_cat,
            )
        }

    def test_exact_known_match(self):
        """A literal known-cookie pattern yields a KNOWN_EXACT result."""
        ga_known = _make_known("_ga", "*", self.analytics_cat.id, vendor="Google")

        outcome = classify_cookie(
            "_ga", ".example.com", [], [ga_known], [], self.category_map
        )

        assert outcome.matched is True
        assert outcome.match_source == MatchSource.KNOWN_EXACT
        assert outcome.category_slug == "analytics"
        assert outcome.vendor == "Google"

    def test_regex_known_match(self):
        """A regex known-cookie pattern yields a KNOWN_REGEX result."""
        hotjar_regex = _make_known(
            r"_hj.*",
            r".*",
            self.analytics_cat.id,
            vendor="Hotjar",
            is_regex=True,
        )

        outcome = classify_cookie(
            "_hjSession_123",
            ".example.com",
            [],
            [],
            [hotjar_regex],
            self.category_map,
        )

        assert outcome.matched is True
        assert outcome.match_source == MatchSource.KNOWN_REGEX
        assert outcome.vendor == "Hotjar"

    def test_allow_list_match(self):
        """An allow-list entry alone yields an ALLOW_LIST result."""
        override = _make_allow_entry(
            "_custom_cookie",
            "*",
            self.necessary_cat.id,
            description="Site-specific override",
        )

        outcome = classify_cookie(
            "_custom_cookie",
            ".example.com",
            [override],
            [],
            [],
            self.category_map,
        )

        assert outcome.matched is True
        assert outcome.match_source == MatchSource.ALLOW_LIST
        assert outcome.category_slug == "necessary"

    def test_allow_list_takes_priority_over_known(self):
        """Allow-list should override known cookies database."""
        override = _make_allow_entry(
            "_ga",
            "*",
            self.necessary_cat.id,
            description="Overridden to necessary",
        )
        ga_known = _make_known("_ga", "*", self.analytics_cat.id, vendor="Google")

        outcome = classify_cookie(
            "_ga",
            ".example.com",
            [override],
            [ga_known],
            [],
            self.category_map,
        )

        assert outcome.match_source == MatchSource.ALLOW_LIST
        assert outcome.category_slug == "necessary"

    def test_exact_takes_priority_over_regex(self):
        """Exact match should be preferred over regex match."""
        exact_rule = _make_known("_ga", "*", self.analytics_cat.id, vendor="Google")
        regex_rule = _make_known(
            r"_g.*",
            r".*",
            self.marketing_cat.id,
            vendor="Other",
            is_regex=True,
        )

        outcome = classify_cookie(
            "_ga",
            ".example.com",
            [],
            [exact_rule],
            [regex_rule],
            self.category_map,
        )

        assert outcome.match_source == MatchSource.KNOWN_EXACT
        assert outcome.category_slug == "analytics"

    def test_unmatched(self):
        """With no rules at all the cookie stays unmatched and uncategorised."""
        outcome = classify_cookie(
            "obscure_cookie",
            ".unknown.com",
            [],
            [],
            [],
            self.category_map,
        )

        assert outcome.matched is False
        assert outcome.match_source == MatchSource.UNMATCHED
        assert outcome.category_id is None

    def test_domain_must_match(self):
        """Cookie should not match if domain pattern doesn't match."""
        google_only = _make_known("_ga", "*.google.com", self.analytics_cat.id)

        outcome = classify_cookie(
            "_ga",
            ".example.com",
            [],
            [google_only],
            [],
            self.category_map,
        )

        assert outcome.matched is False

    def test_name_must_match(self):
        """Cookie should not match if name pattern doesn't match."""
        gid_known = _make_known("_gid", "*", self.analytics_cat.id)

        outcome = classify_cookie(
            "_ga",
            ".example.com",
            [],
            [gid_known],
            [],
            self.category_map,
        )

        assert outcome.matched is False

    def test_wildcard_domain_match(self):
        """A wildcard domain pattern matches the bare dotted domain."""
        meta_known = _make_known(
            "fr",
            "*.facebook.com",
            self.marketing_cat.id,
            vendor="Meta",
        )

        outcome = classify_cookie(
            "fr",
            ".facebook.com",
            [],
            [meta_known],
            [],
            self.category_map,
        )

        assert outcome.matched is True
        assert outcome.vendor == "Meta"

    def test_classification_result_fields(self):
        """A result built from name and domain only defaults to unmatched."""
        blank = ClassificationResult(
            cookie_name="_ga",
            cookie_domain=".example.com",
        )

        assert blank.category_id is None
        assert blank.match_source == MatchSource.UNMATCHED
        assert blank.matched is False
# ── Router unit tests (mocked service) ──────────────────────────────
def _mock_db():
    """Build a mock async DB session with a canned ``execute`` result.

    Awaiting ``execute`` returns a result whose ``scalar_one_or_none()``
    gives a mock object and whose ``scalars().all()`` gives an empty list.
    """
    query_result = MagicMock()
    query_result.scalars.return_value.all.return_value = []
    query_result.scalar_one_or_none.return_value = MagicMock()

    session = AsyncMock()
    session.execute.return_value = query_result
    return session
async def _client(app, db):
    """Return an ``AsyncClient`` bound to *app* with DB and auth overridden.

    Registers overrides in ``app.dependency_overrides`` so that ``get_db``
    yields *db* and both ``get_current_user`` and ``require_role`` resolve to
    a mock user whose role is ``"owner"``. The overrides are not removed here.
    """
    # Imported lazily, inside the helper, as in the rest of this module's
    # router tests.
    from src.db import get_db
    from src.services.dependencies import get_current_user, require_role

    fake_user = MagicMock(organisation_id=uuid.uuid4(), role="owner")

    async def _override_get_db():
        yield db

    def _override_current_user():
        return fake_user

    def _override_require_role(*_roles):
        return _override_current_user

    app.dependency_overrides.update(
        {
            get_db: _override_get_db,
            get_current_user: _override_current_user,
            require_role: _override_require_role,
        }
    )

    return AsyncClient(transport=ASGITransport(app=app), base_url="http://test")
class TestKnownCookieRoutes:
    """Router tests for the known-cookie CRUD endpoints (mocked DB)."""

    @pytest.mark.asyncio
    async def test_list_known_cookies(self, app):
        """Listing returns 200 and a JSON array."""
        session = _mock_db()
        async with await _client(app, session) as client:
            response = await client.get("/api/v1/cookies/known")

        assert response.status_code == 200
        assert isinstance(response.json(), list)

    @pytest.mark.asyncio
    async def test_create_known_cookie(self, app):
        """Creating a known cookie with a valid category returns 201."""
        session = _mock_db()

        # Mock category validation
        category_lookup = MagicMock()
        category_lookup.scalar_one_or_none.return_value = MagicMock()

        # Mock the created known cookie
        created_row = MagicMock(
            id=uuid.uuid4(),
            name_pattern="_ga",
            domain_pattern="*",
            category_id=uuid.uuid4(),
            vendor="Google",
            description="GA cookie",
            is_regex=False,
            created_at=datetime.now(),
            updated_at=datetime.now(),
        )

        # The first execute() call is the category validation; every later
        # call gets a fresh, unconfigured mock.
        first_call_results = [category_lookup]

        async def mock_execute(stmt):
            if first_call_results:
                return first_call_results.pop()
            return MagicMock()

        session.execute = mock_execute
        session.flush = AsyncMock()
        session.refresh = AsyncMock(side_effect=lambda obj: None)
        session.add = MagicMock()

        request_body = {
            "name_pattern": "_ga",
            "domain_pattern": "*",
            "category_id": str(uuid.uuid4()),
            "vendor": "Google",
        }

        with patch("src.routers.cookies.KnownCookie", return_value=created_row):
            async with await _client(app, session) as client:
                response = await client.post(
                    "/api/v1/cookies/known",
                    json=request_body,
                )

        assert response.status_code == 201

    @pytest.mark.asyncio
    async def test_get_known_cookie_not_found(self, app):
        """Fetching an ID the DB does not return yields 404."""
        empty_lookup = MagicMock()
        empty_lookup.scalar_one_or_none.return_value = None
        session = AsyncMock()
        session.execute.return_value = empty_lookup

        async with await _client(app, session) as client:
            response = await client.get(f"/api/v1/cookies/known/{uuid.uuid4()}")

        assert response.status_code == 404
class TestClassificationRoutes:
    """Router tests for the classification endpoints (mocked service)."""

    @pytest.mark.asyncio
    async def test_classify_preview(self, app):
        """The preview endpoint serialises the service's result."""
        session = _mock_db()
        canned_result = ClassificationResult(
            cookie_name="_ga",
            cookie_domain=".example.com",
            category_id=uuid.uuid4(),
            category_slug="analytics",
            vendor="Google",
            match_source=MatchSource.KNOWN_EXACT,
            matched=True,
        )
        request_body = {
            "cookie_name": "_ga",
            "cookie_domain": ".example.com",
        }

        # Replace the service call the router uses so no real lookup runs.
        with patch(
            "src.routers.cookies.classify_single_cookie",
            return_value=canned_result,
        ):
            async with await _client(app, session) as client:
                response = await client.post(
                    f"/api/v1/cookies/sites/{uuid.uuid4()}/classify/preview",
                    json=request_body,
                )

        assert response.status_code == 200
        body = response.json()
        assert body["matched"] is True
        assert body["match_source"] == "known_exact"
# ── Integration tests ────────────────────────────────────────────────
# Try the package-qualified conftest first and fall back to the bare module
# name. NOTE(review): presumably this covers runs where ``tests`` is not
# importable as a package — confirm against the project's pytest setup.
try:
    from tests.conftest import create_test_site, requires_db
except ImportError:
    from conftest import create_test_site, requires_db
@requires_db
class TestClassificationIntegration:
    """End-to-end classification tests that run against a live database."""

    async def _get_category_id(self, client: AsyncClient, headers: dict, slug: str) -> str:
        """Return the ID of the listed category whose slug equals *slug*.

        Fails the running test when no listed category carries that slug.
        """
        listing = await client.get("/api/v1/cookies/categories", headers=headers)
        assert listing.status_code == 200

        found = next(
            (category for category in listing.json() if category["slug"] == slug),
            None,
        )
        if found is None:
            pytest.fail(f"Category '{slug}' not found")
        return found["id"]

    async def _create_known_cookie(
        self,
        client: AsyncClient,
        headers: dict,
        name_pattern: str,
        domain_pattern: str,
        category_slug: str,
        *,
        vendor: str | None = None,
        is_regex: bool = False,
    ) -> str:
        """Create a known cookie under *category_slug* and return its ID."""
        body = {
            "name_pattern": name_pattern,
            "domain_pattern": domain_pattern,
            "category_id": await self._get_category_id(client, headers, category_slug),
            "vendor": vendor,
            "is_regex": is_regex,
        }
        created = await client.post(
            "/api/v1/cookies/known",
            headers=headers,
            json=body,
        )
        assert created.status_code == 201, created.text
        return created.json()["id"]

    async def _create_cookie(
        self,
        client: AsyncClient,
        headers: dict,
        site_id: str,
        name: str,
        domain: str,
    ) -> str:
        """Create a pending cookie on a site and return its ID."""
        created = await client.post(
            f"/api/v1/cookies/sites/{site_id}",
            headers=headers,
            json={"name": name, "domain": domain},
        )
        assert created.status_code == 201, created.text
        return created.json()["id"]

    async def test_known_cookies_crud(self, db_client, auth_headers):
        """Test full CRUD lifecycle for known cookies."""
        category_id = await self._get_category_id(db_client, auth_headers, "analytics")

        # Create
        created = await db_client.post(
            "/api/v1/cookies/known",
            headers=auth_headers,
            json={
                "name_pattern": f"_test_{uuid.uuid4().hex[:6]}",
                "domain_pattern": "*",
                "category_id": category_id,
                "vendor": "TestVendor",
                "description": "Test cookie",
            },
        )
        assert created.status_code == 201
        known_id = created.json()["id"]
        item_url = f"/api/v1/cookies/known/{known_id}"

        # Read
        fetched = await db_client.get(item_url, headers=auth_headers)
        assert fetched.status_code == 200
        assert fetched.json()["vendor"] == "TestVendor"

        # Update
        patched = await db_client.patch(
            item_url,
            headers=auth_headers,
            json={"vendor": "UpdatedVendor"},
        )
        assert patched.status_code == 200
        assert patched.json()["vendor"] == "UpdatedVendor"

        # List, passing the updated vendor as a query parameter
        listed = await db_client.get(
            "/api/v1/cookies/known",
            headers=auth_headers,
            params={"vendor": "UpdatedVendor"},
        )
        assert listed.status_code == 200
        assert any(row["id"] == known_id for row in listed.json())

        # Delete
        deleted = await db_client.delete(item_url, headers=auth_headers)
        assert deleted.status_code == 204

        # Verify deleted
        gone = await db_client.get(item_url, headers=auth_headers)
        assert gone.status_code == 404

    async def test_classify_exact_match(self, db_client, auth_headers):
        """Test classification with exact known cookie match."""
        site_id = await create_test_site(
            db_client, auth_headers, domain_prefix="classify-exact"
        )

        # Register a known-cookie pattern with a unique literal name.
        pattern_name = f"_test_exact_{uuid.uuid4().hex[:6]}"
        await self._create_known_cookie(
            db_client,
            auth_headers,
            pattern_name,
            "*",
            "analytics",
            vendor="TestVendor",
        )

        # Put a pending cookie with that same name on the site.
        await self._create_cookie(
            db_client,
            auth_headers,
            site_id,
            pattern_name,
            ".example.com",
        )

        # Run classification for the site.
        classified = await db_client.post(
            f"/api/v1/cookies/sites/{site_id}/classify",
            headers=auth_headers,
        )
        assert classified.status_code == 200
        summary = classified.json()
        assert summary["total"] >= 1
        assert summary["matched"] >= 1
        assert any(
            row["matched"] and row["cookie_name"] == pattern_name
            for row in summary["results"]
        )

    async def test_classify_regex_match(self, db_client, auth_headers):
        """Test classification with regex known cookie match."""
        site_id = await create_test_site(
            db_client, auth_headers, domain_prefix="classify-regex"
        )
        prefix = f"_rx_{uuid.uuid4().hex[:4]}"

        # Register a regex pattern built from the unique prefix.
        await self._create_known_cookie(
            db_client,
            auth_headers,
            f"{prefix}.*",
            ".*",
            "analytics",
            vendor="RegexVendor",
            is_regex=True,
        )

        # Add a cookie whose name the regex should match.
        await self._create_cookie(
            db_client,
            auth_headers,
            site_id,
            f"{prefix}_session_123",
            ".example.com",
        )

        # Run classification for the site.
        classified = await db_client.post(
            f"/api/v1/cookies/sites/{site_id}/classify",
            headers=auth_headers,
        )
        assert classified.status_code == 200
        summary = classified.json()
        assert summary["matched"] >= 1
        assert any(
            row["matched"] and row["match_source"] == "known_regex"
            for row in summary["results"]
        )

    async def test_classify_unmatched(self, db_client, auth_headers):
        """Cookies without known patterns should remain unmatched."""
        site_id = await create_test_site(
            db_client, auth_headers, domain_prefix="classify-unmatched"
        )
        unique_name = f"_unknown_{uuid.uuid4().hex[:8]}"
        await self._create_cookie(
            db_client,
            auth_headers,
            site_id,
            unique_name,
            ".obscure-domain.com",
        )

        classified = await db_client.post(
            f"/api/v1/cookies/sites/{site_id}/classify",
            headers=auth_headers,
        )
        assert classified.status_code == 200
        summary = classified.json()
        assert summary["unmatched"] >= 1

    async def test_classify_preview(self, db_client, auth_headers):
        """Test preview classification without saving."""
        site_id = await create_test_site(
            db_client, auth_headers, domain_prefix="classify-preview"
        )
        request_body = {
            "cookie_name": "_unknown_cookie",
            "cookie_domain": ".test.com",
        }

        preview = await db_client.post(
            f"/api/v1/cookies/sites/{site_id}/classify/preview",
            headers=auth_headers,
            json=request_body,
        )
        assert preview.status_code == 200
        body = preview.json()
        assert body["matched"] is False
        assert body["match_source"] == "unmatched"

    async def test_classify_allow_list_priority(self, db_client, auth_headers):
        """Allow-list entries should take priority over known cookies."""
        site_id = await create_test_site(
            db_client, auth_headers, domain_prefix="classify-allow"
        )
        cookie_name = f"_priority_{uuid.uuid4().hex[:6]}"

        # Add to known cookies as marketing
        await self._create_known_cookie(
            db_client,
            auth_headers,
            cookie_name,
            "*",
            "marketing",
        )

        # Add to allow-list as necessary (should take priority)
        necessary_id = await self._get_category_id(db_client, auth_headers, "necessary")
        allow_listed = await db_client.post(
            f"/api/v1/cookies/sites/{site_id}/allow-list",
            headers=auth_headers,
            json={
                "name_pattern": cookie_name,
                "domain_pattern": "*",
                "category_id": necessary_id,
            },
        )
        assert allow_listed.status_code == 201

        # Create cookie and classify
        await self._create_cookie(
            db_client,
            auth_headers,
            site_id,
            cookie_name,
            ".example.com",
        )
        classified = await db_client.post(
            f"/api/v1/cookies/sites/{site_id}/classify",
            headers=auth_headers,
        )
        assert classified.status_code == 200

        rows_for_cookie = [
            row
            for row in classified.json()["results"]
            if row["cookie_name"] == cookie_name
        ]
        assert len(rows_for_cookie) == 1
        only_row = rows_for_cookie[0]
        assert only_row["match_source"] == "allow_list"
        assert only_row["category_id"] == necessary_id

    async def test_known_cookies_not_found(self, db_client, auth_headers):
        """Fetching a random, never-created known-cookie ID returns 404."""
        missing = await db_client.get(
            f"/api/v1/cookies/known/{uuid.uuid4()}",
            headers=auth_headers,
        )
        assert missing.status_code == 404

    async def test_known_cookies_invalid_category(self, db_client, auth_headers):
        """Creating a known cookie with a random category ID returns 400."""
        request_body = {
            "name_pattern": "_test",
            "domain_pattern": "*",
            "category_id": str(uuid.uuid4()),
        }
        rejected = await db_client.post(
            "/api/v1/cookies/known",
            headers=auth_headers,
            json=request_body,
        )
        assert rejected.status_code == 400

    async def test_known_cookies_auth_required(self, db_client):
        """Known cookie endpoints require authentication."""
        anonymous = await db_client.get("/api/v1/cookies/known")
        assert anonymous.status_code == 401

    async def test_classify_empty_site(self, db_client, auth_headers):
        """Classifying a site with no cookies should return empty results."""
        site_id = await create_test_site(
            db_client, auth_headers, domain_prefix="classify-empty"
        )
        classified = await db_client.post(
            f"/api/v1/cookies/sites/{site_id}/classify",
            headers=auth_headers,
        )
        assert classified.status_code == 200
        summary = classified.json()
        assert summary["total"] == 0
        assert summary["matched"] == 0

    async def test_list_known_cookies_search(self, db_client, auth_headers):
        """Test searching known cookies by name pattern."""
        unique = uuid.uuid4().hex[:6]
        needle = f"_search_{unique}"
        await self._create_known_cookie(
            db_client,
            auth_headers,
            needle,
            "*",
            "analytics",
        )

        listed = await db_client.get(
            "/api/v1/cookies/known",
            headers=auth_headers,
            params={"search": needle},
        )
        assert listed.status_code == 200
        rows = listed.json()
        assert len(rows) >= 1
        assert all(needle in row["name_pattern"] for row in rows)