Files
consentos/apps/scanner/tests/test_classifier.py
James Cottrill fbf26453f2 feat: initial public release
ConsentOS — a privacy-first cookie consent management platform.

Self-hosted, source-available alternative to OneTrust, Cookiebot, and
CookieYes. Full standards coverage (IAB TCF v2.2, GPP v1, Google
Consent Mode v2, GPC, Shopify Customer Privacy API), multi-tenant
architecture with role-based access, configuration cascade
(system → org → group → site → region), dark-pattern detection in
the scanner, and a tamper-evident consent record audit trail.

This is the initial public release. Prior development history is
retained internally.

See README.md for the feature list, architecture overview, and
quick-start instructions. Licensed under the Elastic Licence 2.0 —
self-host freely; do not resell as a managed service.
2026-04-14 09:18:18 +00:00

145 lines
5.2 KiB
Python

"""Tests for cookie classification — CMP-21."""
from src.classifier import (
ClassificationResult,
KnownPattern,
_domain_matches,
classify_cookie,
)
# ── Domain matching ──────────────────────────────────────────────────
class TestDomainMatching:
    """Expectations for ``_domain_matches``.

    Every case passes the domain under test as the first argument and the
    pattern it is compared against as the second.
    """

    def test_wildcard_matches_any(self):
        """A bare ``*`` pattern is expected to accept the domain."""
        matched = _domain_matches("example.com", "*")
        assert matched is True

    def test_exact_match(self):
        """Identical domain and pattern are expected to match."""
        matched = _domain_matches("example.com", "example.com")
        assert matched is True

    def test_exact_no_match(self):
        """An unrelated domain is expected to be rejected."""
        matched = _domain_matches("other.com", "example.com")
        assert matched is False

    def test_subdomain_match(self):
        """A subdomain of the pattern's domain is expected to match."""
        matched = _domain_matches("sub.example.com", "example.com")
        assert matched is True

    def test_leading_dot_stripped(self):
        """A leading dot on the domain is expected not to prevent a match."""
        matched = _domain_matches(".example.com", "example.com")
        assert matched is True

    def test_pattern_leading_dot(self):
        """A leading dot on the pattern is expected not to prevent a match."""
        matched = _domain_matches("example.com", ".example.com")
        assert matched is True

    def test_case_insensitive(self):
        """Letter case of the domain is expected to be ignored."""
        matched = _domain_matches("Example.COM", "example.com")
        assert matched is True

    def test_no_partial_match(self):
        """A domain that merely ends with the pattern text must not match."""
        # "notexample.com" shares the suffix "example.com" but is a different
        # registrable domain, so it should NOT match "example.com".
        matched = _domain_matches("notexample.com", "example.com")
        assert matched is False
# ── Cookie classification ────────────────────────────────────────────
# Shared pattern fixture passed to classify_cookie() by the tests below.
PATTERNS = [
    # Entries with vendor "Google": two literal names and one name ending in "*".
    KnownPattern(
        name_pattern="_ga",
        domain_pattern="*",
        category="analytics",
        vendor="Google",
    ),
    KnownPattern(
        name_pattern="_ga_*",
        domain_pattern="*",
        category="analytics",
        vendor="Google",
    ),
    KnownPattern(
        name_pattern="_gid",
        domain_pattern="*",
        category="analytics",
        vendor="Google",
    ),
    # The only entry whose domain_pattern is not "*".
    KnownPattern(
        name_pattern="_fbp",
        domain_pattern=".facebook.com",
        category="marketing",
        vendor="Meta",
    ),
    # The only entry in the "necessary" category.
    KnownPattern(
        name_pattern="__cf_bm",
        domain_pattern="*",
        category="necessary",
        vendor="Cloudflare",
    ),
    # Entries flagged is_regex=True.
    KnownPattern(
        name_pattern="_hj.*",
        domain_pattern="*",
        category="analytics",
        vendor="Hotjar",
        is_regex=True,
    ),
    KnownPattern(
        # Raw string: the backslash escapes the literal dot after "_pk_id".
        name_pattern=r"^_pk_id\..*",
        domain_pattern="*",
        category="analytics",
        vendor="Matomo",
        is_regex=True,
    ),
]
class TestClassifyCookie:
    """Expectations for ``classify_cookie(name, domain, patterns)``.

    Unless a test builds its own list, the module-level ``PATTERNS`` fixture
    is used as the pattern set.
    """

    def test_exact_match(self):
        """``_ga`` is expected to classify as Google analytics, source "exact"."""
        outcome = classify_cookie("_ga", "example.com", PATTERNS)
        assert outcome.category == "analytics"
        assert outcome.vendor == "Google"
        assert outcome.match_source == "exact"

    def test_wildcard_match(self):
        """``_ga_ABC123`` is expected to classify as analytics, source "wildcard"."""
        outcome = classify_cookie("_ga_ABC123", "example.com", PATTERNS)
        assert outcome.category == "analytics"
        assert outcome.match_source == "wildcard"

    def test_regex_match(self):
        """``_hjSession_123`` is expected to classify as Hotjar, source "regex"."""
        outcome = classify_cookie("_hjSession_123", "example.com", PATTERNS)
        assert outcome.category == "analytics"
        assert outcome.vendor == "Hotjar"
        assert outcome.match_source == "regex"

    def test_regex_matomo(self):
        """``_pk_id.1.abc1`` is expected to classify as Matomo, source "regex"."""
        outcome = classify_cookie("_pk_id.1.abc1", "example.com", PATTERNS)
        assert outcome.category == "analytics"
        assert outcome.vendor == "Matomo"
        assert outcome.match_source == "regex"

    def test_domain_specific_match(self):
        """``_fbp`` on a facebook.com subdomain is expected to classify as Meta marketing."""
        outcome = classify_cookie("_fbp", "sub.facebook.com", PATTERNS)
        assert outcome.category == "marketing"
        assert outcome.vendor == "Meta"

    def test_domain_mismatch(self):
        """``_fbp`` on a non-facebook domain is expected to stay unmatched."""
        outcome = classify_cookie("_fbp", "example.com", PATTERNS)
        assert outcome.category is None
        assert outcome.match_source == "unmatched"

    def test_unmatched_cookie(self):
        """A name absent from ``PATTERNS`` is expected to stay unmatched."""
        outcome = classify_cookie("unknown_cookie", "example.com", PATTERNS)
        assert outcome.category is None
        assert outcome.match_source == "unmatched"

    def test_necessary_cookie(self):
        """``__cf_bm`` is expected to classify as Cloudflare, category "necessary"."""
        outcome = classify_cookie("__cf_bm", "example.com", PATTERNS)
        assert outcome.category == "necessary"
        assert outcome.vendor == "Cloudflare"

    def test_empty_patterns(self):
        """With an empty pattern list the category is expected to be ``None``."""
        outcome = classify_cookie("_ga", "example.com", [])
        assert outcome.category is None

    def test_exact_takes_priority_over_wildcard(self):
        """A name covered by both an exact and a wildcard pattern resolves to the exact one.

        The exact pattern is listed before the wildcard in the list built here.
        """
        exact_entry = KnownPattern(
            name_pattern="_ga", domain_pattern="*", category="analytics"
        )
        wildcard_entry = KnownPattern(
            name_pattern="_ga*", domain_pattern="*", category="marketing"
        )
        overlapping = [exact_entry, wildcard_entry]

        outcome = classify_cookie("_ga", "example.com", overlapping)
        assert outcome.category == "analytics"
        assert outcome.match_source == "exact"
# ── ClassificationResult ─────────────────────────────────────────────
class TestClassificationResult:
    """Expectations for constructing ``ClassificationResult`` directly."""

    def test_defaults(self):
        """Only ``category`` is supplied; the other fields fall back to defaults."""
        r = ClassificationResult(category=None)
        # The explicitly supplied value must be stored as given.
        assert r.category is None
        assert r.vendor is None
        assert r.match_source == "unmatched"

    def test_with_values(self):
        """Every field passed to the constructor is readable back unchanged."""
        r = ClassificationResult(category="analytics", vendor="Google", match_source="exact")
        assert r.category == "analytics"
        assert r.vendor == "Google"
        # match_source is passed above, so verify it too instead of leaving
        # the argument unchecked.
        assert r.match_source == "exact"