feat: initial public release
ConsentOS — a privacy-first cookie consent management platform. Self-hosted, source-available alternative to OneTrust, Cookiebot, and CookieYes. Full standards coverage (IAB TCF v2.2, GPP v1, Google Consent Mode v2, GPC, Shopify Customer Privacy API), multi-tenant architecture with role-based access, configuration cascade (system → org → group → site → region), dark-pattern detection in the scanner, and a tamper-evident consent record audit trail. This is the initial public release. Prior development history is retained internally. See README.md for the feature list, architecture overview, and quick-start instructions. Licensed under the Elastic Licence 2.0 — self-host freely; do not resell as a managed service.
This commit is contained in:
144
apps/scanner/tests/test_classifier.py
Normal file
144
apps/scanner/tests/test_classifier.py
Normal file
@@ -0,0 +1,144 @@
|
||||
"""Tests for cookie classification — CMP-21."""
|
||||
|
||||
from src.classifier import (
|
||||
ClassificationResult,
|
||||
KnownPattern,
|
||||
_domain_matches,
|
||||
classify_cookie,
|
||||
)
|
||||
|
||||
# ── Domain matching ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestDomainMatching:
|
||||
def test_wildcard_matches_any(self):
|
||||
assert _domain_matches("example.com", "*") is True
|
||||
|
||||
def test_exact_match(self):
|
||||
assert _domain_matches("example.com", "example.com") is True
|
||||
|
||||
def test_exact_no_match(self):
|
||||
assert _domain_matches("other.com", "example.com") is False
|
||||
|
||||
def test_subdomain_match(self):
|
||||
assert _domain_matches("sub.example.com", "example.com") is True
|
||||
|
||||
def test_leading_dot_stripped(self):
|
||||
assert _domain_matches(".example.com", "example.com") is True
|
||||
|
||||
def test_pattern_leading_dot(self):
|
||||
assert _domain_matches("example.com", ".example.com") is True
|
||||
|
||||
def test_case_insensitive(self):
|
||||
assert _domain_matches("Example.COM", "example.com") is True
|
||||
|
||||
def test_no_partial_match(self):
|
||||
# "notexample.com" should NOT match "example.com"
|
||||
assert _domain_matches("notexample.com", "example.com") is False
|
||||
|
||||
|
||||
# ── Cookie classification ────────────────────────────────────────────
|
||||
|
||||
|
||||
PATTERNS = [
|
||||
KnownPattern(name_pattern="_ga", domain_pattern="*", category="analytics", vendor="Google"),
|
||||
KnownPattern(name_pattern="_ga_*", domain_pattern="*", category="analytics", vendor="Google"),
|
||||
KnownPattern(name_pattern="_gid", domain_pattern="*", category="analytics", vendor="Google"),
|
||||
KnownPattern(
|
||||
name_pattern="_fbp", domain_pattern=".facebook.com", category="marketing", vendor="Meta"
|
||||
),
|
||||
KnownPattern(
|
||||
name_pattern="__cf_bm",
|
||||
domain_pattern="*",
|
||||
category="necessary",
|
||||
vendor="Cloudflare",
|
||||
),
|
||||
KnownPattern(
|
||||
name_pattern="_hj.*",
|
||||
domain_pattern="*",
|
||||
category="analytics",
|
||||
vendor="Hotjar",
|
||||
is_regex=True,
|
||||
),
|
||||
KnownPattern(
|
||||
name_pattern="^_pk_id\\..*",
|
||||
domain_pattern="*",
|
||||
category="analytics",
|
||||
vendor="Matomo",
|
||||
is_regex=True,
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
class TestClassifyCookie:
|
||||
def test_exact_match(self):
|
||||
result = classify_cookie("_ga", "example.com", PATTERNS)
|
||||
assert result.category == "analytics"
|
||||
assert result.vendor == "Google"
|
||||
assert result.match_source == "exact"
|
||||
|
||||
def test_wildcard_match(self):
|
||||
result = classify_cookie("_ga_ABC123", "example.com", PATTERNS)
|
||||
assert result.category == "analytics"
|
||||
assert result.match_source == "wildcard"
|
||||
|
||||
def test_regex_match(self):
|
||||
result = classify_cookie("_hjSession_123", "example.com", PATTERNS)
|
||||
assert result.category == "analytics"
|
||||
assert result.vendor == "Hotjar"
|
||||
assert result.match_source == "regex"
|
||||
|
||||
def test_regex_matomo(self):
|
||||
result = classify_cookie("_pk_id.1.abc1", "example.com", PATTERNS)
|
||||
assert result.category == "analytics"
|
||||
assert result.vendor == "Matomo"
|
||||
assert result.match_source == "regex"
|
||||
|
||||
def test_domain_specific_match(self):
|
||||
result = classify_cookie("_fbp", "sub.facebook.com", PATTERNS)
|
||||
assert result.category == "marketing"
|
||||
assert result.vendor == "Meta"
|
||||
|
||||
def test_domain_mismatch(self):
|
||||
result = classify_cookie("_fbp", "example.com", PATTERNS)
|
||||
assert result.category is None
|
||||
assert result.match_source == "unmatched"
|
||||
|
||||
def test_unmatched_cookie(self):
|
||||
result = classify_cookie("unknown_cookie", "example.com", PATTERNS)
|
||||
assert result.category is None
|
||||
assert result.match_source == "unmatched"
|
||||
|
||||
def test_necessary_cookie(self):
|
||||
result = classify_cookie("__cf_bm", "example.com", PATTERNS)
|
||||
assert result.category == "necessary"
|
||||
assert result.vendor == "Cloudflare"
|
||||
|
||||
def test_empty_patterns(self):
|
||||
result = classify_cookie("_ga", "example.com", [])
|
||||
assert result.category is None
|
||||
|
||||
def test_exact_takes_priority_over_wildcard(self):
|
||||
"""Exact match should come before wildcard in pattern list."""
|
||||
patterns = [
|
||||
KnownPattern(name_pattern="_ga", domain_pattern="*", category="analytics"),
|
||||
KnownPattern(name_pattern="_ga*", domain_pattern="*", category="marketing"),
|
||||
]
|
||||
result = classify_cookie("_ga", "example.com", patterns)
|
||||
assert result.category == "analytics"
|
||||
assert result.match_source == "exact"
|
||||
|
||||
|
||||
# ── ClassificationResult ─────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestClassificationResult:
|
||||
def test_defaults(self):
|
||||
r = ClassificationResult(category=None)
|
||||
assert r.vendor is None
|
||||
assert r.match_source == "unmatched"
|
||||
|
||||
def test_with_values(self):
|
||||
r = ClassificationResult(category="analytics", vendor="Google", match_source="exact")
|
||||
assert r.category == "analytics"
|
||||
assert r.vendor == "Google"
|
||||
Reference in New Issue
Block a user