ConsentOS — a privacy-first cookie consent management platform. Self-hosted, source-available alternative to OneTrust, Cookiebot, and CookieYes. Full standards coverage (IAB TCF v2.2, GPP v1, Google Consent Mode v2, GPC, Shopify Customer Privacy API), multi-tenant architecture with role-based access, configuration cascade (system → org → group → site → region), dark-pattern detection in the scanner, and a tamper-evident consent record audit trail. This is the initial public release. Prior development history is retained internally. See README.md for the feature list, architecture overview, and quick-start instructions. Licensed under the Elastic Licence 2.0 — self-host freely; do not resell as a managed service.
123 lines
3.8 KiB
Python
"""Tests for the scanner HTTP service."""
|
|
|
|
from unittest.mock import AsyncMock, patch
|
|
|
|
import pytest
|
|
from fastapi.testclient import TestClient
|
|
|
|
from src.worker import create_app
|
|
|
|
|
|
@pytest.fixture
def client():
    """Provide a `TestClient` wired to a freshly built scanner app."""
    return TestClient(create_app())
|
|
|
|
|
|
def test_health_endpoint(client):
    """GET /health responds 200 with the ok status payload."""
    response = client.get("/health")

    assert response.status_code == 200
    payload = response.json()
    assert payload == {"status": "ok"}
|
|
|
|
|
|
@patch("src.sitemap.discover_urls", new_callable=AsyncMock)
@patch("src.crawler.CookieCrawler.crawl_site", new_callable=AsyncMock)
def test_scan_endpoint_with_domain(mock_crawl, mock_discover, client):
    """A domain-only /scan request triggers URL discovery, then a crawl."""
    from src.crawler import CrawlResult, DiscoveredCookie, SiteCrawlResult

    # Two representative cookies: a third-party analytics cookie and a
    # first-party session cookie with hardened flags.
    ga_cookie = DiscoveredCookie(
        name="_ga",
        domain=".example.com",
        storage_type="cookie",
        page_url="https://example.com/",
        value_length=30,
    )
    session_cookie = DiscoveredCookie(
        name="session_id",
        domain="example.com",
        storage_type="cookie",
        page_url="https://example.com/",
        value_length=36,
        http_only=True,
        secure=True,
    )
    home_page = CrawlResult(
        url="https://example.com/",
        cookies=[ga_cookie, session_cookie],
    )

    mock_discover.return_value = ["https://example.com/"]
    mock_crawl.return_value = SiteCrawlResult(
        domain="example.com",
        pages=[home_page],
        total_cookies_found=2,
    )

    response = client.post("/scan", json={"domain": "example.com", "max_pages": 5})

    assert response.status_code == 200
    payload = response.json()

    assert payload["domain"] == "example.com"
    assert payload["pages_crawled"] == 1
    assert payload["total_cookies"] == 2

    cookies = payload["cookies"]
    assert len(cookies) == 2
    assert [c["name"] for c in cookies] == ["_ga", "session_id"]
    assert cookies[1]["secure"] is True
|
|
|
|
|
|
@patch("src.crawler.CookieCrawler.crawl_site", new_callable=AsyncMock)
def test_scan_endpoint_with_urls(mock_crawl, client):
    """Supplying explicit URLs to /scan bypasses the discovery step."""
    from src.crawler import CrawlResult, SiteCrawlResult

    about_page = CrawlResult(url="https://example.com/about", cookies=[])
    mock_crawl.return_value = SiteCrawlResult(
        domain="example.com",
        pages=[about_page],
        total_cookies_found=0,
    )

    request_body = {
        "domain": "example.com",
        "urls": ["https://example.com/about"],
        "max_pages": 1,
    }
    response = client.post("/scan", json=request_body)

    assert response.status_code == 200
    payload = response.json()
    assert payload["pages_crawled"] == 1
    assert payload["cookies"] == []
|
|
|
|
|
|
@patch("src.sitemap.discover_urls", new_callable=AsyncMock)
@patch("src.crawler.CookieCrawler.crawl_site", new_callable=AsyncMock)
def test_scan_endpoint_with_errors(mock_crawl, mock_discover, client):
    """Per-page crawl failures surface in the response's errors list."""
    from src.crawler import CrawlResult, SiteCrawlResult

    failed_page = CrawlResult(url="https://example.com/", cookies=[], error="Timeout")
    mock_discover.return_value = ["https://example.com/"]
    mock_crawl.return_value = SiteCrawlResult(
        domain="example.com",
        pages=[failed_page],
        total_cookies_found=0,
    )

    response = client.post("/scan", json={"domain": "example.com"})

    assert response.status_code == 200
    assert response.json()["errors"] == ["Timeout"]
|
|
|
|
|
|
def test_scan_request_validation(client):
    """An empty /scan body fails request validation with a 422."""
    response = client.post("/scan", json={})

    assert response.status_code == 422
|